Update To 11.40.268.0
[platform/framework/web/crosswalk.git] / src / third_party / WebKit / Source / core / html / parser / HTMLConstructionSite.cpp
1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  * Copyright (C) 2011 Apple Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 #include "config.h"
28 #include "core/html/parser/HTMLConstructionSite.h"
29
30 #include "core/HTMLElementFactory.h"
31 #include "core/HTMLNames.h"
32 #include "core/dom/Comment.h"
33 #include "core/dom/DocumentFragment.h"
34 #include "core/dom/DocumentType.h"
35 #include "core/dom/Element.h"
36 #include "core/dom/ScriptLoader.h"
37 #include "core/dom/TemplateContentDocumentFragment.h"
38 #include "core/dom/Text.h"
39 #include "core/frame/LocalFrame.h"
40 #include "core/html/HTMLFormElement.h"
41 #include "core/html/HTMLHtmlElement.h"
42 #include "core/html/HTMLPlugInElement.h"
43 #include "core/html/HTMLScriptElement.h"
44 #include "core/html/HTMLTemplateElement.h"
45 #include "core/html/parser/AtomicHTMLToken.h"
46 #include "core/html/parser/HTMLParserIdioms.h"
47 #include "core/html/parser/HTMLStackItem.h"
48 #include "core/html/parser/HTMLToken.h"
49 #include "core/loader/FrameLoader.h"
50 #include "core/loader/FrameLoaderClient.h"
51 #include "core/svg/SVGScriptElement.h"
52 #include "platform/NotImplemented.h"
53 #include "platform/text/TextBreakIterator.h"
54 #include <limits>
55
56 namespace blink {
57
58 using namespace HTMLNames;
59
// Depth of the open-element stack beyond which attachLater() attaches a new
// node to the parent of its intended parent instead of nesting it deeper.
static const unsigned maximumHTMLParserDOMTreeDepth = 512;
61
62 static inline void setAttributes(Element* element, AtomicHTMLToken* token, ParserContentPolicy parserContentPolicy)
63 {
64     if (!scriptingContentIsAllowed(parserContentPolicy))
65         element->stripScriptingAttributes(token->attributes());
66     element->parserSetAttributes(token->attributes());
67 }
68
69 static bool hasImpliedEndTag(const HTMLStackItem* item)
70 {
71     return item->hasTagName(ddTag)
72         || item->hasTagName(dtTag)
73         || item->hasTagName(liTag)
74         || item->hasTagName(optionTag)
75         || item->hasTagName(optgroupTag)
76         || item->hasTagName(pTag)
77         || item->hasTagName(rbTag)
78         || item->hasTagName(rpTag)
79         || item->hasTagName(rtTag)
80         || item->hasTagName(rtcTag);
81 }
82
83 static bool shouldUseLengthLimit(const ContainerNode& node)
84 {
85     return !isHTMLScriptElement(node)
86         && !isHTMLStyleElement(node)
87         && !isSVGScriptElement(node);
88 }
89
90 static unsigned textLengthLimitForContainer(const ContainerNode& node)
91 {
92     return shouldUseLengthLimit(node) ? Text::defaultLengthLimit : std::numeric_limits<unsigned>::max();
93 }
94
// Returns the result of String::isAllSpecialCharacters() instantiated with the
// isHTMLSpace<UChar> predicate, i.e. whether every character of |string| is an
// HTML space character.
static inline bool isAllWhitespace(const String& string)
{
    return string.isAllSpecialCharacters<isHTMLSpace<UChar>>();
}
99
100 static inline void insert(HTMLConstructionSiteTask& task)
101 {
102     if (isHTMLTemplateElement(*task.parent))
103         task.parent = toHTMLTemplateElement(task.parent.get())->content();
104
105     if (ContainerNode* parent = task.child->parentNode())
106         parent->parserRemoveChild(*task.child);
107
108     if (task.nextChild)
109         task.parent->parserInsertBefore(task.child.get(), *task.nextChild);
110     else
111         task.parent->parserAppendChild(task.child.get());
112 }
113
114 static inline void executeInsertTask(HTMLConstructionSiteTask& task)
115 {
116     ASSERT(task.operation == HTMLConstructionSiteTask::Insert);
117
118     insert(task);
119
120     if (task.child->isElementNode()) {
121         Element& child = toElement(*task.child);
122         child.beginParsingChildren();
123         if (task.selfClosing)
124             child.finishParsingChildren();
125     }
126 }
127
128 static inline void executeInsertTextTask(HTMLConstructionSiteTask& task)
129 {
130     ASSERT(task.operation == HTMLConstructionSiteTask::InsertText);
131     ASSERT(task.child->isTextNode());
132
133     // Merge text nodes into previous ones if possible:
134     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character
135     Text* newText = toText(task.child.get());
136     Node* previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild();
137     if (previousChild && previousChild->isTextNode()) {
138         Text* previousText = toText(previousChild);
139         unsigned lengthLimit = textLengthLimitForContainer(*task.parent);
140         if (previousText->length() + newText->length() < lengthLimit) {
141             previousText->parserAppendData(newText->data());
142             return;
143         }
144     }
145
146     insert(task);
147 }
148
149 static inline void executeReparentTask(HTMLConstructionSiteTask& task)
150 {
151     ASSERT(task.operation == HTMLConstructionSiteTask::Reparent);
152
153     if (ContainerNode* parent = task.child->parentNode())
154         parent->parserRemoveChild(*task.child);
155
156     task.parent->parserAppendChild(task.child);
157 }
158
// Executes an InsertAlreadyParsedChild task. Unlike executeInsertTask(), this
// only performs the insertion; it does not call beginParsingChildren() or
// finishParsingChildren() on the child.
static inline void executeInsertAlreadyParsedChildTask(HTMLConstructionSiteTask& task)
{
    ASSERT(task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild);

    insert(task);
}
165
// Executes a TakeAllChildren task by asking task.parent to take all children
// from the node returned by task.oldParent().
static inline void executeTakeAllChildrenTask(HTMLConstructionSiteTask& task)
{
    ASSERT(task.operation == HTMLConstructionSiteTask::TakeAllChildren);

    task.parent->parserTakeAllChildrenFrom(*task.oldParent());
}
172
173 void HTMLConstructionSite::executeTask(HTMLConstructionSiteTask& task)
174 {
175     ASSERT(m_taskQueue.isEmpty());
176     if (task.operation == HTMLConstructionSiteTask::Insert)
177         return executeInsertTask(task);
178
179     if (task.operation == HTMLConstructionSiteTask::InsertText)
180         return executeInsertTextTask(task);
181
182     // All the cases below this point are only used by the adoption agency.
183
184     if (task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild)
185         return executeInsertAlreadyParsedChildTask(task);
186
187     if (task.operation == HTMLConstructionSiteTask::Reparent)
188         return executeReparentTask(task);
189
190     if (task.operation == HTMLConstructionSiteTask::TakeAllChildren)
191         return executeTakeAllChildrenTask(task);
192
193     ASSERT_NOT_REACHED();
194 }
195
196 // This is only needed for TextDocuments where we might have text nodes
197 // approaching the default length limit (~64k) and we don't want to
198 // break a text node in the middle of a combining character.
199 static unsigned findBreakIndexBetween(const StringBuilder& string, unsigned currentPosition, unsigned proposedBreakIndex)
200 {
201     ASSERT(currentPosition < proposedBreakIndex);
202     ASSERT(proposedBreakIndex <= string.length());
203     // The end of the string is always a valid break.
204     if (proposedBreakIndex == string.length())
205         return proposedBreakIndex;
206
207     // Latin-1 does not have breakable boundaries. If we ever moved to a differnet 8-bit encoding this could be wrong.
208     if (string.is8Bit())
209         return proposedBreakIndex;
210
211     const UChar* breakSearchCharacters = string.characters16() + currentPosition;
212     // We need at least two characters look-ahead to account for UTF-16 surrogates, but can't search off the end of the buffer!
213     unsigned breakSearchLength = std::min(proposedBreakIndex - currentPosition + 2, string.length() - currentPosition);
214     NonSharedCharacterBreakIterator it(breakSearchCharacters, breakSearchLength);
215
216     if (it.isBreak(proposedBreakIndex - currentPosition))
217         return proposedBreakIndex;
218
219     int adjustedBreakIndexInSubstring = it.preceding(proposedBreakIndex - currentPosition);
220     if (adjustedBreakIndexInSubstring > 0)
221         return currentPosition + adjustedBreakIndexInSubstring;
222     // We failed to find a breakable point, let the caller figure out what to do.
223     return 0;
224 }
225
226 static String atomizeIfAllWhitespace(const String& string, WhitespaceMode whitespaceMode)
227 {
228     // Strings composed entirely of whitespace are likely to be repeated.
229     // Turn them into AtomicString so we share a single string for each.
230     if (whitespaceMode == AllWhitespace || (whitespaceMode == WhitespaceUnknown && isAllWhitespace(string)))
231         return AtomicString(string).string();
232     return string;
233 }
234
235 void HTMLConstructionSite::flushPendingText(FlushMode mode)
236 {
237     if (m_pendingText.isEmpty())
238         return;
239
240     if (mode == FlushIfAtTextLimit
241         && !shouldUseLengthLimit(*m_pendingText.parent))
242         return;
243
244     PendingText pendingText;
245     // Hold onto the current pending text on the stack so that queueTask doesn't recurse infinitely.
246     m_pendingText.swap(pendingText);
247     ASSERT(m_pendingText.isEmpty());
248
249     // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is necessary
250     // for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898
251     unsigned lengthLimit = textLengthLimitForContainer(*pendingText.parent);
252
253     unsigned currentPosition = 0;
254     const StringBuilder& string = pendingText.stringBuilder;
255     while (currentPosition < string.length()) {
256         unsigned proposedBreakIndex = std::min(currentPosition + lengthLimit, string.length());
257         unsigned breakIndex = findBreakIndexBetween(string, currentPosition, proposedBreakIndex);
258         ASSERT(breakIndex <= string.length());
259         String substring = string.substring(currentPosition, breakIndex - currentPosition);
260         substring = atomizeIfAllWhitespace(substring, pendingText.whitespaceMode);
261
262         HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertText);
263         task.parent = pendingText.parent;
264         task.nextChild = pendingText.nextChild;
265         task.child = Text::create(task.parent->document(), substring);
266         queueTask(task);
267
268         ASSERT(breakIndex > currentPosition);
269         ASSERT(breakIndex - currentPosition == substring.length());
270         ASSERT(toText(task.child.get())->length() == substring.length());
271         currentPosition = breakIndex;
272     }
273 }
274
// Appends |task| to the task queue. Any pending text is flushed first, so the
// text's InsertText tasks are queued ahead of |task|.
void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task)
{
    flushPendingText(FlushAlways);
    ASSERT(m_pendingText.isEmpty());
    m_taskQueue.append(task);
}
281
282 void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtrWillBeRawPtr<Node> prpChild, bool selfClosing)
283 {
284     ASSERT(scriptingContentIsAllowed(m_parserContentPolicy) || !prpChild.get()->isElementNode() || !toScriptLoaderIfPossible(toElement(prpChild.get())));
285     ASSERT(pluginContentIsAllowed(m_parserContentPolicy) || !isHTMLPlugInElement(prpChild));
286
287     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
288     task.parent = parent;
289     task.child = prpChild;
290     task.selfClosing = selfClosing;
291
292     if (shouldFosterParent()) {
293         fosterParent(task.child);
294         return;
295     }
296
297     // Add as a sibling of the parent if we have reached the maximum depth allowed.
298     if (m_openElements.stackDepth() > maximumHTMLParserDOMTreeDepth && task.parent->parentNode())
299         task.parent = task.parent->parentNode();
300
301     ASSERT(task.parent);
302     queueTask(task);
303 }
304
305 void HTMLConstructionSite::executeQueuedTasks()
306 {
307     // This has no affect on pendingText, and we may have pendingText
308     // remaining after executing all other queued tasks.
309     const size_t size = m_taskQueue.size();
310     if (!size)
311         return;
312
313     // Copy the task queue into a local variable in case executeTask
314     // re-enters the parser.
315     TaskQueue queue;
316     queue.swap(m_taskQueue);
317
318     for (size_t i = 0; i < size; ++i)
319         executeTask(queue[i]);
320
321     // We might be detached now.
322 }
323
// Constructs a site for parsing a whole document: |document| is both the
// owning document and the attachment root. The quirks-mode flag is seeded from
// the document's current state.
HTMLConstructionSite::HTMLConstructionSite(Document* document, ParserContentPolicy parserContentPolicy)
    : m_document(document)
    , m_attachmentRoot(document)
    , m_parserContentPolicy(parserContentPolicy)
    , m_isParsingFragment(false)
    , m_redirectAttachToFosterParent(false)
    , m_inQuirksMode(document->inQuirksMode())
{
    ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
}
334
// Constructs a site for fragment parsing: |fragment| is the attachment root
// and m_document is the fragment's document. The quirks-mode flag is seeded
// from that document's current state.
HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, ParserContentPolicy parserContentPolicy)
    : m_document(&fragment->document())
    , m_attachmentRoot(fragment)
    , m_parserContentPolicy(parserContentPolicy)
    , m_isParsingFragment(true)
    , m_redirectAttachToFosterParent(false)
    , m_inQuirksMode(fragment->document().inQuirksMode())
{
    ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
}
345
// Destructor. Performs no cleanup of its own; it only asserts that neither
// queued tasks nor pending text were left behind.
HTMLConstructionSite::~HTMLConstructionSite()
{
    // Depending on why we're being destroyed it might be OK
    // to forget queued tasks, but currently we don't expect to.
    ASSERT(m_taskQueue.isEmpty());
    // Currently we assume that text will never be the last token in the
    // document and that we'll always queue some additional task to cause it to flush.
    ASSERT(m_pendingText.isEmpty());
}
355
// Passes each of the members listed below to |visitor| for tracing.
void HTMLConstructionSite::trace(Visitor* visitor)
{
    visitor->trace(m_document);
    visitor->trace(m_attachmentRoot);
    visitor->trace(m_head);
    visitor->trace(m_form);
    visitor->trace(m_openElements);
    visitor->trace(m_activeFormattingElements);
    visitor->trace(m_taskQueue);
    visitor->trace(m_pendingText);
}
367
// Detaches the construction site: discards any pending text and clears the
// document and attachment-root pointers.
void HTMLConstructionSite::detach()
{
    // FIXME: We'd like to ASSERT here that we're canceling and not just discarding
    // text that really should have made it into the DOM earlier, but there
    // doesn't seem to be a nice way to do that.
    m_pendingText.discard();
    m_document = nullptr;
    m_attachmentRoot = nullptr;
}
377
// Records |form| as the current form element. Must not be called while a form
// is already set (asserted).
void HTMLConstructionSite::setForm(HTMLFormElement* form)
{
    // This method should only be needed for HTMLTreeBuilder in the fragment case.
    ASSERT(!m_form);
    m_form = form;
}
384
// Hands the current form element over to the caller via m_form.release().
PassRefPtrWillBeRawPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
{
    return m_form.release();
}
389
390 void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
391 {
392     ASSERT(m_document);
393     if (m_document->frame() && !m_isParsingFragment)
394         m_document->frame()->loader().dispatchDocumentElementAvailable();
395 }
396
397 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken* token)
398 {
399     ASSERT(m_document);
400     RefPtrWillBeRawPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(*m_document);
401     setAttributes(element.get(), token, m_parserContentPolicy);
402     attachLater(m_attachmentRoot, element);
403     m_openElements.pushHTMLHtmlElement(HTMLStackItem::create(element, token));
404
405     executeQueuedTasks();
406     element->insertedByParser();
407     dispatchDocumentElementAvailableIfNeeded();
408 }
409
410 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken* token, Element* element)
411 {
412     if (token->attributes().isEmpty())
413         return;
414
415     for (unsigned i = 0; i < token->attributes().size(); ++i) {
416         const Attribute& tokenAttribute = token->attributes().at(i);
417         if (element->attributesWithoutUpdate().findIndex(tokenAttribute.name()) == kNotFound)
418             element->setAttribute(tokenAttribute.name(), tokenAttribute.value());
419     }
420 }
421
422 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken* token)
423 {
424     // Fragments do not have a root HTML element, so any additional HTML elements
425     // encountered during fragment parsing should be ignored.
426     if (m_isParsingFragment)
427         return;
428
429     mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
430 }
431
// Handles an additional <body> start tag by merging the token's attributes
// into the body element held by the open-element stack.
void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken* token)
{
    mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
}
436
437 void HTMLConstructionSite::setDefaultCompatibilityMode()
438 {
439     if (m_isParsingFragment)
440         return;
441     setCompatibilityMode(Document::QuirksMode);
442 }
443
// Applies |mode| to the document and updates the cached m_inQuirksMode flag,
// which is true only for Document::QuirksMode.
void HTMLConstructionSite::setCompatibilityMode(Document::CompatibilityMode mode)
{
    m_inQuirksMode = (mode == Document::QuirksMode);
    m_document->setCompatibilityMode(mode);
}
449
450 void HTMLConstructionSite::setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId)
451 {
452     // There are three possible compatibility modes:
453     // Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
454     // be omitted from numbers.
455     // Limited Quirks - This mode is identical to no-quirks mode except for its treatment of line-height in the inline box model.
456     // No Quirks - no quirks apply. Web pages will obey the specifications to the letter.
457
458     // Check for Quirks Mode.
459     if (name != "html"
460         || publicId.startsWith("+//Silmaril//dtd html Pro v0r11 19970101//", false)
461         || publicId.startsWith("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", false)
462         || publicId.startsWith("-//AS//DTD HTML 3.0 asWedit + extensions//", false)
463         || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 1//", false)
464         || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 2//", false)
465         || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 1//", false)
466         || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 2//", false)
467         || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict//", false)
468         || publicId.startsWith("-//IETF//DTD HTML 2.0//", false)
469         || publicId.startsWith("-//IETF//DTD HTML 2.1E//", false)
470         || publicId.startsWith("-//IETF//DTD HTML 3.0//", false)
471         || publicId.startsWith("-//IETF//DTD HTML 3.2 Final//", false)
472         || publicId.startsWith("-//IETF//DTD HTML 3.2//", false)
473         || publicId.startsWith("-//IETF//DTD HTML 3//", false)
474         || publicId.startsWith("-//IETF//DTD HTML Level 0//", false)
475         || publicId.startsWith("-//IETF//DTD HTML Level 1//", false)
476         || publicId.startsWith("-//IETF//DTD HTML Level 2//", false)
477         || publicId.startsWith("-//IETF//DTD HTML Level 3//", false)
478         || publicId.startsWith("-//IETF//DTD HTML Strict Level 0//", false)
479         || publicId.startsWith("-//IETF//DTD HTML Strict Level 1//", false)
480         || publicId.startsWith("-//IETF//DTD HTML Strict Level 2//", false)
481         || publicId.startsWith("-//IETF//DTD HTML Strict Level 3//", false)
482         || publicId.startsWith("-//IETF//DTD HTML Strict//", false)
483         || publicId.startsWith("-//IETF//DTD HTML//", false)
484         || publicId.startsWith("-//Metrius//DTD Metrius Presentational//", false)
485         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", false)
486         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML//", false)
487         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 Tables//", false)
488         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", false)
489         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML//", false)
490         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 Tables//", false)
491         || publicId.startsWith("-//Netscape Comm. Corp.//DTD HTML//", false)
492         || publicId.startsWith("-//Netscape Comm. Corp.//DTD Strict HTML//", false)
493         || publicId.startsWith("-//O'Reilly and Associates//DTD HTML 2.0//", false)
494         || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended 1.0//", false)
495         || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", false)
496         || publicId.startsWith("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", false)
497         || publicId.startsWith("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", false)
498         || publicId.startsWith("-//Spyglass//DTD HTML 2.0 Extended//", false)
499         || publicId.startsWith("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", false)
500         || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava HTML//", false)
501         || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", false)
502         || publicId.startsWith("-//W3C//DTD HTML 3 1995-03-24//", false)
503         || publicId.startsWith("-//W3C//DTD HTML 3.2 Draft//", false)
504         || publicId.startsWith("-//W3C//DTD HTML 3.2 Final//", false)
505         || publicId.startsWith("-//W3C//DTD HTML 3.2//", false)
506         || publicId.startsWith("-//W3C//DTD HTML 3.2S Draft//", false)
507         || publicId.startsWith("-//W3C//DTD HTML 4.0 Frameset//", false)
508         || publicId.startsWith("-//W3C//DTD HTML 4.0 Transitional//", false)
509         || publicId.startsWith("-//W3C//DTD HTML Experimental 19960712//", false)
510         || publicId.startsWith("-//W3C//DTD HTML Experimental 970421//", false)
511         || publicId.startsWith("-//W3C//DTD W3 HTML//", false)
512         || publicId.startsWith("-//W3O//DTD W3 HTML 3.0//", false)
513         || equalIgnoringCase(publicId, "-//W3O//DTD W3 HTML Strict 3.0//EN//")
514         || publicId.startsWith("-//WebTechs//DTD Mozilla HTML 2.0//", false)
515         || publicId.startsWith("-//WebTechs//DTD Mozilla HTML//", false)
516         || equalIgnoringCase(publicId, "-/W3C/DTD HTML 4.0 Transitional/EN")
517         || equalIgnoringCase(publicId, "HTML")
518         || equalIgnoringCase(systemId, "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
519         || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
520         || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
521         setCompatibilityMode(Document::QuirksMode);
522         return;
523     }
524
525     // Check for Limited Quirks Mode.
526     if (publicId.startsWith("-//W3C//DTD XHTML 1.0 Frameset//", false)
527         || publicId.startsWith("-//W3C//DTD XHTML 1.0 Transitional//", false)
528         || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
529         || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
530         setCompatibilityMode(Document::LimitedQuirksMode);
531         return;
532     }
533
534     // Otherwise we are No Quirks Mode.
535     setCompatibilityMode(Document::NoQuirksMode);
536 }
537
// Handles end-of-file: flushes with FlushAlways and then pops every element
// off the stack of open elements. A current node must exist (asserted).
void HTMLConstructionSite::processEndOfFile()
{
    ASSERT(currentNode());
    flush(FlushAlways);
    openElements()->popAll();
}
544
// Flushes whatever is still pending and then tells the document that parsing
// has finished.
void HTMLConstructionSite::finishedParsing()
{
    // We shouldn't have any queued tasks but we might have pending text which we need to promote to tasks and execute.
    ASSERT(m_taskQueue.isEmpty());
    flush(FlushAlways);
    m_document->finishedParsing();
}
552
553 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken* token)
554 {
555     ASSERT(token->type() == HTMLToken::DOCTYPE);
556
557     const String& publicId = StringImpl::create8BitIfPossible(token->publicIdentifier());
558     const String& systemId = StringImpl::create8BitIfPossible(token->systemIdentifier());
559     RefPtrWillBeRawPtr<DocumentType> doctype = DocumentType::create(m_document, token->name(), publicId, systemId);
560     attachLater(m_attachmentRoot, doctype.release());
561
562     // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
563     // never occurs.  However, if we ever chose to support such, this code is subtly wrong,
564     // because context-less fragments can determine their own quirks mode, and thus change
565     // parsing rules (like <p> inside <table>).  For now we ASSERT that we never hit this code
566     // in a fragment, as changing the owning document's compatibility mode would be wrong.
567     ASSERT(!m_isParsingFragment);
568     if (m_isParsingFragment)
569         return;
570
571     if (token->forceQuirks())
572         setCompatibilityMode(Document::QuirksMode);
573     else {
574         setCompatibilityModeFromDoctype(token->name(), publicId, systemId);
575     }
576 }
577
// Queues a Comment node built from |token| for attachment under the current
// node.
void HTMLConstructionSite::insertComment(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::Comment);
    attachLater(currentNode(), Comment::create(ownerDocumentForCurrentNode(), token->comment()));
}
583
// Queues a Comment node built from |token| for attachment directly under the
// attachment root.
void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::Comment);
    ASSERT(m_document);
    attachLater(m_attachmentRoot, Comment::create(*m_document, token->comment()));
}
590
591 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken* token)
592 {
593     ASSERT(token->type() == HTMLToken::Comment);
594     ContainerNode* parent = m_openElements.rootNode();
595     attachLater(parent, Comment::create(parent->document(), token->comment()));
596 }
597
// Creates the <head> element from |token|, remembers its stack item in m_head,
// queues its attachment under the current node and pushes it onto the stack of
// open elements. Must not be called while foster parenting (asserted).
void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken* token)
{
    ASSERT(!shouldFosterParent());
    m_head = HTMLStackItem::create(createHTMLElement(token), token);
    attachLater(currentNode(), m_head->element());
    m_openElements.pushHTMLHeadElement(m_head);
}
605
606 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken* token)
607 {
608     ASSERT(!shouldFosterParent());
609     RefPtrWillBeRawPtr<HTMLElement> body = createHTMLElement(token);
610     attachLater(currentNode(), body);
611     m_openElements.pushHTMLBodyElement(HTMLStackItem::create(body.release(), token));
612     if (LocalFrame* frame = m_document->frame())
613         frame->loader().client()->dispatchWillInsertBody();
614 }
615
// Creates a <form> element from |token|, stores it in m_form, marks it with
// |isDemoted|, queues its attachment under the current node and pushes it onto
// the stack of open elements.
void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken* token, bool isDemoted)
{
    RefPtrWillBeRawPtr<HTMLElement> element = createHTMLElement(token);
    ASSERT(isHTMLFormElement(element));
    // Ownership of the new element moves into m_form.
    m_form = static_pointer_cast<HTMLFormElement>(element.release());
    m_form->setDemoted(isDemoted);
    attachLater(currentNode(), m_form.get());
    m_openElements.push(HTMLStackItem::create(m_form.get(), token));
}
625
// Creates an HTML element from |token|, queues its attachment under the
// current node and pushes it onto the stack of open elements.
void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token)
{
    RefPtrWillBeRawPtr<HTMLElement> element = createHTMLElement(token);
    attachLater(currentNode(), element);
    m_openElements.push(HTMLStackItem::create(element.release(), token));
}
632
// Creates an HTML element from a start-tag |token| and queues its attachment
// under the current node with the self-closing flag set. The element is not
// pushed onto the stack of open elements.
void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::StartTag);
    // Normally HTMLElementStack is responsible for calling finishParsingChildren,
    // but self-closing elements are never in the element stack so the stack
    // doesn't get a chance to tell them that we're done parsing their children.
    attachLater(currentNode(), createHTMLElement(token), true);
    // FIXME: Do we want to acknowledge the token's self-closing flag?
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
}
643
// Inserts the element for |token| like insertHTMLElement() and additionally
// appends its stack item to the list of active formatting elements.
void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken* token)
{
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
    // Possible active formatting elements include:
    // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
    insertHTMLElement(token);
    // After the insert above, the current element record is the new element.
    m_activeFormattingElements.append(currentElementRecord()->stackItem());
}
652
// Creates an HTML <script> element from |token| and pushes it onto the stack
// of open elements. The element is only queued for attachment when the content
// policy allows scripting content; it is pushed onto the stack either way.
void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token)
{
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started
    // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment
    // For createContextualFragment, the specifications say to mark it parser-inserted and already-started and later unmark them.
    // However, we short circuit that logic to avoid the subtree traversal to find script elements since scripts can never see
    // those flags or effects thereof.
    const bool parserInserted = m_parserContentPolicy != AllowScriptingContentAndDoNotMarkAlreadyStarted;
    const bool alreadyStarted = m_isParsingFragment && parserInserted;
    RefPtrWillBeRawPtr<HTMLScriptElement> element = HTMLScriptElement::create(ownerDocumentForCurrentNode(), parserInserted, alreadyStarted);
    setAttributes(element.get(), token, m_parserContentPolicy);
    if (scriptingContentIsAllowed(m_parserContentPolicy))
        attachLater(currentNode(), element);
    m_openElements.push(HTMLStackItem::create(element.release(), token));
}
668
669 void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
670 {
671     ASSERT(token->type() == HTMLToken::StartTag);
672     notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
673
674     RefPtrWillBeRawPtr<Element> element = createElement(token, namespaceURI);
675     if (scriptingContentIsAllowed(m_parserContentPolicy) || !toScriptLoaderIfPossible(element.get()))
676         attachLater(currentNode(), element, token->selfClosing());
677     if (!token->selfClosing())
678         m_openElements.push(HTMLStackItem::create(element.release(), token, namespaceURI));
679 }
680
681 void HTMLConstructionSite::insertTextNode(const String& string, WhitespaceMode whitespaceMode)
682 {
683     HTMLConstructionSiteTask dummyTask(HTMLConstructionSiteTask::Insert);
684     dummyTask.parent = currentNode();
685
686     if (shouldFosterParent())
687         findFosterSite(dummyTask);
688
689     // FIXME: This probably doesn't need to be done both here and in insert(Task).
690     if (isHTMLTemplateElement(*dummyTask.parent))
691         dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->content();
692
693     // Unclear when parent != case occurs. Somehow we insert text into two separate nodes while processing the same Token.
694     // The nextChild != dummy.nextChild case occurs whenever foster parenting happened and we hit a new text node "<table>a</table>b"
695     // In either case we have to flush the pending text into the task queue before making more.
696     if (!m_pendingText.isEmpty() && (m_pendingText.parent != dummyTask.parent ||  m_pendingText.nextChild != dummyTask.nextChild))
697         flushPendingText(FlushAlways);
698     m_pendingText.append(dummyTask.parent, dummyTask.nextChild, string, whitespaceMode);
699 }
700
701 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLElementStack::ElementRecord* child)
702 {
703     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
704     task.parent = newParent->node();
705     task.child = child->node();
706     queueTask(task);
707 }
708
709 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLStackItem* child)
710 {
711     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
712     task.parent = newParent->node();
713     task.child = child->node();
714     queueTask(task);
715 }
716
717 void HTMLConstructionSite::insertAlreadyParsedChild(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* child)
718 {
719     if (newParent->causesFosterParenting()) {
720         fosterParent(child->node());
721         return;
722     }
723
724     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertAlreadyParsedChild);
725     task.parent = newParent->node();
726     task.child = child->node();
727     queueTask(task);
728 }
729
730 void HTMLConstructionSite::takeAllChildren(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* oldParent)
731 {
732     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::TakeAllChildren);
733     task.parent = newParent->node();
734     task.child = oldParent->node();
735     queueTask(task);
736 }
737
738 PassRefPtrWillBeRawPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
739 {
740     QualifiedName tagName(nullAtom, token->name(), namespaceURI);
741     RefPtrWillBeRawPtr<Element> element = ownerDocumentForCurrentNode().createElement(tagName, true);
742     setAttributes(element.get(), token, m_parserContentPolicy);
743     return element.release();
744 }
745
746 inline Document& HTMLConstructionSite::ownerDocumentForCurrentNode()
747 {
748     if (isHTMLTemplateElement(*currentNode()))
749         return toHTMLTemplateElement(currentElement())->content()->document();
750     return currentNode()->document();
751 }
752
753 PassRefPtrWillBeRawPtr<HTMLElement> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token)
754 {
755     Document& document = ownerDocumentForCurrentNode();
756     // Only associate the element with the current form if we're creating the new element
757     // in a document with a browsing context (rather than in <template> contents).
758     HTMLFormElement* form = document.frame() ? m_form.get() : 0;
759     // FIXME: This can't use HTMLConstructionSite::createElement because we
760     // have to pass the current form element.  We should rework form association
761     // to occur after construction to allow better code sharing here.
762     RefPtrWillBeRawPtr<HTMLElement> element = HTMLElementFactory::createHTMLElement(token->name(), document, form, true);
763     setAttributes(element.get(), token, m_parserContentPolicy);
764     return element.release();
765 }
766
767 PassRefPtrWillBeRawPtr<HTMLStackItem> HTMLConstructionSite::createElementFromSavedToken(HTMLStackItem* item)
768 {
769     RefPtrWillBeRawPtr<Element> element;
770     // NOTE: Moving from item -> token -> item copies the Attribute vector twice!
771     AtomicHTMLToken fakeToken(HTMLToken::StartTag, item->localName(), item->attributes());
772     if (item->namespaceURI() == HTMLNames::xhtmlNamespaceURI)
773         element = createHTMLElement(&fakeToken);
774     else
775         element = createElement(&fakeToken, item->namespaceURI());
776     return HTMLStackItem::create(element.release(), &fakeToken, item->namespaceURI());
777 }
778
779 bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
780 {
781     if (m_activeFormattingElements.isEmpty())
782         return false;
783     unsigned index = m_activeFormattingElements.size();
784     do {
785         --index;
786         const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
787         if (entry.isMarker() || m_openElements.contains(entry.element())) {
788             firstUnopenElementIndex = index + 1;
789             return firstUnopenElementIndex < m_activeFormattingElements.size();
790         }
791     } while (index);
792     firstUnopenElementIndex = index;
793     return true;
794 }
795
796 void HTMLConstructionSite::reconstructTheActiveFormattingElements()
797 {
798     unsigned firstUnopenElementIndex;
799     if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
800         return;
801
802     unsigned unopenEntryIndex = firstUnopenElementIndex;
803     ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
804     for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
805         HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
806         RefPtrWillBeRawPtr<HTMLStackItem> reconstructed = createElementFromSavedToken(unopenedEntry.stackItem().get());
807         attachLater(currentNode(), reconstructed->node());
808         m_openElements.push(reconstructed);
809         unopenedEntry.replaceElement(reconstructed.release());
810     }
811 }
812
813 void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
814 {
815     while (hasImpliedEndTag(currentStackItem()) && !currentStackItem()->matchesHTMLTag(tagName))
816         m_openElements.pop();
817 }
818
819 void HTMLConstructionSite::generateImpliedEndTags()
820 {
821     while (hasImpliedEndTag(currentStackItem()))
822         m_openElements.pop();
823 }
824
825 bool HTMLConstructionSite::inQuirksMode()
826 {
827     return m_inQuirksMode;
828 }
829
830
831 // Adjusts |task| to match the "adjusted insertion location" determined by the foster parenting algorithm,
832 // laid out as the substeps of step 2 of https://html.spec.whatwg.org/#appropriate-place-for-inserting-a-node
833 void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task)
834 {
835     // 2.1
836     HTMLElementStack::ElementRecord* lastTemplate = m_openElements.topmost(templateTag.localName());
837
838     // 2.2
839     HTMLElementStack::ElementRecord* lastTable = m_openElements.topmost(tableTag.localName());
840
841     // 2.3
842     if (lastTemplate && (!lastTable || lastTemplate->isAbove(lastTable))) {
843         task.parent = lastTemplate->element();
844         return;
845     }
846
847     // 2.4
848     if (!lastTable) {
849         // Fragment case
850         task.parent = m_openElements.rootNode(); // DocumentFragment
851         return;
852     }
853
854     // 2.5
855     if (ContainerNode* parent = lastTable->element()->parentNode()) {
856         task.parent = parent;
857         task.nextChild = lastTable->element();
858         return;
859     }
860
861     // 2.6, 2.7
862     task.parent = lastTable->next()->element();
863 }
864
865 bool HTMLConstructionSite::shouldFosterParent() const
866 {
867     return m_redirectAttachToFosterParent
868         && currentStackItem()->isElementNode()
869         && currentStackItem()->causesFosterParenting();
870 }
871
872 void HTMLConstructionSite::fosterParent(PassRefPtrWillBeRawPtr<Node> node)
873 {
874     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
875     findFosterSite(task);
876     task.child = node;
877     ASSERT(task.parent);
878     queueTask(task);
879 }
880
881 void HTMLConstructionSite::PendingText::trace(Visitor* visitor)
882 {
883     visitor->trace(parent);
884     visitor->trace(nextChild);
885 }
886
887
888 }