2 * Copyright (C) 2013 Google, Inc. All Rights Reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "core/html/parser/HTMLTreeBuilderSimulator.h"
29 #include "core/HTMLNames.h"
30 #include "core/MathMLNames.h"
31 #include "core/SVGNames.h"
32 #include "core/html/parser/HTMLParserIdioms.h"
33 #include "core/html/parser/HTMLTokenizer.h"
34 #include "core/html/parser/HTMLTreeBuilder.h"
38 using namespace HTMLNames;
40 static bool tokenExitsForeignContent(const CompactHTMLToken& token)
42 // FIXME: This is copied from HTMLTreeBuilder::processTokenInForeignContent and changed to use threadSafeHTMLNamesMatch.
43 const String& tagName = token.data();
44 return threadSafeMatch(tagName, bTag)
45 || threadSafeMatch(tagName, bigTag)
46 || threadSafeMatch(tagName, blockquoteTag)
47 || threadSafeMatch(tagName, bodyTag)
48 || threadSafeMatch(tagName, brTag)
49 || threadSafeMatch(tagName, centerTag)
50 || threadSafeMatch(tagName, codeTag)
51 || threadSafeMatch(tagName, ddTag)
52 || threadSafeMatch(tagName, divTag)
53 || threadSafeMatch(tagName, dlTag)
54 || threadSafeMatch(tagName, dtTag)
55 || threadSafeMatch(tagName, emTag)
56 || threadSafeMatch(tagName, embedTag)
57 || threadSafeMatch(tagName, h1Tag)
58 || threadSafeMatch(tagName, h2Tag)
59 || threadSafeMatch(tagName, h3Tag)
60 || threadSafeMatch(tagName, h4Tag)
61 || threadSafeMatch(tagName, h5Tag)
62 || threadSafeMatch(tagName, h6Tag)
63 || threadSafeMatch(tagName, headTag)
64 || threadSafeMatch(tagName, hrTag)
65 || threadSafeMatch(tagName, iTag)
66 || threadSafeMatch(tagName, imgTag)
67 || threadSafeMatch(tagName, liTag)
68 || threadSafeMatch(tagName, listingTag)
69 || threadSafeMatch(tagName, menuTag)
70 || threadSafeMatch(tagName, metaTag)
71 || threadSafeMatch(tagName, nobrTag)
72 || threadSafeMatch(tagName, olTag)
73 || threadSafeMatch(tagName, pTag)
74 || threadSafeMatch(tagName, preTag)
75 || threadSafeMatch(tagName, rubyTag)
76 || threadSafeMatch(tagName, sTag)
77 || threadSafeMatch(tagName, smallTag)
78 || threadSafeMatch(tagName, spanTag)
79 || threadSafeMatch(tagName, strongTag)
80 || threadSafeMatch(tagName, strikeTag)
81 || threadSafeMatch(tagName, subTag)
82 || threadSafeMatch(tagName, supTag)
83 || threadSafeMatch(tagName, tableTag)
84 || threadSafeMatch(tagName, ttTag)
85 || threadSafeMatch(tagName, uTag)
86 || threadSafeMatch(tagName, ulTag)
87 || threadSafeMatch(tagName, varTag)
88 || (threadSafeMatch(tagName, fontTag) && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)));
91 static bool tokenExitsSVG(const CompactHTMLToken& token)
93 // FIXME: It's very fragile that we special case foreignObject here to be case-insensitive.
94 return equalIgnoringCaseNonNull(token.data().impl(), SVGNames::foreignObjectTag.localName().impl());
97 static bool tokenExitsMath(const CompactHTMLToken& token)
99 // FIXME: This is copied from HTMLElementStack::isMathMLTextIntegrationPoint and changed to use threadSafeMatch.
100 const String& tagName = token.data();
101 return threadSafeMatch(tagName, MathMLNames::miTag)
102 || threadSafeMatch(tagName, MathMLNames::moTag)
103 || threadSafeMatch(tagName, MathMLNames::mnTag)
104 || threadSafeMatch(tagName, MathMLNames::msTag)
105 || threadSafeMatch(tagName, MathMLNames::mtextTag);
// Constructs a simulator from the given parser options and appends HTML to the
// namespace stack.
// NOTE(review): the lines between the signature and the append() call
// (original lines 109-110) are not visible in this extract. simulate() reads
// m_options, so a member initializer for it presumably lives there -- confirm
// against the full file.
108 HTMLTreeBuilderSimulator::HTMLTreeBuilderSimulator(const HTMLParserOptions& options)
111 m_namespaceStack.append(HTML);
114 HTMLTreeBuilderSimulator::State HTMLTreeBuilderSimulator::stateFor(HTMLTreeBuilder* treeBuilder)
116 ASSERT(isMainThread());
117 State namespaceStack;
118 for (HTMLElementStack::ElementRecord* record = treeBuilder->openElements()->topRecord(); record; record = record->next()) {
119 Namespace currentNamespace = HTML;
120 if (record->namespaceURI() == SVGNames::svgNamespaceURI)
121 currentNamespace = SVG;
122 else if (record->namespaceURI() == MathMLNames::mathmlNamespaceURI)
123 currentNamespace = MathML;
125 if (namespaceStack.isEmpty() || namespaceStack.last() != currentNamespace)
126 namespaceStack.append(currentNamespace);
128 namespaceStack.reverse();
129 return namespaceStack;
// Updates m_namespaceStack and |tokenizer| in response to |token|.
//
// Start tags may push SVG/MathML/HTML onto, or pop from, the namespace stack,
// and (outside foreign content) switch the tokenizer state for certain tag
// names. End tags may pop the namespace stack, and a </script> outside foreign
// content puts the tokenizer back in DataState. Afterwards the tokenizer's
// null-character-replacement and CDATA flags are set from inForeignContent().
//
// NOTE(review): this extract omits brace-only and very short lines (e.g.
// original lines 160-162, 173-176 and everything after 179), so closing
// braces and any return statements are not visible here. The block nesting
// described in the comments below is inferred from the visible opening
// braces -- confirm against the full file.
132 bool HTMLTreeBuilderSimulator::simulate(const CompactHTMLToken& token, HTMLTokenizer* tokenizer)
134 if (token.type() == HTMLToken::StartTag) {
135 const String& tagName = token.data();
// <svg> and <math> start tags push their namespace.
136 if (threadSafeMatch(tagName, SVGNames::svgTag))
137 m_namespaceStack.append(SVG);
138 if (threadSafeMatch(tagName, MathMLNames::mathTag))
139 m_namespaceStack.append(MathML);
// A token from the tokenExitsForeignContent() list pops the current namespace
// when inForeignContent() is true.
140 if (inForeignContent() && tokenExitsForeignContent(token))
141 m_namespaceStack.removeLast();
// foreignObject on top of SVG, or mi/mo/mn/ms/mtext on top of MathML, pushes
// HTML.
142 if ((m_namespaceStack.last() == SVG && tokenExitsSVG(token))
143 || (m_namespaceStack.last() == MathML && tokenExitsMath(token)))
144 m_namespaceStack.append(HTML);
145 if (!inForeignContent()) {
146 // FIXME: This is just a copy of Tokenizer::updateStateFor which uses threadSafeMatches.
147 if (threadSafeMatch(tagName, textareaTag) || threadSafeMatch(tagName, titleTag))
148 tokenizer->setState(HTMLTokenizer::RCDATAState);
149 else if (threadSafeMatch(tagName, plaintextTag))
150 tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
151 else if (threadSafeMatch(tagName, scriptTag))
152 tokenizer->setState(HTMLTokenizer::ScriptDataState);
// noembed and noscript select RAWTEXT only when the matching option
// (pluginsEnabled / scriptEnabled) is set.
153 else if (threadSafeMatch(tagName, styleTag)
154 || threadSafeMatch(tagName, iframeTag)
155 || threadSafeMatch(tagName, xmpTag)
156 || (threadSafeMatch(tagName, noembedTag) && m_options.pluginsEnabled)
157 || threadSafeMatch(tagName, noframesTag)
158 || (threadSafeMatch(tagName, noscriptTag) && m_options.scriptEnabled))
159 tokenizer->setState(HTMLTokenizer::RAWTEXTState);
163 if (token.type() == HTMLToken::EndTag) {
164 const String& tagName = token.data();
// Pop when </svg> or </math> matches the namespace on top, or when HTML is on
// top, the stack contains SVG (resp. MathML), and the end tag is foreignObject
// (resp. mi/mo/mn/ms/mtext).
165 if ((m_namespaceStack.last() == SVG && threadSafeMatch(tagName, SVGNames::svgTag))
166 || (m_namespaceStack.last() == MathML && threadSafeMatch(tagName, MathMLNames::mathTag))
167 || (m_namespaceStack.contains(SVG) && m_namespaceStack.last() == HTML && tokenExitsSVG(token))
168 || (m_namespaceStack.contains(MathML) && m_namespaceStack.last() == HTML && tokenExitsMath(token)))
169 m_namespaceStack.removeLast();
// </script> outside foreign content returns the tokenizer to DataState.
170 if (threadSafeMatch(tagName, scriptTag)) {
171 if (!inForeignContent())
172 tokenizer->setState(HTMLTokenizer::DataState);
177 // FIXME: Also setForceNullCharacterReplacement when in text mode.
178 tokenizer->setForceNullCharacterReplacement(inForeignContent());
179 tokenizer->setShouldAllowCDATA(inForeignContent());