2 * Copyright (C) 2013 Google, Inc. All Rights Reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include "core/dom/Attribute.h"
30 #include "wtf/PassOwnPtr.h"
31 #include "wtf/RefCounted.h"
32 #include "wtf/RefPtr.h"
// DoctypeData holds the DOCTYPE-specific state of a token: the public and
// system identifiers plus the flags that accompany them.
37 WTF_MAKE_NONCOPYABLE(DoctypeData);
// Constructor initializers: all three flags start out false.
40 : m_hasPublicIdentifier(false)
41 , m_hasSystemIdentifier(false)
42 , m_forceQuirks(false)
// The m_has* flags are set to true by HTMLToken's
// set{Public,System}IdentifierToEmptyString(); the vectors hold the
// identifier characters appended afterwards.
46 bool m_hasPublicIdentifier;
47 bool m_hasSystemIdentifier;
48 WTF::Vector<UChar> m_publicIdentifier;
49 WTF::Vector<UChar> m_systemIdentifier;
// Scans `attributes` front to back and, on the first element whose name
// matches `name`, returns a pointer to that element inside the vector.
53 static inline Attribute* findAttributeInVector(Vector<Attribute>& attributes, const QualifiedName& name)
55 for (unsigned i = 0; i < attributes.size(); ++i) {
56 if (attributes.at(i).name().matches(name))
57 return &attributes.at(i);
// Class-level WTF macros applied to HTMLToken (per their names: copying is
// disallowed and the WTF fast allocator is used).
63 WTF_MAKE_NONCOPYABLE(HTMLToken);
64 WTF_MAKE_FAST_ALLOCATED;
// Per-attribute character storage: name and value each get an inline
// capacity of 32 UChars.
86 Vector<UChar, 32> name;
87 Vector<UChar, 32> value;
// A token's attributes, with inline capacity for 10 entries.
90 typedef Vector<Attribute, 10> AttributeList;
92 // By using an inline capacity of 256, we avoid spilling over into a malloced buffer
93 // approximately 99% of the time based on a non-scientific browse around a number of
94 // popular web sites on 23 May 2013.
95 typedef Vector<UChar, 256> DataVector;
// A newly constructed token is immediately put into its cleared state.
97 HTMLToken() { clear(); }
// Clearing resets the token type to Uninitialized.
101 m_type = Uninitialized;
105 // Don't call Vector::clear() as that would destroy the
106 // alloced VectorBuffer. If the innerHTML'd content has
107 // two 257 character text nodes in a row, we'll needlessly
108 // thrash malloc. When we finally finish the parse the
109 // HTMLToken will be destroyed and the VectorBuffer released.
// Returns true while the token has not been given a type. Const-qualified
// (it only reads m_type) so it can be called on a const HTMLToken, like type().
114 bool isUninitialized() const { return m_type == Uninitialized; }
// Returns the current token type.
115 Type type() const { return m_type; }
// Precondition check: the token must still be Uninitialized at this point.
119 ASSERT(m_type == Uninitialized);
123 /* Range and offset methods exposed for HTMLSourceTracker and HTMLViewSourceParser */
// Both indices are read straight from m_range; end() stores m_range.end
// relative to m_baseOffset.
124 int startIndex() const { return m_range.start; }
125 int endIndex() const { return m_range.end; }
// Stores the base offset that end() and the attribute range setters subtract
// from the absolute offsets they are given.
127 void setBaseOffset(int offset)
129 m_baseOffset = offset;
// Records where the token ends, converted to be relative to m_baseOffset.
132 void end(int endOffset)
134 m_range.end = endOffset - m_baseOffset;
// Read-only access to the token's character data. Asserted to be used only on
// Character, Comment, StartTag and EndTag tokens.
137 const DataVector& data() const
139 ASSERT(m_type == Character || m_type == Comment || m_type == StartTag || m_type == EndTag);
// True when m_orAllData (the bitwise OR of the characters folded in by the
// append methods) has no bits set above 0xff.
143 bool isAll8BitData() const
145 return (m_orAllData <= 0xff);
// Read-only access to the token name. Asserted to be used only on StartTag,
// EndTag and DOCTYPE tokens.
148 const DataVector& name() const
150 ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
// Appends one character to the name of a StartTag, EndTag or DOCTYPE token
// (type asserted). The name is stored in m_data; the character is also OR-ed
// into m_orAllData so isAll8BitData() stays accurate.
154 void appendToName(UChar character)
156 ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
158 m_data.append(character);
159 m_orAllData |= character;
// Returns the force-quirks flag stored in the DOCTYPE data.
// Asserted to be called on DOCTYPE tokens only.
164 bool forceQuirks() const
166 ASSERT(m_type == DOCTYPE);
167 return m_doctypeData->m_forceQuirks;
// Sets the force-quirks flag in the DOCTYPE data (there is no way to clear it
// here). Asserted to be called on DOCTYPE tokens only.
170 void setForceQuirks()
172 ASSERT(m_type == DOCTYPE);
173 m_doctypeData->m_forceQuirks = true;
// Starting a DOCTYPE requires an Uninitialized token (asserted) and allocates
// a fresh DoctypeData that m_doctypeData takes ownership of.
178 ASSERT(m_type == Uninitialized);
180 m_doctypeData = adoptPtr(new DoctypeData);
// Overload taking the first character: the character is appended to m_data
// and OR-ed into m_orAllData.
183 void beginDOCTYPE(UChar character)
187 m_data.append(character);
188 m_orAllData |= character;
// Returns the public identifier characters held in the DOCTYPE data.
// Asserted to be called on DOCTYPE tokens only.
191 // FIXME: Distinguish between a missing public identifier and an empty one.
192 const WTF::Vector<UChar>& publicIdentifier() const
194 ASSERT(m_type == DOCTYPE);
195 return m_doctypeData->m_publicIdentifier;
// Returns the system identifier characters held in the DOCTYPE data.
// Asserted to be called on DOCTYPE tokens only.
198 // FIXME: Distinguish between a missing system identifier and an empty one.
199 const WTF::Vector<UChar>& systemIdentifier() const
201 ASSERT(m_type == DOCTYPE);
202 return m_doctypeData->m_systemIdentifier;
// Marks the public identifier as present and empties its contents.
// appendToPublicIdentifier() asserts that this flag has been set.
205 void setPublicIdentifierToEmptyString()
207 ASSERT(m_type == DOCTYPE);
208 m_doctypeData->m_hasPublicIdentifier = true;
209 m_doctypeData->m_publicIdentifier.clear();
// Marks the system identifier as present and empties its contents.
// appendToSystemIdentifier() asserts that this flag has been set.
212 void setSystemIdentifierToEmptyString()
214 ASSERT(m_type == DOCTYPE);
215 m_doctypeData->m_hasSystemIdentifier = true;
216 m_doctypeData->m_systemIdentifier.clear();
// Appends one character to the public identifier. Asserts that the token is a
// DOCTYPE and that the identifier was first marked present via
// setPublicIdentifierToEmptyString().
219 void appendToPublicIdentifier(UChar character)
222 ASSERT(m_type == DOCTYPE);
223 ASSERT(m_doctypeData->m_hasPublicIdentifier);
224 m_doctypeData->m_publicIdentifier.append(character);
// Appends one character to the system identifier. Asserts that the token is a
// DOCTYPE and that the identifier was first marked present via
// setSystemIdentifierToEmptyString().
227 void appendToSystemIdentifier(UChar character)
230 ASSERT(m_type == DOCTYPE);
231 ASSERT(m_doctypeData->m_hasSystemIdentifier);
232 m_doctypeData->m_systemIdentifier.append(character);
// Returns the result of m_doctypeData.release() as a PassOwnPtr.
// NOTE(review): presumably this transfers ownership to the caller and leaves
// m_doctypeData null, so the DOCTYPE accessors must not be used afterwards;
// confirm against OwnPtr semantics.
235 PassOwnPtr<DoctypeData> releaseDoctypeData()
237 return m_doctypeData.release();
240 /* Start/End Tag Tokens */
// Returns the self-closing flag. Asserted to be called on StartTag or EndTag
// tokens only.
242 bool selfClosing() const
244 ASSERT(m_type == StartTag || m_type == EndTag);
245 return m_selfClosing;
// Sets the self-closing flag on a StartTag or EndTag token (type asserted).
// The begin*Tag methods reset the flag to false.
248 void setSelfClosing()
250 ASSERT(m_type == StartTag || m_type == EndTag);
251 m_selfClosing = true;
// Begins a start tag on an Uninitialized token (asserted): resets the
// self-closing flag, the current-attribute pointer and the attribute list,
// then stores `character` as the first character of the tag name and folds it
// into m_orAllData.
254 void beginStartTag(UChar character)
257 ASSERT(m_type == Uninitialized);
259 m_selfClosing = false;
260 m_currentAttribute = 0;
261 m_attributes.clear();
263 m_data.append(character);
264 m_orAllData |= character;
// Begins an end tag on an Uninitialized token (asserted): resets the
// self-closing flag, the current-attribute pointer and the attribute list,
// then stores `character` as the first character of the tag name.
// m_orAllData is not updated here; the parameter is an LChar.
// NOTE(review): assumes LChar is an 8-bit type, so it cannot affect
// isAll8BitData() -- confirm.
267 void beginEndTag(LChar character)
269 ASSERT(m_type == Uninitialized);
271 m_selfClosing = false;
272 m_currentAttribute = 0;
273 m_attributes.clear();
275 m_data.append(character);
// Vector overload of beginEndTag: same reset steps as the single-character
// form, then appends all of `characters` to m_data as the tag name.
// m_orAllData is not updated here; the elements are LChars.
278 void beginEndTag(const Vector<LChar, 32>& characters)
280 ASSERT(m_type == Uninitialized);
282 m_selfClosing = false;
283 m_currentAttribute = 0;
284 m_attributes.clear();
286 m_data.appendVector(characters);
// Grows m_attributes by one element and points m_currentAttribute at the new
// last element. The pointer is taken after the grow, so it refers to the
// vector's current storage. The new attribute's name/value range fields are
// then zeroed.
289 void addNewAttribute()
291 ASSERT(m_type == StartTag || m_type == EndTag);
292 m_attributes.grow(m_attributes.size() + 1);
293 m_currentAttribute = &m_attributes.last();
295 m_currentAttribute->nameRange.start = 0;
296 m_currentAttribute->nameRange.end = 0;
297 m_currentAttribute->valueRange.start = 0;
298 m_currentAttribute->valueRange.end = 0;
// Records where the current attribute's name starts, relative to m_baseOffset.
// Dereferences m_currentAttribute, which addNewAttribute() sets.
302 void beginAttributeName(int offset)
304 m_currentAttribute->nameRange.start = offset - m_baseOffset;
// Records where the current attribute's name ends (relative to m_baseOffset)
// and initializes the value range to an empty range at that same position.
307 void endAttributeName(int offset)
309 int index = offset - m_baseOffset;
310 m_currentAttribute->nameRange.end = index;
311 m_currentAttribute->valueRange.start = index;
312 m_currentAttribute->valueRange.end = index;
// Records where the current attribute's value starts (relative to
// m_baseOffset) and resets the value range end to 0 until
// endAttributeValue() supplies it.
315 void beginAttributeValue(int offset)
317 m_currentAttribute->valueRange.start = offset - m_baseOffset;
319 m_currentAttribute->valueRange.end = 0;
// Records where the current attribute's value ends, relative to m_baseOffset.
323 void endAttributeValue(int offset)
325 m_currentAttribute->valueRange.end = offset - m_baseOffset;
// Appends one character to the current attribute's name. Asserts a tag token
// and a non-zero name range start (i.e. a value other than the 0 that
// addNewAttribute() writes).
328 void appendToAttributeName(UChar character)
331 ASSERT(m_type == StartTag || m_type == EndTag);
332 ASSERT(m_currentAttribute->nameRange.start);
333 m_currentAttribute->name.append(character);
// Appends one character to the current attribute's value. Asserts a tag token
// and a non-zero value range start.
336 void appendToAttributeValue(UChar character)
339 ASSERT(m_type == StartTag || m_type == EndTag);
340 ASSERT(m_currentAttribute->valueRange.start);
341 m_currentAttribute->value.append(character);
// Appends the whole string `value` to the value of the attribute at index `i`
// (not necessarily the current attribute). `value` must be non-empty and the
// token must be a tag token (both asserted). `i` is used to index
// m_attributes directly.
344 void appendToAttributeValue(size_t i, const String& value)
346 ASSERT(!value.isEmpty());
347 ASSERT(m_type == StartTag || m_type == EndTag);
348 append(m_attributes[i].value, value);
// Read-only access to the token's attribute list. Asserted to be used only on
// StartTag or EndTag tokens.
351 const AttributeList& attributes() const
353 ASSERT(m_type == StartTag || m_type == EndTag);
// Linear search of m_attributes: returns a pointer to the first attribute
// whose name, converted to an AtomicString, equals the local name of `name`.
// Only the local name part of the QualifiedName takes part in the comparison.
// An AtomicString is constructed for every attribute visited.
357 const Attribute* getAttributeItem(const QualifiedName& name) const
359 for (unsigned i = 0; i < m_attributes.size(); ++i) {
360 if (AtomicString(m_attributes.at(i).name) == name.localName())
361 return &m_attributes.at(i);
366 // Used by the XSSAuditor to nuke XSS-laden attributes.
// Empties the value of the attribute at index `i`; the attribute itself and
// its name stay in the list. Token type is asserted to be a tag.
367 void eraseValueOfAttribute(size_t i)
369 ASSERT(m_type == StartTag || m_type == EndTag);
370 m_attributes[i].value.clear();
373 /* Character Tokens */
375 // Starting a character token works slightly differently than starting
376 // other types of tokens because we want to save a per-character branch.
// May be called either on an Uninitialized token or on one that is already a
// Character token (asserted).
377 void ensureIsCharacterToken()
379 ASSERT(m_type == Uninitialized || m_type == Character);
// Read-only access to the character data of a Character token (type asserted).
383 const DataVector& characters() const
385 ASSERT(m_type == Character);
// Appends a single narrow character to a Character token (type asserted).
// Unlike the UChar overload, m_orAllData is not updated here.
// NOTE(review): presumably callers only pass values that fit in 8 bits (and
// are non-negative where plain char is signed) -- confirm.
389 void appendToCharacter(char character)
391 ASSERT(m_type == Character);
392 m_data.append(character);
// Appends a single UChar to a Character token (type asserted) and folds it
// into m_orAllData so isAll8BitData() reflects it.
395 void appendToCharacter(UChar character)
397 ASSERT(m_type == Character);
398 m_data.append(character);
399 m_orAllData |= character;
// Appends a run of LChars to a Character token (type asserted).
// m_orAllData is not updated for these characters.
402 void appendToCharacter(const Vector<LChar, 32>& characters)
404 ASSERT(m_type == Character);
405 m_data.appendVector(characters);
// Read-only access to the text of a Comment token (type asserted).
410 const DataVector& comment() const
412 ASSERT(m_type == Comment);
// Precondition check: the token must still be Uninitialized at this point.
418 ASSERT(m_type == Uninitialized);
// Appends one character to a Comment token (type asserted) and folds it into
// m_orAllData.
422 void appendToComment(UChar character)
425 ASSERT(m_type == Comment);
426 m_data.append(character);
427 m_orAllData |= character;
430 // Only for XSSAuditor
// Asserted to be called on Character tokens only.
431 void eraseCharacters()
433 ASSERT(m_type == Character);
// Source range of the token; end() stores the end relative to m_baseOffset.
440 Attribute::Range m_range; // Always starts at zero.
445 // For StartTag and EndTag
447 AttributeList m_attributes;
449 // A pointer into m_attributes used during lexing.
// Reset to 0 by the begin*Tag methods and re-pointed by addNewAttribute()
// after each grow of m_attributes.
450 Attribute* m_currentAttribute;
// DOCTYPE-only state; allocated when a DOCTYPE token is begun and handed out
// by releaseDoctypeData().
453 OwnPtr<DoctypeData> m_doctypeData;