2 * Copyright (C) 2010 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
17 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
20 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "core/html/parser/HTMLParserIdioms.h"
28 #include "core/HTMLNames.h"
30 #include "wtf/MathExtras.h"
31 #include "wtf/text/AtomicString.h"
32 #include "wtf/text/StringBuilder.h"
33 #include "wtf/text/StringHash.h"
34 #include "wtf/text/TextEncoding.h"
38 using namespace HTMLNames;
40 template <typename CharType>
41 static String stripLeadingAndTrailingHTMLSpaces(String string, const CharType* characters, unsigned length)
43 unsigned numLeadingSpaces = 0;
44 unsigned numTrailingSpaces = 0;
46 for (; numLeadingSpaces < length; ++numLeadingSpaces) {
47 if (isNotHTMLSpace<CharType>(characters[numLeadingSpaces]))
51 if (numLeadingSpaces == length)
52 return string.isNull() ? string : emptyAtom.string();
54 for (; numTrailingSpaces < length; ++numTrailingSpaces) {
55 if (isNotHTMLSpace<CharType>(characters[length - numTrailingSpaces - 1]))
59 ASSERT(numLeadingSpaces + numTrailingSpaces < length);
61 if (!(numLeadingSpaces | numTrailingSpaces))
64 return string.substring(numLeadingSpaces, length - (numLeadingSpaces + numTrailingSpaces));
67 String stripLeadingAndTrailingHTMLSpaces(const String& string)
69 unsigned length = string.length();
72 return string.isNull() ? string : emptyAtom.string();
75 return stripLeadingAndTrailingHTMLSpaces<LChar>(string, string.characters8(), length);
77 return stripLeadingAndTrailingHTMLSpaces<UChar>(string, string.characters16(), length);
80 String serializeForNumberType(const Decimal& number)
82 if (number.isZero()) {
83 // Decimal::toString appends exponent, e.g. "0e-18"
84 return number.isNegative() ? "-0" : "0";
86 return number.toString();
89 String serializeForNumberType(double number)
91 // According to HTML5, "the best representation of the number n as a floating
92 // point number" is a string produced by applying ToString() to n.
93 return String::numberToStringECMAScript(number);
96 Decimal parseToDecimalForNumberType(const String& string, const Decimal& fallbackValue)
98 // http://www.whatwg.org/specs/web-apps/current-work/#floating-point-numbers and parseToDoubleForNumberType
99 // String::toDouble() accepts leading + and whitespace characters, which are not valid here.
100 const UChar firstCharacter = string[0];
101 if (firstCharacter != '-' && firstCharacter != '.' && !isASCIIDigit(firstCharacter))
102 return fallbackValue;
104 const Decimal value = Decimal::fromString(string);
105 if (!value.isFinite())
106 return fallbackValue;
108 // Numbers are considered finite IEEE 754 Double-precision floating point values.
109 const Decimal doubleMax = Decimal::fromDouble(std::numeric_limits<double>::max());
110 if (value < -doubleMax || value > doubleMax)
111 return fallbackValue;
113 // We return +0 for -0 case.
114 return value.isZero() ? Decimal(0) : value;
117 double parseToDoubleForNumberType(const String& string, double fallbackValue)
119 // http://www.whatwg.org/specs/web-apps/current-work/#floating-point-numbers
120 // String::toDouble() accepts leading + and whitespace characters, which are not valid here.
121 UChar firstCharacter = string[0];
122 if (firstCharacter != '-' && firstCharacter != '.' && !isASCIIDigit(firstCharacter))
123 return fallbackValue;
126 double value = string.toDouble(&valid);
128 return fallbackValue;
130 // NaN and infinity are considered valid by String::toDouble, but not valid here.
131 if (!std::isfinite(value))
132 return fallbackValue;
134 // Numbers are considered finite IEEE 754 Double-precision floating point values.
135 if (-std::numeric_limits<double>::max() > value || value > std::numeric_limits<double>::max())
136 return fallbackValue;
138 // The following expression converts -0 to +0.
139 return value ? value : 0;
142 template <typename CharacterType>
143 static bool parseHTMLIntegerInternal(const CharacterType* position, const CharacterType* end, int& value)
149 while (position < end) {
150 if (!isHTMLSpace<CharacterType>(*position))
158 ASSERT(position < end);
161 if (*position == '-') {
164 } else if (*position == '+')
168 ASSERT(position < end);
171 if (!isASCIIDigit(*position))
175 StringBuilder digits;
176 while (position < end) {
177 if (!isASCIIDigit(*position))
179 digits.append(*position++);
185 value = sign * charactersToIntStrict(digits.characters8(), digits.length(), &ok);
187 value = sign * charactersToIntStrict(digits.characters16(), digits.length(), &ok);
191 // http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
192 bool parseHTMLInteger(const String& input, int& value)
196 unsigned length = input.length();
197 if (!length || input.is8Bit()) {
198 const LChar* start = input.characters8();
199 return parseHTMLIntegerInternal(start, start + length, value);
202 const UChar* start = input.characters16();
203 return parseHTMLIntegerInternal(start, start + length, value);
206 template <typename CharacterType>
207 static bool parseHTMLNonNegativeIntegerInternal(const CharacterType* position, const CharacterType* end, unsigned& value)
210 while (position < end) {
211 if (!isHTMLSpace<CharacterType>(*position))
219 ASSERT(position < end);
222 if (*position == '+')
228 ASSERT(position < end);
231 if (!isASCIIDigit(*position))
235 StringBuilder digits;
236 while (position < end) {
237 if (!isASCIIDigit(*position))
239 digits.append(*position++);
245 value = charactersToUIntStrict(digits.characters8(), digits.length(), &ok);
247 value = charactersToUIntStrict(digits.characters16(), digits.length(), &ok);
252 // http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-non-negative-integers
253 bool parseHTMLNonNegativeInteger(const String& input, unsigned& value)
257 unsigned length = input.length();
258 if (length && input.is8Bit()) {
259 const LChar* start = input.characters8();
260 return parseHTMLNonNegativeIntegerInternal(start, start + length, value);
263 const UChar* start = input.characters16();
264 return parseHTMLNonNegativeIntegerInternal(start, start + length, value);
// Token searched for when extracting a charset, and its length without the
// terminating NUL.
static const char charsetString[] = "charset";
static const size_t charsetLength = sizeof(charsetString) - 1;
270 String extractCharset(const String& value)
273 unsigned length = value.length();
275 while (pos < length) {
276 pos = value.find(charsetString, pos, false);
277 if (pos == kNotFound)
280 pos += charsetLength;
283 while (pos < length && value[pos] <= ' ')
286 if (value[pos] != '=')
291 while (pos < length && value[pos] <= ' ')
295 if (pos < length && (value[pos] == '"' || value[pos] == '\'')) {
296 quoteMark = static_cast<char>(value[pos++]);
297 ASSERT(!(quoteMark & 0x80));
304 while (end < length && ((quoteMark && value[end] != quoteMark) || (!quoteMark && value[end] > ' ' && value[end] != '"' && value[end] != '\'' && value[end] != ';')))
307 if (quoteMark && (end == length))
308 break; // Close quote not found.
310 return value.substring(pos, end - pos);
322 WTF::TextEncoding encodingFromMetaAttributes(const HTMLAttributeList& attributes)
324 bool gotPragma = false;
328 for (const auto& htmlAttribute : attributes) {
329 const String& attributeName = htmlAttribute.first;
330 const String& attributeValue = AtomicString(htmlAttribute.second);
332 if (threadSafeMatch(attributeName, http_equivAttr)) {
333 if (equalIgnoringCase(attributeValue, "content-type"))
335 } else if (charset.isEmpty()) {
336 if (threadSafeMatch(attributeName, charsetAttr)) {
337 charset = attributeValue;
339 } else if (threadSafeMatch(attributeName, contentAttr)) {
340 charset = extractCharset(attributeValue);
341 if (charset.length())
347 if (mode == Charset || (mode == Pragma && gotPragma))
348 return WTF::TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset));
350 return WTF::TextEncoding();
353 static bool threadSafeEqual(const StringImpl* a, const StringImpl* b)
357 if (a->hash() != b->hash())
359 return equalNonNull(a, b);
362 bool threadSafeMatch(const QualifiedName& a, const QualifiedName& b)
364 return threadSafeEqual(a.localName().impl(), b.localName().impl());
367 bool threadSafeMatch(const String& localName, const QualifiedName& qName)
369 return threadSafeEqual(localName.impl(), qName.localName().impl());
372 template<typename CharType>
373 inline StringImpl* findStringIfStatic(const CharType* characters, unsigned length)
375 // We don't need to try hashing if we know the string is too long.
376 if (length > StringImpl::highestStaticStringLength())
378 // computeHashAndMaskTop8Bits is the function StringImpl::hash() uses.
379 unsigned hash = StringHasher::computeHashAndMaskTop8Bits(characters, length);
380 const WTF::StaticStringsTable& table = StringImpl::allStaticStrings();
381 ASSERT(!table.isEmpty());
383 WTF::StaticStringsTable::const_iterator it = table.find(hash);
384 if (it == table.end())
386 // It's possible to have hash collisions between arbitrary strings and
387 // known identifiers (e.g. "bvvfg" collides with "script").
388 // However ASSERTs in StringImpl::createStatic guard against there ever being collisions
389 // between static strings.
390 if (!equal(it->value, characters, length))
395 String attemptStaticStringCreation(const LChar* characters, size_t size)
397 String string(findStringIfStatic(characters, size));
400 return String(characters, size);
403 String attemptStaticStringCreation(const UChar* characters, size_t size, CharacterWidth width)
405 String string(findStringIfStatic(characters, size));
408 if (width == Likely8Bit)
409 string = StringImpl::create8BitIfPossible(characters, size);
410 else if (width == Force8Bit)
411 string = String::make8BitFrom16BitSource(characters, size);
413 string = String(characters, size);