2 Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public
6 License as published by the Free Software Foundation; either
7 version 2 of the License, or (at your option) any later version.
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public License
15 along with this library; see the file COPYING.LIB. If not, write to
16 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 Boston, MA 02110-1301, USA.
20 #ifndef SegmentedString_h
21 #define SegmentedString_h
23 #include "platform/PlatformExport.h"
24 #include "wtf/Deque.h"
25 #include "wtf/text/StringBuilder.h"
26 #include "wtf/text/TextPosition.h"
27 #include "wtf/text/WTFString.h"
// Forward declaration; the class itself is defined further down in this header.
31 class SegmentedString;
// SegmentedSubstring: a read cursor over a single String.
// It keeps the backing string (m_string), the number of characters not yet
// consumed (m_length), and a pointer to the current character held in m_data
// as either an 8-bit or a 16-bit pointer.
// NOTE(review): this view of the class is incomplete -- several lines (braces,
// access specifiers, some statements) are not shown, so comments below only
// describe the statements that are visible.
33 class PLATFORM_EXPORT SegmentedSubstring {
// Visible part of a constructor whose signature line is not shown: line
// numbers are counted by default and the character pointer starts out null.
37 , m_doNotExcludeLineNumbers(true)
40 m_data.string16Ptr = 0;
// Construct a cursor positioned at the start of |str|; the remaining length
// starts at str.length() and line numbers are counted by default.
43 SegmentedSubstring(const String& str)
44 : m_length(str.length())
45 , m_doNotExcludeLineNumbers(true)
// Point the cursor at the 8-bit or 16-bit character data, depending on how the string is stored.
49 if (m_string.is8Bit()) {
51 m_data.string8Ptr = m_string.characters8();
54 m_data.string16Ptr = m_string.characters16();
// Reset the cursor: no characters left, null data pointer, not 8-bit.
// m_string itself is not touched by this function.
61 void clear() { m_length = 0; m_data.string16Ptr = 0; m_is8Bit = false;}
// True when the cursor reads through the 8-bit pointer.
// NOTE(review): only reads m_is8Bit but is not declared const.
63 bool is8Bit() { return m_is8Bit; }
// The two accessors below are exact negations of one another.
65 bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; }
66 bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; }
// Stop counting line numbers for this substring (clears m_doNotExcludeLineNumbers).
68 void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; }
// Characters already consumed = total string length minus the remaining length.
70 int numberOfCharactersConsumed() const { return m_string.length() - m_length; }
// Append the not-yet-consumed characters to |builder|.
72 void appendTo(StringBuilder& builder) const
// |offset| is the number of characters already consumed.
74 int offset = m_string.length() - m_length;
// Two visible cases: append the whole string, or only the m_length characters
// starting at |offset|. The conditions selecting between them are not visible
// in this view.
78 builder.append(m_string);
80 builder.append(m_string.substring(offset, m_length));
// Read the current character through the 8-bit pointer (no null check here).
84 UChar getCurrentChar8()
86 return *m_data.string8Ptr;
// Read the current character through the 16-bit pointer; yields 0 when the pointer is null.
89 UChar getCurrentChar16()
91 return m_data.string16Ptr ? *m_data.string16Ptr : 0;
// Advance the 8-bit pointer by one and return the character it then points at.
// m_length is not modified here.
94 UChar incrementAndGetCurrentChar8()
96 ASSERT(m_data.string8Ptr);
97 return *++m_data.string8Ptr;
// 16-bit counterpart of incrementAndGetCurrentChar8(); m_length is not modified here either.
100 UChar incrementAndGetCurrentChar16()
102 ASSERT(m_data.string16Ptr);
103 return *++m_data.string16Ptr;
// Return up to |length| characters of m_string starting at the current
// (already-consumed) offset.
106 String currentSubString(unsigned length)
108 int offset = m_string.length() - m_length;
109 return m_string.substring(offset, length);
// Width-agnostic read of the current character: forwards to the 8-bit or
// 16-bit getter. The selecting condition is not visible in this view
// (presumably is8Bit() -- confirm).
112 ALWAYS_INLINE UChar getCurrentChar()
116 return getCurrentChar8();
117 return getCurrentChar16();
// Width-agnostic advance-and-read: forwards to the 8-bit or 16-bit variant.
// The selecting condition is not visible in this view (presumably is8Bit() -- confirm).
120 ALWAYS_INLINE UChar incrementAndGetCurrentChar()
124 return incrementAndGetCurrentChar8();
125 return incrementAndGetCurrentChar16();
// Current-character pointers, accessed elsewhere as m_data.string8Ptr and
// m_data.string16Ptr. The enclosing declaration is not visible in this view
// (presumably a union, since only one is set at a time -- confirm).
130 const LChar* string8Ptr;
131 const UChar* string16Ptr;
// True (the constructor default) while line numbers should be counted for this substring.
136 bool m_doNotExcludeLineNumbers;
// SegmentedString: a character stream made of a current SegmentedSubstring
// (m_currentString) plus a queue of further substrings (m_substrings).
// NOTE(review): this view is incomplete; the constructor signature and several
// initializers that belong with the lines below are not shown.
141 class PLATFORM_EXPORT SegmentedString {
// Visible initializers of a constructor: both consumed-character counters start at zero.
147 , m_numberOfCharactersConsumedPriorToCurrentString(0)
148 , m_numberOfCharactersConsumedPriorToCurrentLine(0)
// No fast path is enabled and both advance hooks point at advanceEmpty.
152 , m_fastPathFlags(NoFastPath)
153 , m_advanceFunc(&SegmentedString::advanceEmpty)
154 , m_advanceAndUpdateLineNumberFunc(&SegmentedString::advanceEmpty)
// Construct a stream whose current substring is |str|.
// (Some initializers of this constructor are not visible in this view.)
158 SegmentedString(const String& str)
161 , m_currentString(str)
163 , m_numberOfCharactersConsumedPriorToCurrentString(0)
164 , m_numberOfCharactersConsumedPriorToCurrentLine(0)
// The stream is flagged empty exactly when |str| has no characters.
167 , m_empty(!str.length())
168 , m_fastPathFlags(NoFastPath)
// Cache the first character only when there is one to read.
170 if (m_currentString.m_length)
171 m_currentChar = m_currentString.getCurrentChar();
// Select the advance implementations matching the new current substring.
172 updateAdvanceFunctionPointers();
// Overloads taking another SegmentedString. Declarations only; the definitions
// are not part of this view. lookAheadSlowCase() uses prepend() to put
// characters it consumed back at the front of the stream.
178 void append(const SegmentedString&);
179 void prepend(const SegmentedString&);
// Reports the exclude-line-numbers setting of the current substring only.
181 bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); }
// Declaration only; the definition is not part of this view.
182 void setExcludeLineNumbers();
// Body fragment of a member function whose signature line is not visible in
// this view (NOTE(review): presumably the routine that pushes a character back
// onto the stream -- confirm against the full header).
// Branch taken when no first pushed character is stored yet.
186 if (!m_pushedChar1) {
// The current character becomes the pushed character if it is non-zero,
// otherwise the current substring's character.
188 m_currentChar = m_pushedChar1 ? m_pushedChar1 : m_currentString.getCurrentChar();
// A pending pushed character rules out the fast advance paths
// (updateAdvanceFunctionPointers() only enables them when !m_pushedChar1).
189 updateSlowCaseFunctionPointers();
// Other visible path: the second push slot must still be free.
191 ASSERT(!m_pushedChar2);
// Returns the cached m_empty flag (initialized from !str.length() by the String constructor).
196 bool isEmpty() const { return m_empty; }
// Declaration only; the definition is not part of this view.
// lookAheadSlowCase() compares the needle length against this value.
197 unsigned length() const;
// Returns the m_closed flag.
199 bool isClosed() const { return m_closed; }
// Result type of the lookAhead* functions. The enumerator list is not visible
// in this view; DidNotMatch and NotEnoughCharacters are the values referenced
// by lookAheadSlowCase().
201 enum LookAheadResult {
// Case-sensitive look-ahead: forwards to lookAheadInline() with caseSensitive = true.
207 LookAheadResult lookAhead(const String& string) { return lookAheadInline(string, true); }
// Case-insensitive look-ahead: forwards to lookAheadInline() with caseSensitive = false.
208 LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline(string, false); }
// Body fragment of the single-character advance routine; its signature line and
// some statements are not visible in this view.
// Fast path, used while the Use8BitAdvance flag is set.
212 if (m_fastPathFlags & Use8BitAdvance) {
// The fast path is only valid when no pushed-back character is pending.
213 ASSERT(!m_pushedChar1);
// Decrement the remaining length first; the flag is true when exactly one
// character remains after the decrement.
214 bool haveOneCharacterLeft = (--m_currentString.m_length == 1);
215 m_currentChar = m_currentString.incrementAndGetCurrentChar8();
// NOTE(review): the statement guarded by this test is not visible here
// (presumably an early return -- confirm).
217 if (!haveOneCharacterLeft)
// One character left: switch to the slow-case function pointers.
220 updateSlowCaseFunctionPointers();
// Generic path: dispatch through the currently selected advance implementation.
225 (this->*m_advanceFunc)();
// Advance by one character while keeping the line bookkeeping up to date.
// Some statements of this function are not visible in this view.
228 inline void advanceAndUpdateLineNumber()
// Fast path, used while the Use8BitAdvance flag is set.
230 if (m_fastPathFlags & Use8BitAdvance) {
231 ASSERT(!m_pushedChar1);
// A newline only counts when the Use8BitAdvanceAndUpdateLineNumbers flag is also
// set. Note the non-short-circuit '&' on boolean operands (both sides are always evaluated).
233 bool haveNewLine = (m_currentChar == '\n') & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers);
// Decrement the remaining length; true when exactly one character remains afterwards.
234 bool haveOneCharacterLeft = (--m_currentString.m_length == 1);
236 m_currentChar = m_currentString.incrementAndGetCurrentChar8();
// Common case: neither a newline nor the last-character condition.
// NOTE(review): the guarded statement is not visible here (presumably an early return -- confirm).
238 if (!(haveNewLine | haveOneCharacterLeft))
// Record how many characters had been consumed in total at this point
// (prior substrings + the current substring).
// NOTE(review): the condition guarding this assignment is not visible here.
243 m_numberOfCharactersConsumedPriorToCurrentLine = m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed();
// One character left: switch to the slow-case function pointers.
246 if (haveOneCharacterLeft)
247 updateSlowCaseFunctionPointers();
// Generic path: dispatch through the currently selected implementation.
252 (this->*m_advanceAndUpdateLineNumberFunc)();
// Asserts that the current character equals |expectedCharacter|.
// ASSERT_UNUSED names |expectedCharacter| as the variable that may otherwise go unused.
// NOTE(review): the rest of the body is not visible in this view (presumably it then advances -- confirm).
255 void advanceAndASSERT(UChar expectedCharacter)
257 ASSERT_UNUSED(expectedCharacter, currentChar() == expectedCharacter);
// Like advanceAndASSERT(), but the assertion compares the case-folded forms of
// the current and expected characters.
// NOTE(review): the rest of the body is not visible in this view (presumably it then advances -- confirm).
261 void advanceAndASSERTIgnoringCase(UChar expectedCharacter)
263 ASSERT_UNUSED(expectedCharacter, WTF::Unicode::foldCase(currentChar()) == WTF::Unicode::foldCase(expectedCharacter));
// Precondition (asserted): the current character is not '\n'.
// NOTE(review): the rest of the body is not visible in this view (presumably a plain advance -- confirm).
267 void advancePastNonNewline()
269 ASSERT(currentChar() != '\n');
// Step over a newline and update the line bookkeeping.
// Precondition (asserted): the current character is '\n'.
// Some statements of this function are not visible in this view.
273 void advancePastNewlineAndUpdateLineNumber()
275 ASSERT(currentChar() == '\n');
// Inline path: no pushed-back character and more than one character left in the current substring.
276 if (!m_pushedChar1 && m_currentString.m_length > 1) {
// newLineFlag is 1 when line numbers are counted for this substring and 0
// otherwise, so the addition below bumps the line counter only in the former case.
277 int newLineFlag = m_currentString.doNotExcludeLineNumbers();
278 m_currentLine += newLineFlag;
// The new line starts one character past the newline being consumed.
// NOTE(review): a line preceding this assignment is not visible here; it may be conditional -- confirm.
280 m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
// Consume the newline and load the following character.
281 decrementAndCheckLength();
282 m_currentChar = m_currentString.incrementAndGetCurrentChar();
// Fallback for the cases the inline path does not cover.
285 advanceAndUpdateLineNumberSlowCase();
// Declaration only; the definition is not part of this view.
// lookAheadSlowCase() calls this to pull |count| characters into a temporary buffer.
288 // Writes the consumed characters into consumedCharacters, which must
289 // have space for at least |count| characters.
290 void advance(unsigned count, UChar* consumedCharacters);
// True when m_pushedChar1 is non-zero (the value is converted to bool).
292 bool escaped() const { return m_pushedChar1; }
// Total number of characters consumed so far: characters from earlier
// substrings plus those consumed from the current one, minus the count of
// pushed-back characters.
294 int numberOfCharactersConsumed() const
296 int numberOfPushedCharacters = 0;
// NOTE(review): the conditions guarding the two increments are not visible in
// this view (presumably one per occupied push slot -- confirm).
298 ++numberOfPushedCharacters;
300 ++numberOfPushedCharacters;
302 return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters;
// Declaration only; the definition is not part of this view.
305 String toString() const;
// Returns the cached current character (m_currentChar) without touching the substrings.
307 UChar currentChar() const { return m_currentChar; }
// Position accessors. Declarations only; the definitions are not part of this view.
309 // This method is moderately slow compared to currentLine().
310 OrdinalNumber currentColumn() const;
311 OrdinalNumber currentLine() const;
312 // Sets the line/column variables. The column is specified indirectly by the parameter columnAftreProlog,
313 // which is the column value we should get once the prolog (the first prologLength characters) has been consumed.
314 void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAftreProlog, int prologLength);
// Bit flags stored in m_fastPathFlags. The enclosing enum declaration and the
// NoFastPath enumerator used elsewhere are not visible in this view.
// Bit 0: the 8-bit fast path should also count newlines.
319 Use8BitAdvanceAndUpdateLineNumbers = 1 << 0,
// Bit 1: the inline 8-bit fast path in the advance routines is enabled.
320 Use8BitAdvance = 1 << 1,
// Overloads operating on a single SegmentedSubstring. Declarations only; the
// definitions are not part of this view.
323 void append(const SegmentedSubstring&);
324 void prepend(const SegmentedSubstring&);
// Advance implementations and helpers. Declarations only; the definitions are
// not part of this view.
// The 8/16 variants are the targets installed into
// m_advanceAndUpdateLineNumberFunc by updateAdvanceFunctionPointers() when line
// numbers are counted for the current substring.
328 void advanceAndUpdateLineNumber8();
329 void advanceAndUpdateLineNumber16();
330 void advanceSlowCase();
// Called by advancePastNewlineAndUpdateLineNumber() when its inline path does not apply.
331 void advanceAndUpdateLineNumberSlowCase();
333 void advanceSubstring();
// Called whenever the fast paths stop being applicable (one character left, or
// a pushed-back character is pending).
335 void updateSlowCaseFunctionPointers();
// Consume one character from the current substring's remaining length.
// Precondition (asserted): more than one character remains before the call.
337 void decrementAndCheckLength()
339 ASSERT(m_currentString.m_length > 1);
// When exactly one character remains after the decrement, switch to the slow-case function pointers.
340 if (--m_currentString.m_length == 1)
341 updateSlowCaseFunctionPointers();
// Choose the advance implementations (and fast-path flags) that match the
// state of the current substring.
// Some lines of this function (else/return/braces) are not visible in this view.
344 void updateAdvanceFunctionPointers()
// Fast implementations are only eligible with more than one character left and
// no pushed-back character pending.
346 if ((m_currentString.m_length > 1) && !m_pushedChar1) {
// 8-bit data: use advance8 and enable the inline 8-bit fast path.
347 if (m_currentString.is8Bit()) {
348 m_advanceFunc = &SegmentedString::advance8;
349 m_fastPathFlags = Use8BitAdvance;
// Line numbers counted: use the line-tracking variant and flag it for the inline fast path.
350 if (m_currentString.doNotExcludeLineNumbers()) {
351 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber8;
352 m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers;
// Visible alternative assignment: the line-number hook is the plain advance8.
354 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance8;
// 16-bit data: use advance16; no inline fast-path flag is set.
359 m_advanceFunc = &SegmentedString::advance16;
360 m_fastPathFlags = NoFastPath;
361 if (m_currentString.doNotExcludeLineNumbers())
362 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber16;
// Visible alternative assignment: the line-number hook is the plain advance16.
364 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance16;
// Nothing left in the current substring and no queued substrings: both hooks point at advanceEmpty.
368 if (!m_currentString.m_length && !isComposite()) {
369 m_advanceFunc = &SegmentedString::advanceEmpty;
370 m_fastPathFlags = NoFastPath;
371 m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty;
// Remaining visible path: install the slow-case function pointers.
374 updateSlowCaseFunctionPointers();
// Shared implementation of lookAhead() and lookAheadIgnoringCase().
// |caseSensitive| is forwarded to String::startsWith.
// The return statements of the inline path are not visible in this view.
377 inline LookAheadResult lookAheadInline(const String& string, bool caseSensitive)
// Inline path: no pushed-back character, and |string| is no longer than what
// remains of the current substring.
379 if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) {
// Compare against the next string.length() characters of the current substring.
380 String currentSubstring = m_currentString.currentSubString(string.length());
381 if (currentSubstring.startsWith(string, caseSensitive))
// Everything else is handled by the slow case.
385 return lookAheadSlowCase(string, caseSensitive);
// Slow-path look-ahead: consumes |string|.length() characters into a temporary
// string, compares, then prepends the consumed characters back onto the stream.
// Some lines of this function are not visible in this view.
388 LookAheadResult lookAheadSlowCase(const String& string, bool caseSensitive)
390 unsigned count = string.length();
// Not enough input to compare against.
391 if (count > length())
392 return NotEnoughCharacters;
// createUninitialized hands back a writable buffer of |count| characters
// through consumedCharacters; advance() fills it with the consumed characters.
393 UChar* consumedCharacters;
394 String consumedString = String::createUninitialized(count, consumedCharacters);
395 advance(count, consumedCharacters);
396 LookAheadResult result = DidNotMatch;
// NOTE(review): the statement guarded by this test is not visible here
// (presumably it records a match in |result| -- confirm).
397 if (consumedString.startsWith(string, caseSensitive))
// Put the consumed characters back at the front of the stream.
399 prepend(SegmentedString(consumedString));
// True when at least one further substring is queued in m_substrings.
403 bool isComposite() const { return !m_substrings.isEmpty(); }
// Data members (several member declarations are not visible in this view).
// The substring currently being read.
407 SegmentedSubstring m_currentString;
// Characters consumed from substrings that preceded m_currentString.
409 int m_numberOfCharactersConsumedPriorToCurrentString;
// Characters consumed up to the point recorded when the line counter was last
// updated (see advancePastNewlineAndUpdateLineNumber()).
410 int m_numberOfCharactersConsumedPriorToCurrentLine;
// Further substrings queued behind m_currentString; isComposite() tests this for non-emptiness.
412 Deque<SegmentedSubstring> m_substrings;
// Combination of the Use8BitAdvance* flags, or NoFastPath.
415 unsigned char m_fastPathFlags;
// Member-function pointers selected by updateAdvanceFunctionPointers() and
// used as the generic path of the advance routines.
416 void (SegmentedString::*m_advanceFunc)();
417 void (SegmentedString::*m_advanceAndUpdateLineNumberFunc)();