2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * (C) 1999 Antti Koivisto (koivisto@kde.org)
4 * (C) 2001 Dirk Mueller ( mueller@kde.org )
5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.
6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
26 #include "wtf/text/StringImpl.h"
28 #include "wtf/DynamicAnnotations.h"
29 #include "wtf/LeakAnnotations.h"
30 #include "wtf/MainThread.h"
31 #include "wtf/OwnPtr.h"
32 #include "wtf/PartitionAlloc.h"
33 #include "wtf/PassOwnPtr.h"
34 #include "wtf/StdLibExtras.h"
36 #include "wtf/text/AtomicString.h"
37 #include "wtf/text/StringBuffer.h"
38 #include "wtf/text/StringHash.h"
39 #include "wtf/unicode/CharacterNames.h"
40 #include <unicode/translit.h>
41 #include <unicode/unistr.h>
44 #include "wtf/DataLog.h"
45 #include "wtf/HashMap.h"
46 #include "wtf/HashSet.h"
47 #include "wtf/ProcessID.h"
48 #include "wtf/RefCounted.h"
49 #include "wtf/ThreadingPrimitives.h"
57 using namespace Unicode;
// Compile-time size guard: the build breaks if sizeof(StringImpl) stops being exactly three ints.
59 COMPILE_ASSERT(sizeof(StringImpl) == 3 * sizeof(int), StringImpl_should_stay_small);
// Accessor for the mutex that the statistics helpers in this file lock before touching liveStrings().
63 static Mutex& statsMutex()
65 DEFINE_STATIC_LOCAL(Mutex, mutex, ());
// Accessor for the set of strings currently tracked for statistics. Entries are kept as
// untyped pointers for the reason given in the note below.
69 static HashSet<void*>& liveStrings()
71 // Notice that we can't use HashSet<StringImpl*> because then HashSet would dedup identical strings.
72 DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ());
// Registers |string| in the tracked-string set. The stats mutex is held for the duration of the call.
76 void addStringForStats(StringImpl* string)
78 MutexLocker locker(statsMutex());
79 liveStrings().add(string);
// Drops |string| from the tracked-string set. The stats mutex is held for the duration of the call.
82 void removeStringForStats(StringImpl* string)
84 MutexLocker locker(statsMutex());
85 liveStrings().remove(string);
// Writes a NUL-terminated preview of |string| into |snippet|, examining at most
// kMaxSnippetLength leading characters of the string.
88 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet)
90 const unsigned kMaxSnippetLength = 64;
// Pre-size the vector: the characters that will be copied, three more for "..." whenever the
// cap is reached, and one for the terminator.
93 size_t expectedLength = std::min(string->length(), kMaxSnippetLength);
94 if (expectedLength == kMaxSnippetLength)
95 expectedLength += 3; // For the "...".
96 ++expectedLength; // For the terminating '\0'.
97 snippet.reserveCapacity(expectedLength);
// Visit the leading characters; each one is classified with isASCIIPrintable().
100 for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) {
101 UChar c = (*string)[i];
102 if (isASCIIPrintable(c))
// True only when the loop above stopped at the cap before reaching the end of the string.
107 if (i < string->length()) {
112 snippet.append('\0');
// Helper for the statistics code: inspects whether |string| is stored wider than its contents
// need. 8-bit strings are handled by the first check; otherwise the high byte of every code
// unit is OR-ed into |c| (presumably a zero result means the string would fit in 8 bits - confirm).
115 static bool isUnnecessarilyWide(const StringImpl* string)
117 if (string->is8Bit())
120 for (unsigned i = 0; i < string->length(); ++i)
121 c |= (*string)[i] >> 8;
// Ref-counted accumulator of statistics about live strings; filled through add() and ordered
// by the free operator< that follows the class.
125 class PerStringStats : public RefCounted<PerStringStats> {
// Factory: allocates an instance and adopts its initial reference.
127 static PassRefPtr<PerStringStats> create()
129 return adoptRef(new PerStringStats);
// Folds one live string into this record: length, preview snippet, atomic-copy count and
// the "unnecessarily wide" flag.
132 void add(const StringImpl* string)
136 m_length = string->length();
137 fillWithSnippet(string, m_snippet);
139 if (string->isAtomic())
140 ++m_numberOfAtomicCopies;
141 if (isUnnecessarilyWide(string))
142 m_unnecessarilyWide = true;
// Number of characters across all recorded copies (copy count times length).
145 size_t totalCharacters() const
147 return m_numberOfCopies * m_length;
// Logging: emits one dataLogF() line with copy count, status text, length and snippet. The
// status starts as "ok" (presumably replaced when the wide flag is set - confirm).
152 const char* status = "ok";
153 if (m_unnecessarilyWide)
155 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, m_length, m_snippet.data());
// Set by add() once a recorded string is found to be unnecessarily wide.
158 bool m_unnecessarilyWide;
159 unsigned m_numberOfCopies;
161 unsigned m_numberOfAtomicCopies;
// NUL-terminated preview produced by fillWithSnippet().
162 Vector<char> m_snippet;
// Constructor initializers: flag cleared, counters zeroed.
166 : m_unnecessarilyWide(false)
167 , m_numberOfCopies(0)
169 , m_numberOfAtomicCopies(0)
// Ordering for PerStringStats handles. Keys are compared in this order, each one only breaking
// ties of the previous: wide flag (non-wide sorts before wide), total characters, number of
// copies, length, number of atomic copies.
174 bool operator<(const RefPtr<PerStringStats>& a, const RefPtr<PerStringStats>& b)
176 if (a->m_unnecessarilyWide != b->m_unnecessarilyWide)
177 return !a->m_unnecessarilyWide && b->m_unnecessarilyWide;
178 if (a->totalCharacters() != b->totalCharacters())
179 return a->totalCharacters() < b->totalCharacters();
180 if (a->m_numberOfCopies != b->m_numberOfCopies)
181 return a->m_numberOfCopies < b->m_numberOfCopies;
182 if (a->m_length != b->m_length)
183 return a->m_length < b->m_length;
184 return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies;
// Callback (the void* argument is unnamed and unused) that aggregates the tracked strings
// into PerStringStats records and walks the 20 highest-ranked ones. Holds the stats mutex
// throughout.
187 static void printLiveStringStats(void*)
189 MutexLocker locker(statsMutex());
190 HashSet<void*>& strings = liveStrings();
// Pass 1: group the tracked strings in a HashMap keyed by StringImpl*, creating a record the
// first time a key is seen.
192 HashMap<StringImpl*, RefPtr<PerStringStats> > stats;
193 for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); ++iter) {
194 StringImpl* string = static_cast<StringImpl*>(*iter);
195 HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator entry = stats.find(string);
196 RefPtr<PerStringStats> value = entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create()) : entry->value;
198 stats.set(string, value.release());
// Pass 2: flatten the records into a vector so they can be sorted.
201 Vector<RefPtr<PerStringStats> > all;
202 for (HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator iter = stats.begin(); iter != stats.end(); ++iter)
203 all.append(iter->value);
// Ascending sort followed by a reverse puts the largest records first.
205 std::sort(all.begin(), all.end());
206 std::reverse(all.begin(), all.end());
207 for (size_t i = 0; i < 20 && i < all.size(); ++i)
// Storage for the statistics object owned by StringImpl.
211 StringStats StringImpl::m_stringStats;
// Countdown of string removals until the next statistics dump; starts at the print frequency.
213 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printStringStatsFrequency;
// Updates the aggregate counters for a string that is going away: total count, the per-width
// string count and the per-width character total.
215 void StringStats::removeString(StringImpl* string)
217 unsigned length = string->length();
218 --m_totalNumberStrings;
220 if (string->is8Bit()) {
221 --m_number8BitStrings;
222 m_total8BitData -= length;
224 --m_number16BitStrings;
225 m_total16BitData -= length;
// When the removal countdown reaches zero it is re-armed to the print frequency.
228 if (!--s_stringRemovesTillPrintStats) {
229 s_stringRemovesTillPrintStats = s_printStringStatsFrequency;
234 void StringStats::printStats()
236 dataLogF("String stats for process id %d:\n", getCurrentProcessID());
238 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData;
239 double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
240 double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (double)m_number8BitStrings : 0.0;
241 dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, average8bitLength);
243 double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
244 double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0;
245 dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength);
247 double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0;
248 unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2;
249 dataLogF("%8u Total %12llu chars %12llu bytes avg length %6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLength);
250 unsigned long long totalSavedBytes = m_total8BitData;
251 double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (double)(totalDataBytes + totalSavedBytes) : 0.0;
252 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings);
254 unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl);
255 double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100;
256 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, overheadPercent);
258 callOnMainThread(printLiveStringStats, 0);
// Class-specific allocation: storage comes from the buffer partition. Only header-sized
// requests are expected (asserted below).
262 void* StringImpl::operator new(size_t size)
264 ASSERT(size == sizeof(StringImpl));
265 return partitionAllocGeneric(Partitions::getBufferPartition(), size);
// Class-specific deallocation: returns |ptr| to the buffer partition.
268 void StringImpl::operator delete(void* ptr)
270 partitionFreeGeneric(Partitions::getBufferPartition(), ptr);
// Destructor: reports the removal to the statistics macro and calls AtomicString::remove()
// for this string (presumably only when the string is atomic - confirm the guarding condition).
273 inline StringImpl::~StringImpl()
277 STRING_STATS_REMOVE_STRING(this);
280 AtomicString::remove(this);
// NOTE(review): going by the name, this presumably destroys the string unless it is a static
// string - confirm against the function body.
283 void StringImpl::destroyIfNotStatic()
// Creates an 8-bit string of |length| characters without initializing the characters.
// On return |data| points at the character storage, which sits immediately after the
// StringImpl header inside the same allocation, so the caller can fill it in.
289 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data)
296 // Allocate a single buffer large enough to contain the StringImpl
297 // struct as well as the data which it contains. This removes one
298 // heap allocation from this call.
299 StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), allocationSize<LChar>(length)));
301 data = reinterpret_cast<LChar*>(string + 1);
// Placement-new builds the header in the buffer obtained above.
302 return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
// 16-bit counterpart of the LChar overload: creates a string of |length| UChars without
// initializing them and points |data| at the storage that follows the header.
305 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
312 // Allocate a single buffer large enough to contain the StringImpl
313 // struct as well as the data which it contains. This removes one
314 // heap allocation from this call.
315 StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), allocationSize<UChar>(length)));
317 data = reinterpret_cast<UChar*>(string + 1);
// Placement-new builds the header in the buffer obtained above.
318 return adoptRef(new (string) StringImpl(length));
// Resizes the allocation behind |originalString| so that it can hold |length| characters,
// keeping the string's character width. The caller must hold the only reference (asserted).
321 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length)
323 ASSERT(originalString->hasOneRef());
328 bool is8Bit = originalString->is8Bit();
329 // Same as createUninitialized() except here we use realloc.
330 size_t size = is8Bit ? allocationSize<LChar>(length) : allocationSize<UChar>(length);
// The old header is destroyed explicitly; the raw buffer is then handed to the partition
// reallocator and a fresh header is placement-constructed in the result.
331 originalString->~StringImpl();
332 StringImpl* string = static_cast<StringImpl*>(partitionReallocGeneric(Partitions::getBufferPartition(), originalString.leakRef(), size));
// 8-bit construction first, 16-bit construction second (presumably selected by |is8Bit| - confirm).
334 return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
335 return adoptRef(new (string) StringImpl(length));
// Accessor for the file-local table of static strings (looked up by hash in createStatic()).
338 static StaticStringsTable& staticStrings()
340 DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ());
341 return staticStrings;
// Asserted by createStatic(); cleared by freezeStaticStrings().
345 static bool s_allowCreationOfStaticStrings = true;
// Read-only view of the static strings table.
348 const StaticStringsTable& StringImpl::allStaticStrings()
350 return staticStrings();
// Main-thread-only (asserted): clears the flag that createStatic() asserts on, so later
// createStatic() calls trip that assertion.
353 void StringImpl::freezeStaticStrings()
355 ASSERT(isMainThread());
358 s_allowCreationOfStaticStrings = false;
// Length of the longest static string created so far; maintained by createStatic().
362 unsigned StringImpl::m_highestStaticStringLength = 0;
// Creates the static 8-bit string for |string| / |length| with the precomputed |hash| and
// registers it in the static strings table under that hash. Static-string creation must
// still be allowed (asserted).
364 StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsigned hash)
366 ASSERT(s_allowCreationOfStaticStrings);
// If an entry with this hash already exists, its characters are asserted to match |string|.
370 StaticStringsTable::const_iterator it = staticStrings().find(hash);
371 if (it != staticStrings().end()) {
372 ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar)));
376 // Allocate a single buffer large enough to contain the StringImpl
377 // struct as well as the data which it contains. This removes one
378 // heap allocation from this call.
// The release assert keeps the size computation below from overflowing an unsigned.
379 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
380 size_t size = sizeof(StringImpl) + length * sizeof(LChar);
382 WTF_ANNOTATE_SCOPED_MEMORY_LEAK;
383 StringImpl* impl = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size));
// Characters live directly behind the header; the header is placement-constructed, then the
// characters are copied in.
385 LChar* data = reinterpret_cast<LChar*>(impl + 1);
386 impl = new (impl) StringImpl(length, hash, StaticString);
387 memcpy(data, string, length * sizeof(LChar));
389 impl->assertHashIsCorrect();
// Bookkeeping happens on the main thread only (asserted).
392 ASSERT(isMainThread());
393 m_highestStaticStringLength = std::max(m_highestStaticStringLength, length);
394 staticStrings().add(hash, impl);
395 WTF_ANNOTATE_BENIGN_RACE(impl,
396 "Benign race on the reference counter of a static string created by StringImpl::createStatic");
// Creates a 16-bit string holding a copy of |length| UChars from |characters|. A null pointer
// or zero length takes the early-out branch instead of allocating.
401 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
403 if (!characters || !length)
407 RefPtr<StringImpl> string = createUninitialized(length, data);
408 memcpy(data, characters, length * sizeof(UChar));
409 return string.release();
// Creates an 8-bit string holding a copy of |length| LChars from |characters|. A null pointer
// or zero length takes the early-out branch instead of allocating.
412 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned length)
414 if (!characters || !length)
418 RefPtr<StringImpl> string = createUninitialized(length, data);
419 memcpy(data, characters, length * sizeof(LChar));
420 return string.release();
// Creates a string from UChars, storing it as 8-bit when every character fits in one byte.
// The 8-bit buffer is filled optimistically; on the first character with a non-zero high
// byte the attempt is abandoned and the 16-bit create() is used instead.
423 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length)
425 if (!characters || !length)
429 RefPtr<StringImpl> string = createUninitialized(length, data);
431 for (size_t i = 0; i < length; ++i) {
432 if (characters[i] & 0xff00)
433 return create(characters, length);
434 data[i] = static_cast<LChar>(characters[i]);
437 return string.release();
// Creates an 8-bit string from a NUL-terminated LChar string. The length is measured with
// strlen() and must fit in an unsigned (release-asserted).
440 PassRefPtr<StringImpl> StringImpl::create(const LChar* string)
444 size_t length = strlen(reinterpret_cast<const char*>(string));
445 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());
446 return create(string, length);
// Tests every character of the string with isASCIISpace(). There is one loop for 8-bit
// storage and one for 16-bit storage.
449 bool StringImpl::containsOnlyWhitespace()
451 // FIXME: The definition of whitespace here includes a number of characters
452 // that are not whitespace from the point of view of RenderText; I wonder if
453 // that's a problem in practice.
// 8-bit storage.
455 for (unsigned i = 0; i < m_length; ++i) {
456 UChar c = characters8()[i];
457 if (!isASCIISpace(c))
// 16-bit storage.
464 for (unsigned i = 0; i < m_length; ++i) {
465 UChar c = characters16()[i];
466 if (!isASCIISpace(c))
// Returns a new string with up to |length| characters starting at |start|. A |start| at or
// past the end, and a |length| reaching the end of the string, are handled by dedicated
// branches before the copy.
472 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
474 if (start >= m_length)
476 unsigned maxLength = m_length - start;
477 if (length >= maxLength) {
// Copy from the 8-bit or the 16-bit buffer, matching this string's storage.
483 return create(characters8() + start, length);
485 return create(characters16() + start, length);
// Returns the code point that starts at index |i|. 8-bit strings and non-surrogate 16-bit
// code units are returned directly; a lead surrogate followed by a trail surrogate is
// combined into one supplementary code point.
488 UChar32 StringImpl::characterStartingAt(unsigned i)
491 return characters8()[i];
492 if (U16_IS_SINGLE(characters16()[i]))
493 return characters16()[i];
494 if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && U16_IS_TRAIL(characters16()[i + 1]))
495 return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]);
// Locale-independent lowercasing. Both the 8-bit and the 16-bit path first scan the string;
// a pure-ASCII string with no uppercase letters takes the no-op exit, a pure-ASCII string
// is converted with toASCIILower(), and anything else goes through Unicode::toLower().
499 PassRefPtr<StringImpl> StringImpl::lower()
501 // Note: This is a hot function in the Dromaeo benchmark, specifically the
502 // no-op code path up through the first 'return' statement.
504 // First scan the string for uppercase and non-ASCII characters:
// ---- 8-bit storage ----
508 const LChar* end = characters8() + m_length;
509 for (const LChar* chp = characters8(); chp != end; ++chp) {
510 if (UNLIKELY(isASCIIUpper(*chp)))
514 // Nothing to do if the string is all ASCII with no uppercase.
515 if (noUpper && !(ored & ~0x7F))
// The loops below index with int32_t, so the length must fit.
518 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
519 int32_t length = m_length;
522 RefPtr<StringImpl> newImpl = createUninitialized(length, data8);
// |ored| has no bits above 0x7F: every character is ASCII.
524 if (!(ored & ~0x7F)) {
525 for (int32_t i = 0; i < length; ++i)
526 data8[i] = toASCIILower(characters8()[i]);
528 return newImpl.release();
531 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
532 for (int32_t i = 0; i < length; ++i)
533 data8[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
535 return newImpl.release();
// ---- 16-bit storage ----
538 const UChar* end = characters16() + m_length;
539 for (const UChar* chp = characters16(); chp != end; ++chp) {
540 if (UNLIKELY(isASCIIUpper(*chp)))
544 // Nothing to do if the string is all ASCII with no uppercase.
545 if (noUpper && !(ored & ~0x7F))
548 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
549 int32_t length = m_length;
// All-ASCII 16-bit string: per-character toASCIILower().
551 if (!(ored & ~0x7F)) {
553 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
555 for (int32_t i = 0; i < length; ++i) {
556 UChar c = characters16()[i];
557 data16[i] = toASCIILower(c);
559 return newImpl.release();
562 // Do a slower implementation for cases that include non-ASCII characters.
564 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
// First attempt writes into a buffer of the original length; it is used as-is when the
// conversion succeeded and did not change the length.
567 int32_t realLength = Unicode::toLower(data16, length, characters16(), m_length, &error);
568 if (!error && realLength == length)
569 return newImpl.release();
// Otherwise retry with a buffer of the length reported by the first attempt.
571 newImpl = createUninitialized(realLength, data16);
572 Unicode::toLower(data16, realLength, characters16(), m_length, &error);
575 return newImpl.release();
// Locale-independent uppercasing. 8-bit strings are converted in place where possible, with
// special handling for sharp-s and for characters whose upper-case form needs 16 bits;
// everything else is converted through an upconverted 16-bit copy.
578 PassRefPtr<StringImpl> StringImpl::upper()
580 // This function could be optimized for no-op cases the way lower() is,
581 // but in empirical testing, few actual calls to upper() are no-ops, so
582 // it wouldn't be worth the extra time for pre-scanning.
// The loops below index with a signed 32-bit counter, so the length must fit.
584 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
585 int32_t length = m_length;
// ---- 8-bit storage ----
589 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
591 // Do a faster loop for the case where all the characters are ASCII.
593 for (int i = 0; i < length; ++i) {
594 LChar c = characters8()[i];
596 data8[i] = toASCIIUpper(c);
599 return newImpl.release();
601 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
602 int numberSharpSCharacters = 0;
604 // There are two special cases.
605 // 1. latin-1 characters when converted to upper case are 16 bit characters.
606 // 2. Lower case sharp-S converts to "SS" (two characters)
607 for (int32_t i = 0; i < length; ++i) {
608 LChar c = characters8()[i];
609 if (UNLIKELY(c == smallLetterSharpS))
610 ++numberSharpSCharacters;
611 UChar upper = Unicode::toUpper(c);
612 if (UNLIKELY(upper > 0xff)) {
613 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
616 data8[i] = static_cast<LChar>(upper);
// No sharp-s seen: the buffer filled by the loop above is already the result.
619 if (!numberSharpSCharacters)
620 return newImpl.release();
622 // We have numberSharpSCharacters sharp-s characters, but none of the other special characters.
// Each sharp-s needs one extra output character, hence the larger buffer.
623 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
627 for (int32_t i = 0; i < length; ++i) {
628 LChar c = characters8()[i];
629 if (c == smallLetterSharpS) {
633 *dest++ = static_cast<LChar>(Unicode::toUpper(c));
636 return newImpl.release();
// ---- 16-bit path: operates on an upconverted copy of the characters ----
640 RefPtr<StringImpl> upconverted = upconvertedString();
641 const UChar* source16 = upconverted->characters16();
644 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
646 // Do a faster loop for the case where all the characters are ASCII.
648 for (int i = 0; i < length; ++i) {
649 UChar c = source16[i];
651 data16[i] = toASCIIUpper(c);
654 return newImpl.release();
656 // Do a slower implementation for cases that include non-ASCII characters.
// First attempt uses a buffer of the original length; a second attempt uses the length
// reported by the first one.
658 int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &error);
659 if (!error && realLength == length)
661 newImpl = createUninitialized(realLength, data16);
662 Unicode::toUpper(data16, realLength, source16, m_length, &error);
665 return newImpl.release();
668 static bool inline localeIdMatchesLang(const AtomicString& localeId, const char* lang)
670 if (equalIgnoringCase(localeId, lang))
672 static char localeIdPrefix[4];
673 static const char delimeter[4] = "-_@";
675 size_t langLength = strlen(lang);
676 RELEASE_ASSERT(langLength >= 2 && langLength <= 3);
677 strncpy(localeIdPrefix, lang, langLength);
678 for (int i = 0; i < 3; ++i) {
679 localeIdPrefix[langLength] = delimeter[i];
680 // case-insensitive comparison
681 if (localeId.impl() && localeId.impl()->startsWith(localeIdPrefix, langLength + 1, false))
// Signature of the case-conversion routines handed to caseConvert(): (destination, destination
// capacity, source, source length, locale, error status) returning a length. u_strToLower and
// u_strToUpper are passed with this type below.
687 typedef int32_t (*icuCaseConverter)(UChar*, int32_t, const UChar*, int32_t, const char*, UErrorCode*);
// Applies |converter| for |locale| to |length| UChars at |source16| and returns the result as
// a new string. On a failure other than buffer overflow, |originalString| is returned unchanged.
689 static PassRefPtr<StringImpl> caseConvert(const UChar* source16, size_t length, icuCaseConverter converter, const char* locale, StringImpl* originalString)
692 int32_t targetLength = length;
693 RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16);
695 UErrorCode status = U_ZERO_ERROR;
// The converter reports the length it produced (or needs) through its return value.
696 targetLength = converter(data16, targetLength, source16, length, locale, &status);
697 if (U_SUCCESS(status)) {
698 output->truncateAssumingIsolated(targetLength);
699 return output.release();
701 if (status != U_BUFFER_OVERFLOW_ERROR)
702 return originalString;
// Buffer overflow: allocate an output of the reported length (presumably followed by another
// conversion attempt - confirm the enclosing loop).
703 // Expand the buffer.
704 output = StringImpl::createUninitialized(targetLength, data16);
// Locale-aware lowercasing. |localeIdentifier| is mapped to a conversion locale ("tr" for
// Turkish/Azerbaijani, "lt" for Lithuanian); the conversion itself runs through caseConvert()
// with u_strToLower on an upconverted copy of the characters.
708 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier)
710 // Use the more-optimized code path most of the time.
711 // Only Turkic (tr and az) languages and Lithuanian requires
712 // locale-specific lowercasing rules. Even though CLDR has el-Lower,
713 // it's identical to the locale-agnostic lowercasing. Context-dependent
714 // handling of Greek capital sigma is built into the common lowercasing
716 const char* localeForConversion = 0;
717 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az"))
718 localeForConversion = "tr";
719 else if (localeIdMatchesLang(localeIdentifier, "lt"))
720 localeForConversion = "lt";
// Lengths beyond the int32_t range are handled separately, before the narrowing below.
724 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
726 int length = m_length;
728 RefPtr<StringImpl> upconverted = upconvertedString();
729 const UChar* source16 = upconverted->characters16();
730 return caseConvert(source16, length, u_strToLower, localeForConversion, this);
// Locale-aware uppercasing. Turkish/Azerbaijani and Lithuanian are converted through
// caseConvert() with u_strToUpper; Greek is converted with the ICU "el-Upper" transliterator.
733 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier)
735 // Use the more-optimized code path most of the time.
736 // Only Turkic (tr and az) languages, Greek and Lithuanian require
737 // locale-specific uppercasing rules.
738 icu::UnicodeString transliteratorId;
739 const char* localeForConversion = 0;
740 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az"))
741 localeForConversion = "tr";
742 else if (localeIdMatchesLang(localeIdentifier, "el"))
743 transliteratorId = UNICODE_STRING_SIMPLE("el-Upper");
744 else if (localeIdMatchesLang(localeIdentifier, "lt"))
745 localeForConversion = "lt";
// Lengths beyond the int32_t range are handled separately, before the narrowing below.
749 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
751 int length = m_length;
753 RefPtr<StringImpl> upconverted = upconvertedString();
754 const UChar* source16 = upconverted->characters16();
// A conversion locale was selected: use the ICU case converter.
756 if (localeForConversion)
757 return caseConvert(source16, length, u_strToUpper, localeForConversion, this);
759 // TODO(jungshik): Cache transliterator if perf penalty warrants it for Greek.
760 UErrorCode status = U_ZERO_ERROR;
761 OwnPtr<icu::Transliterator> translit =
762 adoptPtr(icu::Transliterator::createInstance(transliteratorId, UTRANS_FORWARD, status));
763 if (U_FAILURE(status))
766 // target will be copy-on-write.
767 icu::UnicodeString target(false, source16, length);
768 translit->transliterate(target);
// The transliterated text is copied into a new StringImpl.
770 return create(target.getBuffer(), target.length());
// Returns a new string with the same length as this one. A |character| in the ASCII range
// takes the first branch; any other character takes the second (presumably an 8-bit result
// versus a 16-bit result - confirm against the declarations of |data|).
773 PassRefPtr<StringImpl> StringImpl::fill(UChar character)
775 if (!(character & ~0x7F)) {
777 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
778 for (unsigned i = 0; i < m_length; ++i)
780 return newImpl.release();
783 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
784 for (unsigned i = 0; i < m_length; ++i)
786 return newImpl.release();
// Returns a case-folded copy of the string. ASCII characters are folded with toASCIILower();
// non-ASCII Latin-1 goes through Unicode::toLower() and non-ASCII 16-bit text through
// Unicode::foldCase().
789 PassRefPtr<StringImpl> StringImpl::foldCase()
// The loops below index with int32_t, so the length must fit.
791 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
792 int32_t length = m_length;
// ---- 8-bit storage ----
795 // Do a faster loop for the case where all the characters are ASCII.
797 RefPtr <StringImpl>newImpl = createUninitialized(m_length, data);
800 for (int32_t i = 0; i < length; ++i) {
801 LChar c = characters8()[i];
802 data[i] = toASCIILower(c);
807 return newImpl.release();
809 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
810 for (int32_t i = 0; i < length; ++i)
811 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
813 return newImpl.release();
// ---- 16-bit storage ----
816 // Do a faster loop for the case where all the characters are ASCII.
818 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
820 for (int32_t i = 0; i < length; ++i) {
821 UChar c = characters16()[i];
823 data[i] = toASCIILower(c);
826 return newImpl.release();
828 // Do a slower implementation for cases that include non-ASCII characters.
// First attempt folds into a buffer of the original length; if that fails or the length
// changes, a buffer of the reported length is allocated and the fold is repeated.
830 int32_t realLength = Unicode::foldCase(data, length, characters16(), m_length, &error);
831 if (!error && realLength == length)
832 return newImpl.release();
833 newImpl = createUninitialized(realLength, data);
834 Unicode::foldCase(data, realLength, characters16(), m_length, &error);
837 return newImpl.release();
// Returns the string with leading and trailing characters accepted by |predicate| removed.
// |start| and |end| are moved inwards from both ends; when neither moved, the unchanged-string
// branch is taken instead of copying.
840 template <class UCharPredicate>
841 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate)
// |end| is an inclusive index.
847 unsigned end = m_length - 1;
849 // skip white space from start
850 while (start <= end && predicate(is8Bit() ? characters8()[start] : characters16()[start]))
857 // skip white space from end
858 while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end]))
// Nothing was stripped from either end.
861 if (!start && end == m_length - 1)
// Copy the surviving range [start, end] from the 8-bit or the 16-bit buffer.
864 return create(characters8() + start, end + 1 - start);
865 return create(characters16() + start, end + 1 - start);
// Functor adapter that wraps a CharacterMatchFunctionPtr so it can be passed to the
// predicate-templated helpers in this file.
868 class UCharPredicate {
870 inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(function) { }
// Forwards the character to the wrapped function.
872 inline bool operator()(UChar ch) const
874 return m_function(ch);
878 const CharacterMatchFunctionPtr m_function;
// Stateless functor that matches characters for which isSpaceOrNewline() is true.
881 class SpaceOrNewlinePredicate {
883 inline bool operator()(UChar ch) const
885 return isSpaceOrNewline(ch);
// Strips leading and trailing characters matched by isSpaceOrNewline().
889 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
891 return stripMatchedCharacters(SpaceOrNewlinePredicate());
// Strips leading and trailing characters matched by the caller-supplied |isWhiteSpace| function.
894 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace)
896 return stripMatchedCharacters(UCharPredicate(isWhiteSpace));
// Width-generic worker for removeCharacters(): produces a copy of the m_length characters at
// |characters| without those for which |findMatch| returns true.
899 template <typename CharType>
900 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType* characters, CharacterMatchFunctionPtr findMatch)
902 const CharType* from = characters;
903 const CharType* fromend = from + m_length;
905 // Assume the common case will not remove any characters
906 while (from != fromend && !findMatch(*from))
// A match exists: build the result in a buffer sized for the whole string.
911 StringBuffer<CharType> data(m_length);
912 CharType* to = data.characters();
// |outc| counts the characters written so far; it starts at the length of the clean prefix.
913 unsigned outc = from - characters;
916 memcpy(to, characters, outc * sizeof(CharType));
// Alternate between skipping a run of matching characters and copying a run of kept ones.
919 while (from != fromend && findMatch(*from))
921 while (from != fromend && !findMatch(*from))
922 to[outc++] = *from++;
929 return data.release();
// Returns the string without the characters matched by |findMatch|, forwarding to the
// templated worker with either the 8-bit or the 16-bit character buffer.
932 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
935 return removeCharacters(characters8(), findMatch);
936 return removeCharacters(characters16(), findMatch);
// Width-generic worker for simplifyWhiteSpace(): rewrites the string into a scratch buffer,
// treating characters accepted by |predicate| as white space. |stripBehavior| selects between
// the StripExtraWhiteSpace mode and a second mode handled by the plain loop further down.
939 template <typename CharType, class UCharPredicate>
940 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate, StripBehavior stripBehavior)
942 StringBuffer<CharType> data(m_length);
944 const CharType* from = getCharacters<CharType>();
945 const CharType* fromend = from + m_length;
// Tracks whether a matched character was rewritten; consulted by the "nothing changed" check below.
947 bool changedToSpace = false;
949 CharType* to = data.characters();
951 if (stripBehavior == StripExtraWhiteSpace) {
// Consume a run of matched characters, then copy a run of unmatched ones.
953 while (from != fromend && predicate(*from)) {
955 changedToSpace = true;
958 while (from != fromend && !predicate(*from))
959 to[outc++] = *from++;
// Detects a space left at the very end of the output.
966 if (outc > 0 && to[outc - 1] == ' ')
// Other mode: single pass over every character.
969 for (; from != fromend; ++from) {
970 if (predicate(*from)) {
972 changedToSpace = true;
// Same length and no rewrite: the output is identical to the input.
980 if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
985 return data.release();
// Simplifies white space as defined by isSpaceOrNewline(), forwarding to the templated worker
// instantiated for LChar or UChar.
988 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavior)
991 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlinePredicate(), stripBehavior);
992 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePredicate(), stripBehavior);
// Simplifies white space as defined by the caller-supplied |isWhiteSpace| function, forwarding
// to the templated worker instantiated for LChar or UChar.
995 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace, StripBehavior stripBehavior)
998 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicate(isWhiteSpace), stripBehavior);
999 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(isWhiteSpace), stripBehavior);
// Converts the string to int in |base| by forwarding to charactersToIntStrict() with the
// 8-bit or the 16-bit character buffer; |ok| is passed through to that helper.
1002 int StringImpl::toIntStrict(bool* ok, int base)
1005 return charactersToIntStrict(characters8(), m_length, ok, base);
1006 return charactersToIntStrict(characters16(), m_length, ok, base);
// Converts the string to unsigned in |base| by forwarding to charactersToUIntStrict() with the
// 8-bit or the 16-bit character buffer; |ok| is passed through to that helper.
1009 unsigned StringImpl::toUIntStrict(bool* ok, int base)
1012 return charactersToUIntStrict(characters8(), m_length, ok, base);
1013 return charactersToUIntStrict(characters16(), m_length, ok, base);
// Converts the string to int64_t in |base| by forwarding to charactersToInt64Strict() with the
// 8-bit or the 16-bit character buffer; |ok| is passed through to that helper.
1016 int64_t StringImpl::toInt64Strict(bool* ok, int base)
1019 return charactersToInt64Strict(characters8(), m_length, ok, base);
1020 return charactersToInt64Strict(characters16(), m_length, ok, base);
// Converts the string to uint64_t in |base| by forwarding to charactersToUInt64Strict() with
// the 8-bit or the 16-bit character buffer; |ok| is passed through to that helper.
1023 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
1026 return charactersToUInt64Strict(characters8(), m_length, ok, base);
1027 return charactersToUInt64Strict(characters16(), m_length, ok, base);
// Converts the string to intptr_t in |base| by forwarding to charactersToIntPtrStrict() with
// the 8-bit or the 16-bit character buffer; |ok| is passed through to that helper.
1030 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
1033 return charactersToIntPtrStrict(characters8(), m_length, ok, base);
1034 return charactersToIntPtrStrict(characters16(), m_length, ok, base);
// Converts the string to int by forwarding to charactersToInt() with the 8-bit or the 16-bit
// character buffer; |ok| is passed through to that helper.
1037 int StringImpl::toInt(bool* ok)
1040 return charactersToInt(characters8(), m_length, ok);
1041 return charactersToInt(characters16(), m_length, ok);
// Converts the string to unsigned by forwarding to charactersToUInt() with the 8-bit or the
// 16-bit character buffer; |ok| is passed through to that helper.
1044 unsigned StringImpl::toUInt(bool* ok)
1047 return charactersToUInt(characters8(), m_length, ok);
1048 return charactersToUInt(characters16(), m_length, ok);
// Converts the string to int64_t by forwarding to charactersToInt64() with the 8-bit or the
// 16-bit character buffer; |ok| is passed through to that helper.
1051 int64_t StringImpl::toInt64(bool* ok)
1054 return charactersToInt64(characters8(), m_length, ok);
1055 return charactersToInt64(characters16(), m_length, ok);
// Converts the string to uint64_t by forwarding to charactersToUInt64() with the 8-bit or the
// 16-bit character buffer; |ok| is passed through to that helper.
1058 uint64_t StringImpl::toUInt64(bool* ok)
1061 return charactersToUInt64(characters8(), m_length, ok);
1062 return charactersToUInt64(characters16(), m_length, ok);
// Converts the string to intptr_t by forwarding to charactersToIntPtr() with the 8-bit or the
// 16-bit character buffer; |ok| is passed through to that helper.
1065 intptr_t StringImpl::toIntPtr(bool* ok)
1068 return charactersToIntPtr(characters8(), m_length, ok);
1069 return charactersToIntPtr(characters16(), m_length, ok);
// Converts the string to double by forwarding to charactersToDouble() with the 8-bit or the
// 16-bit character buffer; |ok| is passed through to that helper.
1072 double StringImpl::toDouble(bool* ok)
1075 return charactersToDouble(characters8(), m_length, ok);
1076 return charactersToDouble(characters16(), m_length, ok);
// Converts the string to float by forwarding to charactersToFloat() with the 8-bit or the
// 16-bit character buffer; |ok| is passed through to that helper.
1079 float StringImpl::toFloat(bool* ok)
1082 return charactersToFloat(characters8(), m_length, ok);
1083 return charactersToFloat(characters16(), m_length, ok);
// Case-insensitive comparison of two 8-bit buffers of |length| characters: characters are
// compared after passing both through foldCase().
1086 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length)
1090 if (foldCase(*a++) != foldCase(bc))
// Case-insensitive comparison of a 16-bit buffer with an 8-bit buffer of |length| characters:
// characters are compared after passing both through foldCase().
1096 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length)
1100 if (foldCase(*a++) != foldCase(bc))
// Searches from |start| using |matchFunction|, forwarding to WTF::find() with the 8-bit or
// the 16-bit character buffer.
1106 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
1109 return WTF::find(characters8(), m_length, matchFunction, start);
1110 return WTF::find(characters16(), m_length, matchFunction, start);
1113 size_t StringImpl::find(const LChar* matchString, unsigned index)
1115 // Check for null or empty string to match against
1118 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1119 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1120 unsigned matchLength = matchStringLength;
1122 return min(index, length());
1124 // Optimization 1: fast case for strings of length 1.
1125 if (matchLength == 1)
1126 return WTF::find(characters16(), length(), *matchString, index);
1128 // Check index & matchLength are in range.
1129 if (index > length())
1131 unsigned searchLength = length() - index;
1132 if (matchLength > searchLength)
1134 // delta is the number of additional times to test; delta == 0 means test only once.
1135 unsigned delta = searchLength - matchLength;
1137 const UChar* searchCharacters = characters16() + index;
1139 // Optimization 2: keep a running hash of the strings,
1140 // only call equal if the hashes match.
1141 unsigned searchHash = 0;
1142 unsigned matchHash = 0;
1143 for (unsigned i = 0; i < matchLength; ++i) {
1144 searchHash += searchCharacters[i];
1145 matchHash += matchString[i];
1149 // keep looping until we match
1150 while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
1153 searchHash += searchCharacters[i + matchLength];
1154 searchHash -= searchCharacters[i];
// Width-generic worker for findIgnoringCase(): tests successive positions of
// |searchCharacters| against |matchString| with equalIgnoringCase(). The caller guarantees
// searchLength >= matchLength.
1160 template<typename CharType>
1161 ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters, const LChar* matchString, unsigned index, unsigned searchLength, unsigned matchLength)
1163 // delta is the number of additional times to test; delta == 0 means test only once.
1164 unsigned delta = searchLength - matchLength;
1167 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
// Case-insensitive search for the NUL-terminated 8-bit string matchString,
// beginning at character offset index.
// NOTE(review): several guard conditions and their return statements are not
// shown in this excerpt; the comments below describe only visible statements.
1175 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index)
1177 // Check for null or empty string to match against
// strlen() yields a size_t; assert that it fits before narrowing to unsigned.
1180 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1181 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1182 unsigned matchLength = matchStringLength;
// Presumably the result for an empty matchString (its guard is not shown):
// index clamped to the string length.
1184 return min(index, length());
1186 // Check index & matchLength are in range.
1187 if (index > length())
1189 unsigned searchLength = length() - index;
1190 if (matchLength > searchLength)
// Hand the remaining searchLength characters, already offset by index, to the
// helper for the 8-bit or the 16-bit buffer.
1194 return findIgnoringCaseInternal(characters8() + index, matchString, index, searchLength, matchLength);
1195 return findIgnoringCaseInternal(characters16() + index, matchString, index, searchLength, matchLength);
// Substring search helper used by the StringImpl::find(StringImpl*, ...)
// overloads; the search and match buffers may each be 8-bit or 16-bit.
// Callers pass searchCharacters already offset to the first candidate
// position and pass that offset separately as `index`.
// searchLength - matchLength is an unsigned subtraction, so matchLength must
// not exceed searchLength.
// NOTE(review): the declaration and advance of `i`, the stop test against
// `delta` and the return statements are not shown in this excerpt.
1198 template <typename SearchCharacterType, typename MatchCharacterType>
1199 ALWAYS_INLINE static size_t findInternal(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
1201 // Optimization: keep a running hash of the strings,
1202 // only call equal() if the hashes match.
1204 // delta is the number of additional times to test; delta == 0 means test only once.
1205 unsigned delta = searchLength - matchLength;
// The "hash" is a plain sum of the character values in a window of
// matchLength characters.
1207 unsigned searchHash = 0;
1208 unsigned matchHash = 0;
1210 for (unsigned i = 0; i < matchLength; ++i) {
1211 searchHash += searchCharacters[i];
1212 matchHash += matchCharacters[i];
1216 // keep looping until we match
1217 while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) {
// Slide the window: add the entering character, subtract the leaving one.
1220 searchHash += searchCharacters[i + matchLength];
1221 searchHash -= searchCharacters[i];
// Searches this string for matchString starting from offset 0. Each width
// combination (8/16-bit source with 8/16-bit pattern) is dispatched separately
// to WTF::find() or findInternal().
// NOTE(review): the results of the guard conditions and the tests on this
// string's own width (presumably is8Bit()) are not shown in this excerpt.
1227 size_t StringImpl::find(StringImpl* matchString)
1229 // Check for null string to match against
1230 if (UNLIKELY(!matchString))
1232 unsigned matchLength = matchString->length();
1234 // Optimization 1: fast case for strings of length 1.
1235 if (matchLength == 1) {
// Source read through characters8().
1237 if (matchString->is8Bit())
1238 return WTF::find(characters8(), length(), matchString->characters8()[0]);
1239 return WTF::find(characters8(), length(), matchString->characters16()[0]);
// Source read through characters16().
1241 if (matchString->is8Bit())
1242 return WTF::find(characters16(), length(), matchString->characters8()[0]);
1243 return WTF::find(characters16(), length(), matchString->characters16()[0]);
1246 // Check matchLength is in range.
1247 if (matchLength > length())
1250 // Check for empty string to match against
1251 if (UNLIKELY(!matchLength))
// General case: search the whole string (index 0, searchLength == length()).
1255 if (matchString->is8Bit())
1256 return findInternal(characters8(), matchString->characters8(), 0, length(), matchLength);
1257 return findInternal(characters8(), matchString->characters16(), 0, length(), matchLength);
1260 if (matchString->is8Bit())
1261 return findInternal(characters16(), matchString->characters8(), 0, length(), matchLength);
1263 return findInternal(characters16(), matchString->characters16(), 0, length(), matchLength);
// Searches this string for matchString starting at character offset index.
// NOTE(review): the results of several guard conditions and the tests on this
// string's own width (presumably is8Bit()) are not shown in this excerpt.
1266 size_t StringImpl::find(StringImpl* matchString, unsigned index)
1268 // Check for null or empty string to match against
1269 if (UNLIKELY(!matchString))
1272 unsigned matchLength = matchString->length();
1274 // Optimization 1: fast case for strings of length 1.
1275 if (matchLength == 1) {
// (*matchString)[0] reads the single pattern character regardless of the
// pattern's width.
1277 return WTF::find(characters8(), length(), (*matchString)[0], index);
1278 return WTF::find(characters16(), length(), (*matchString)[0], index);
// An empty pattern yields index clamped to the string length.
1281 if (UNLIKELY(!matchLength))
1282 return min(index, length());
1284 // Check index & matchLength are in range.
1285 if (index > length())
1287 unsigned searchLength = length() - index;
1288 if (matchLength > searchLength)
// General case: pass the buffer offset by index and the remaining
// searchLength to findInternal() for each width combination.
1292 if (matchString->is8Bit())
1293 return findInternal(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
1294 return findInternal(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
1297 if (matchString->is8Bit())
1298 return findInternal(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
1300 return findInternal(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
// Case-insensitive search helper for any combination of 8-bit and 16-bit
// buffers: tests equalIgnoringCase() on matchLength characters at offset i
// until a comparison succeeds.
// searchLength - matchLength is an unsigned subtraction, so matchLength must
// not exceed searchLength.
// NOTE(review): the declaration and advance of `i`, the stop test against
// `delta` and the return statements are not shown in this excerpt.
1303 template <typename SearchCharacterType, typename MatchCharacterType>
1304 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
1306 // delta is the number of additional times to test; delta == 0 means test only once.
1307 unsigned delta = searchLength - matchLength;
1310 // keep looping until we match
1311 while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
// Case-insensitive search for matchString starting at character offset index.
// NOTE(review): the null/empty guards, the results of the range checks and the
// tests on this string's own width are not shown in this excerpt.
1319 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
1321 // Check for null or empty string to match against
1324 unsigned matchLength = matchString->length();
// Presumably the result for an empty pattern (its guard is not shown): index
// clamped to the string length.
1326 return min(index, length());
1328 // Check index & matchLength are in range.
1329 if (index > length())
1331 unsigned searchLength = length() - index;
1332 if (matchLength > searchLength)
// Dispatch each width combination to findIgnoringCaseInner(), passing the
// buffer offset by index and the remaining searchLength.
1336 if (matchString->is8Bit())
1337 return findIgnoringCaseInner(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
1338 return findIgnoringCaseInner(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
1341 if (matchString->is8Bit())
1342 return findIgnoringCaseInner(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
1344 return findIgnoringCaseInner(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
// Forwards to WTF::findNextLineStart() with m_length and index, using either
// the 8-bit or the 16-bit character buffer.
// NOTE(review): the condition selecting between the two returns is not shown
// in this excerpt; presumably an is8Bit() test -- confirm.
1347 size_t StringImpl::findNextLineStart(unsigned index)
1350 return WTF::findNextLineStart(characters8(), m_length, index);
1351 return WTF::findNextLineStart(characters16(), m_length, index);
// Counts the characters equal to c: each loop adds the boolean result of the
// comparison (0 or 1) for every one of the m_length characters. One loop reads
// the 8-bit buffer, the other the 16-bit buffer.
// NOTE(review): the declaration of `count`, the width test choosing a loop and
// the return statement are not shown in this excerpt.
1354 size_t StringImpl::count(LChar c) const
1358 for (size_t i = 0; i < m_length; ++i)
1359 count += characters8()[i] == c;
1361 for (size_t i = 0; i < m_length; ++i)
1362 count += characters16()[i] == c;
// Forwards to WTF::reverseFind() with the character c and index, using either
// the 8-bit or the 16-bit character buffer together with m_length.
// NOTE(review): the condition selecting between the two returns is not shown
// in this excerpt; presumably an is8Bit() test -- confirm.
1367 size_t StringImpl::reverseFind(UChar c, unsigned index)
1370 return WTF::reverseFind(characters8(), m_length, c, index);
1371 return WTF::reverseFind(characters16(), m_length, c, index);
// Backwards substring search helper used by
// StringImpl::reverseFind(StringImpl*, unsigned) for every combination of
// 8-bit and 16-bit buffers.
// length - matchLength is an unsigned subtraction, so matchLength must not
// exceed length.
// NOTE(review): the loop's stop test, the update of `delta` (presumably a
// decrement) and the return statements are not shown in this excerpt.
1374 template <typename SearchCharacterType, typename MatchCharacterType>
1375 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1377 // Optimization: keep a running hash of the strings,
1378 // only call equal if the hashes match.
1380 // delta is the number of additional times to test; delta == 0 means test only once.
// The first window starts at the smaller of index and the last offset at
// which matchLength characters still fit.
1381 unsigned delta = min(index, length - matchLength);
// The "hash" is a plain sum of the character values in the window.
1383 unsigned searchHash = 0;
1384 unsigned matchHash = 0;
1385 for (unsigned i = 0; i < matchLength; ++i) {
1386 searchHash += searchCharacters[delta + i];
1387 matchHash += matchCharacters[i];
1390 // keep looping until we match
1391 while (searchHash != matchHash || !equal(searchCharacters + delta, matchCharacters, matchLength)) {
// Re-base the sum on the window at `delta`: drop the character just past the
// window's end and add the character at its start.
1395 searchHash -= searchCharacters[delta + matchLength];
1396 searchHash += searchCharacters[delta];
// Backwards search for matchString, with index passed through to the helpers
// as the starting offset.
// NOTE(review): the null/empty guards, the result of the range check and the
// tests on this string's own width are not shown in this excerpt.
1401 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
1403 // Check for null or empty string to match against
1406 unsigned matchLength = matchString->length();
1407 unsigned ourLength = length();
// Presumably the result for an empty pattern (its guard is not shown): index
// clamped to this string's length.
1409 return min(index, ourLength);
1411 // Optimization 1: fast case for strings of length 1.
1412 if (matchLength == 1) {
1414 return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], index);
1415 return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], index);
1418 // Check index & matchLength are in range.
1419 if (matchLength > ourLength)
// General case: dispatch each width combination to reverseFindInner() with the
// whole buffer and ourLength.
1423 if (matchString->is8Bit())
1424 return reverseFindInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
1425 return reverseFindInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
1428 if (matchString->is8Bit())
1429 return reverseFindInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
1431 return reverseFindInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
// Case-insensitive backwards search helper: tests equalIgnoringCase() on
// matchLength characters at offset `delta` until a comparison succeeds.
// The first offset tested is the smaller of index and length - matchLength;
// that subtraction is unsigned, so matchLength must not exceed length.
// NOTE(review): the loop's stop test, the update of `delta` and the return
// statements are not shown in this excerpt.
1434 template <typename SearchCharacterType, typename MatchCharacterType>
1435 ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1437 // delta is the number of additional times to test; delta == 0 means test only once.
1438 unsigned delta = min(index, length - matchLength);
1440 // keep looping until we match
1441 while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) {
// Case-insensitive backwards search for matchString, with index passed through
// to the helper as the starting offset.
// NOTE(review): the null/empty guards, the result of the range check and the
// tests on this string's own width are not shown in this excerpt.
1449 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index)
1451 // Check for null or empty string to match against
1454 unsigned matchLength = matchString->length();
1455 unsigned ourLength = length();
// Presumably the result for an empty pattern (its guard is not shown): index
// clamped to this string's length.
1457 return min(index, ourLength);
1459 // Check index & matchLength are in range.
1460 if (matchLength > ourLength)
// Dispatch each width combination to reverseFindIgnoringCaseInner().
1464 if (matchString->is8Bit())
1465 return reverseFindIgnoringCaseInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
1466 return reverseFindIgnoringCaseInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
1469 if (matchString->is8Bit())
1470 return reverseFindIgnoringCaseInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
1472 return reverseFindIgnoringCaseInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
// Compares matchLength characters of stringImpl, starting at startOffset,
// with the 8-bit buffer matchString (reinterpreted as LChar). caseSensitive
// selects equal() versus equalIgnoringCase(); within each, the string's width
// selects the 8-bit or 16-bit buffer.
// The two ASSERTs state the precondition that the compared range lies inside
// stringImpl.
1475 ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned startOffset, const char* matchString, unsigned matchLength, bool caseSensitive)
1478 ASSERT(matchLength <= stringImpl->length());
1479 ASSERT(startOffset + matchLength <= stringImpl->length());
1481 if (caseSensitive) {
1482 if (stringImpl->is8Bit())
1483 return equal(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1484 return equal(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
// Case-insensitive variants of the same two comparisons.
1486 if (stringImpl->is8Bit())
1487 return equalIgnoringCase(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1488 return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
// True when the string is non-empty and its first character equals
// `character`; the m_length test short-circuits before index 0 is read.
1491 bool StringImpl::startsWith(UChar character) const
1493 return m_length && (*this)[0] == character;
// Tests whether this string begins with the first matchLength characters of
// matchString by comparing at offset 0 through equalInner(). matchLength is
// asserted to be non-zero.
// NOTE(review): the result returned when matchLength exceeds length() is not
// shown in this excerpt.
1496 bool StringImpl::startsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
1498 ASSERT(matchLength);
1499 if (matchLength > length())
1501 return equalInner(this, 0, matchString, matchLength, caseSensitive);
// Tests whether this string ends with matchString (asserted non-null).
// `start` is the offset at which a suffix of matchString's length begins; the
// suffix matches only if a search from `start` finds matchString exactly at
// `start`. caseSensitive selects find() versus findIgnoringCase().
// NOTE(review): the result returned when this string is shorter than
// matchString is not shown in this excerpt.
1504 bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive)
1506 ASSERT(matchString);
1507 if (m_length >= matchString->m_length) {
1508 unsigned start = m_length - matchString->m_length;
1509 return (caseSensitive ? find(matchString, start) : findIgnoringCase(matchString, start)) == start;
// True when the string is non-empty and its last character equals
// `character`; the m_length test short-circuits before m_length - 1 is used.
1514 bool StringImpl::endsWith(UChar character) const
1516 return m_length && (*this)[m_length - 1] == character;
// Tests whether this string ends with the first matchLength characters of
// matchString by comparing at offset length() - matchLength through
// equalInner(). matchLength is asserted to be non-zero.
// NOTE(review): the result returned when matchLength exceeds length() is not
// shown in this excerpt.
1519 bool StringImpl::endsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
1521 ASSERT(matchLength);
1522 if (matchLength > length())
1524 unsigned startOffset = length() - matchLength;
1525 return equalInner(this, startOffset, matchString, matchLength, caseSensitive);
// Builds a new string of the same length (m_length) by visiting every source
// character. Three copy paths are visible: an 8-bit source copied as LChar
// with oldC/newC narrowed to LChar, an 8-bit source widened to UChar, and a
// 16-bit source.
// NOTE(review): the branch conditions, the declarations of `i` and `data`, the
// per-character substitution of oldC by newC and the early return taken when
// oldC does not occur are not shown in this excerpt.
1528 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
1533 if (find(oldC) == kNotFound)
// 8-bit result: both characters are narrowed to LChar.
1540 LChar oldChar = static_cast<LChar>(oldC);
1541 LChar newChar = static_cast<LChar>(newC);
1543 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1545 for (i = 0; i != m_length; ++i) {
1546 LChar ch = characters8()[i];
1551 return newImpl.release();
1554 // There is the possibility we need to up convert from 8 to 16 bit,
1555 // create a 16 bit string for the result.
1557 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1559 for (i = 0; i != m_length; ++i) {
1560 UChar ch = characters8()[i];
1566 return newImpl.release();
// 16-bit source.
1570 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1572 for (i = 0; i != m_length; ++i) {
1573 UChar ch = characters16()[i];
1578 return newImpl.release();
// Builds a new string in which lengthToReplace characters starting at position
// are replaced by the contents of str (a null str inserts nothing).
// position and lengthToReplace are clamped to the string's bounds first.
// NOTE(review): the `data` declarations, the width tests inside the 16-bit
// path and the result returned when nothing is replaced or inserted are not
// shown in this excerpt.
1581 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
1583 position = min(position, length());
1584 lengthToReplace = min(lengthToReplace, length() - position);
1585 unsigned lengthToInsert = str ? str->length() : 0;
1586 if (!lengthToReplace && !lengthToInsert)
// Guard the result length (length() - lengthToReplace + lengthToInsert)
// against unsigned overflow.
1589 RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max() - lengthToInsert));
// All-8-bit case: the result stays 8-bit and each of the three pieces (prefix,
// inserted text, suffix) is copied with memcpy.
1591 if (is8Bit() && (!str || str->is8Bit())) {
1593 RefPtr<StringImpl> newImpl =
1594 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1595 memcpy(data, characters8(), position * sizeof(LChar));
1597 memcpy(data + position, str->characters8(), lengthToInsert * sizeof(LChar));
1598 memcpy(data + position + lengthToInsert, characters8() + position + lengthToReplace,
1599 (length() - position - lengthToReplace) * sizeof(LChar));
1600 return newImpl.release();
// Otherwise each piece is either copied character by character from an 8-bit
// buffer or memcpy'd from a 16-bit buffer.
1603 RefPtr<StringImpl> newImpl =
1604 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
// Prefix [0, position).
1606 for (unsigned i = 0; i < position; ++i)
1607 data[i] = characters8()[i];
1609 memcpy(data, characters16(), position * sizeof(UChar));
// Inserted text.
1612 for (unsigned i = 0; i < lengthToInsert; ++i)
1613 data[i + position] = str->characters8()[i];
1615 memcpy(data + position, str->characters16(), lengthToInsert * sizeof(UChar));
// Suffix following the replaced range.
1618 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i)
1619 data[i + position + lengthToInsert] = characters8()[i + position + lengthToReplace];
1621 memcpy(data + position + lengthToInsert, characters16() + position + lengthToReplace,
1622 (length() - position - lengthToReplace) * sizeof(UChar));
1624 return newImpl.release();
// Forwards to the replace(UChar, const LChar*, unsigned) or
// replace(UChar, const UChar*, unsigned) overload according to the width of
// replacement, passing its character buffer and length.
// NOTE(review): replacement is dereferenced here; any null check that precedes
// this is not shown in this excerpt.
1627 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
1632 if (replacement->is8Bit())
1633 return replace(pattern, replacement->characters8(), replacement->length());
1635 return replace(pattern, replacement->characters16(), replacement->length());
// Builds a new string in which each occurrence of the character `pattern` is
// replaced by the repStrLength 8-bit characters at `replacement`.
// Two passes: the first counts the matches to size the result, the second
// copies the segments between matches interleaved with the replacement.
// NOTE(review): the counting loop's body, the zero-match early return, the
// `data` declarations and the test selecting the 8-bit or 16-bit path are not
// shown in this excerpt.
1638 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement, unsigned repStrLength)
1640 ASSERT(replacement);
1642 size_t srcSegmentStart = 0;
1643 unsigned matchCount = 0;
1645 // Count the matches.
1646 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1651 // If we have 0 matches then we don't have to do any more work.
// Guard matchCount * repStrLength against unsigned overflow.
1655 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1657 unsigned replaceSize = matchCount * repStrLength;
// Each match removes exactly one character from the source.
1658 unsigned newSize = m_length - matchCount;
// Guard newSize + replaceSize against unsigned overflow.
1659 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1661 newSize += replaceSize;
1663 // Construct the new data.
1664 size_t srcSegmentEnd;
1665 unsigned srcSegmentLength;
1666 srcSegmentStart = 0;
1667 unsigned dstOffset = 0;
// 8-bit source: source segments and replacement are both copied with memcpy.
1671 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1673 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1674 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1675 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1676 dstOffset += srcSegmentLength;
1677 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar));
1678 dstOffset += repStrLength;
// Skip the single matched character.
1679 srcSegmentStart = srcSegmentEnd + 1;
// Copy the tail after the last match.
1682 srcSegmentLength = m_length - srcSegmentStart;
1683 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1685 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1687 return newImpl.release();
// 16-bit source: source segments are memcpy'd, the 8-bit replacement is
// widened character by character.
1691 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1693 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1694 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1695 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1697 dstOffset += srcSegmentLength;
1698 for (unsigned i = 0; i < repStrLength; ++i)
1699 data[i + dstOffset] = replacement[i];
1701 dstOffset += repStrLength;
1702 srcSegmentStart = srcSegmentEnd + 1;
1705 srcSegmentLength = m_length - srcSegmentStart;
1706 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1708 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1710 return newImpl.release();
// Builds a new string in which each occurrence of the character `pattern` is
// replaced by the repStrLength 16-bit characters at `replacement`.
// Two passes: the first counts the matches to size the result, the second
// copies the segments between matches interleaved with the replacement.
// NOTE(review): the counting loop's body, the zero-match early return, the
// `data` declarations and the test selecting the 8-bit or 16-bit path are not
// shown in this excerpt.
1713 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement, unsigned repStrLength)
1715 ASSERT(replacement);
1717 size_t srcSegmentStart = 0;
1718 unsigned matchCount = 0;
1720 // Count the matches.
1721 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1726 // If we have 0 matches then we don't have to do any more work.
// Guard matchCount * repStrLength against unsigned overflow.
1730 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1732 unsigned replaceSize = matchCount * repStrLength;
// Each match removes exactly one character from the source.
1733 unsigned newSize = m_length - matchCount;
// Guard newSize + replaceSize against unsigned overflow.
1734 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1736 newSize += replaceSize;
1738 // Construct the new data.
1739 size_t srcSegmentEnd;
1740 unsigned srcSegmentLength;
1741 srcSegmentStart = 0;
1742 unsigned dstOffset = 0;
// 8-bit source: source segments are widened character by character, the
// 16-bit replacement is memcpy'd.
1746 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1748 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1749 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1750 for (unsigned i = 0; i < srcSegmentLength; ++i)
1751 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1753 dstOffset += srcSegmentLength;
1754 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1756 dstOffset += repStrLength;
// Skip the single matched character.
1757 srcSegmentStart = srcSegmentEnd + 1;
// Copy the tail after the last match.
1760 srcSegmentLength = m_length - srcSegmentStart;
1761 for (unsigned i = 0; i < srcSegmentLength; ++i)
1762 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1764 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1766 return newImpl.release();
// 16-bit source: both source segments and replacement are memcpy'd.
1770 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1772 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1773 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1774 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1776 dstOffset += srcSegmentLength;
1777 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1779 dstOffset += repStrLength;
1780 srcSegmentStart = srcSegmentEnd + 1;
1783 srcSegmentLength = m_length - srcSegmentStart;
1784 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1786 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1788 return newImpl.release();
// Builds a new string in which each occurrence of the string `pattern` is
// replaced by `replacement`.
// Two passes: the first counts non-overlapping matches to size the result, the
// second copies the segments between matches interleaved with the replacement.
// NOTE(review): the results returned for null arguments and for zero matches,
// any check on patternLength, the match-count increment, the `data`
// declarations and the width tests inside the second copy path are not shown
// in this excerpt.
1791 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
1793 if (!pattern || !replacement)
1796 unsigned patternLength = pattern->length();
1800 unsigned repStrLength = replacement->length();
1801 size_t srcSegmentStart = 0;
1802 unsigned matchCount = 0;
1804 // Count the matches.
1805 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
// Resume the search after the whole match so matches do not overlap.
1807 srcSegmentStart += patternLength;
1810 // If we have 0 matches, we don't have to do any more work
// Every match removes patternLength characters from the source.
1814 unsigned newSize = m_length - matchCount * patternLength;
// Guard matchCount * repStrLength against unsigned overflow.
1815 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
// Guard the addition below against unsigned overflow.
1817 RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * repStrLength));
1819 newSize += matchCount * repStrLength;
1822 // Construct the new data
1823 size_t srcSegmentEnd;
1824 unsigned srcSegmentLength;
1825 srcSegmentStart = 0;
1826 unsigned dstOffset = 0;
1827 bool srcIs8Bit = is8Bit();
1828 bool replacementIs8Bit = replacement->is8Bit();
1830 // There are 4 cases:
1831 // 1. This and replacement are both 8 bit.
1832 // 2. This and replacement are both 16 bit.
1833 // 3. This is 8 bit and replacement is 16 bit.
1834 // 4. This is 16 bit and replacement is 8 bit.
// Case 1: everything is copied with memcpy over LChar.
1835 if (srcIs8Bit && replacementIs8Bit) {
1838 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1839 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1840 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1841 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1842 dstOffset += srcSegmentLength;
1843 memcpy(data + dstOffset, replacement->characters8(), repStrLength * sizeof(LChar));
1844 dstOffset += repStrLength;
1845 srcSegmentStart = srcSegmentEnd + patternLength;
// Copy the tail after the last match.
1848 srcSegmentLength = m_length - srcSegmentStart;
1849 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1851 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1853 return newImpl.release();
// Cases 2-4: 8-bit pieces are copied character by character, 16-bit pieces
// with memcpy.
1857 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1858 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1859 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1862 for (unsigned i = 0; i < srcSegmentLength; ++i)
1863 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1866 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1868 dstOffset += srcSegmentLength;
1869 if (replacementIs8Bit) {
1871 for (unsigned i = 0; i < repStrLength; ++i)
1872 data[i + dstOffset] = replacement->characters8()[i];
1875 memcpy(data + dstOffset, replacement->characters16(), repStrLength * sizeof(UChar));
1877 dstOffset += repStrLength;
1878 srcSegmentStart = srcSegmentEnd + patternLength;
// Copy the tail after the last match.
1881 srcSegmentLength = m_length - srcSegmentStart;
1884 for (unsigned i = 0; i < srcSegmentLength; ++i)
1885 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1888 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1891 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1893 return newImpl.release();
// The visible path builds a 16-bit string from this string's 8-bit characters
// via String::make16BitFrom8BitSource() and returns its impl.
// NOTE(review): the condition guarding this return and the result for the
// other case are not shown in this excerpt.
1896 PassRefPtr<StringImpl> StringImpl::upconvertedString()
1899 return String::make16BitFrom8BitSource(characters8(), m_length).releaseImpl();
// Compares the contents of two non-null StringImpls: lengths are compared
// first, then the buffers are compared with the equal() overload for the
// relevant combination of 8-bit and 16-bit widths.
// NOTE(review): the result for differing lengths and the width tests choosing
// among the four returns are not shown in this excerpt.
1903 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b)
1905 unsigned aLength = a->length();
1906 unsigned bLength = b->length();
1907 if (aLength != bLength)
1912 return equal(a->characters8(), b->characters8(), aLength);
1914 return equal(a->characters8(), b->characters16(), aLength);
1918 return equal(a->characters16(), b->characters8(), aLength);
1920 return equal(a->characters16(), b->characters16(), aLength);
// Equality of two StringImpls. The case where both strings are atomic is
// handled separately; otherwise the contents are compared with
// stringImplContentEqual().
// NOTE(review): the pointer-identity/null checks that presumably precede the
// atomic test, and the result returned for two atomic strings, are not shown
// in this excerpt.
1923 bool equal(const StringImpl* a, const StringImpl* b)
1929 if (a->isAtomic() && b->isAtomic())
1932 return stringImplContentEqual(a, b);
// Shared implementation of equal(const StringImpl*, const LChar*/UChar*,
// unsigned): the lengths are compared first, then a's 8-bit or 16-bit buffer
// is compared with b over `length` characters.
// NOTE(review): any null handling, the result for differing lengths and the
// width test choosing between the two returns are not shown in this excerpt.
1935 template <typename CharType>
1936 inline bool equalInternal(const StringImpl* a, const CharType* b, unsigned length)
1943 if (a->length() != length)
1946 return equal(a->characters8(), b, length);
1947 return equal(a->characters16(), b, length);
// Compares a with the 8-bit buffer b of the given length; forwards to
// equalInternal().
1950 bool equal(const StringImpl* a, const LChar* b, unsigned length)
1952 return equalInternal(a, b, length);
// Compares a with the 16-bit buffer b of the given length; forwards to
// equalInternal().
1955 bool equal(const StringImpl* a, const UChar* b, unsigned length)
1957 return equalInternal(a, b, length);
// Compares a with the 8-bit buffer b, which is passed without a length
// (presumably NUL-terminated -- confirm). One loop walks a's 8-bit buffer,
// the other its 16-bit buffer, each over a->length() characters.
// NOTE(review): the null handling, the width test, the loop bodies and the
// return statements are not shown in this excerpt.
1960 bool equal(const StringImpl* a, const LChar* b)
1967 unsigned length = a->length();
1970 const LChar* aPtr = a->characters8();
1971 for (unsigned i = 0; i != length; ++i) {
1983 const UChar* aPtr = a->characters16();
1984 for (unsigned i = 0; i != length; ++i) {
// Equality for two StringImpls that the name indicates are non-null; the
// visible path compares contents with stringImplContentEqual().
// NOTE(review): the statements preceding this return (presumably assertions
// and a pointer-identity shortcut) are not shown in this excerpt.
1995 bool equalNonNull(const StringImpl* a, const StringImpl* b)
2001 return stringImplContentEqual(a, b);
// Case-insensitive equality of two StringImpls; the visible path delegates to
// CaseFoldingHash::equal().
// NOTE(review): the statements preceding this return (presumably identity and
// null handling) are not shown in this excerpt.
2004 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b)
2011 return CaseFoldingHash::equal(a, b);
// Case-insensitive comparison of a with the 8-bit string b, which must end
// exactly at a's length (see the `!b[length]` checks).
// For each width of a, a first loop compares with toASCIILower(); a second
// loop using foldCase() handles strings that contain non-ASCII characters.
// NOTE(review): the null handling, the declarations of `equal`, `ac` and `bc`,
// the detection of non-ASCII characters and the width test are not shown in
// this excerpt.
2014 bool equalIgnoringCase(const StringImpl* a, const LChar* b)
2021 unsigned length = a->length();
2023 // Do a faster loop for the case where all the characters are ASCII.
// 8-bit buffer of a.
2027 const LChar* as = a->characters8();
2028 for (unsigned i = 0; i != length; ++i) {
2034 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2037 // Do a slower implementation for cases that include non-ASCII characters.
2040 for (unsigned i = 0; i != length; ++i)
2041 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
// b must terminate right after the compared characters.
2044 return equal && !b[length];
// 16-bit buffer of a.
2047 const UChar* as = a->characters16();
2048 for (unsigned i = 0; i != length; ++i) {
2054 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2057 // Do a slower implementation for cases that include non-ASCII characters.
2060 for (unsigned i = 0; i != length; ++i) {
2061 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2065 return equal && !b[length];
// Case-insensitive comparison of two StringImpls that are dereferenced without
// a visible null check. Lengths are compared first; the buffers are then
// compared with the equalIgnoringCase() overload for each width combination.
// NOTE(review): the result for differing lengths and the width tests choosing
// among the four returns are not shown in this excerpt.
2068 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b)
2074 unsigned length = a->length();
2075 if (length != b->length())
2080 return equalIgnoringCase(a->characters8(), b->characters8(), length);
// Mixed widths: the 16-bit buffer is passed first, matching the
// equalIgnoringCase(const UChar*, const LChar*, unsigned) overload.
2082 return equalIgnoringCase(b->characters16(), a->characters8(), length);
2086 return equalIgnoringCase(a->characters16(), b->characters8(), length);
2088 return equalIgnoringCase(a->characters16(), b->characters16(), length);
// The two visible conditions single out the case where one argument is null
// and the other is a non-null string of length zero.
// NOTE(review): the result for those cases (presumably true, per the function
// name) and the comparison used otherwise are not shown in this excerpt.
2091 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
2093 if (!a && b && !b->length())
2095 if (!b && a && !a->length())
// Returns `size` plus sizeof(*this); `size` starts as the character count.
// NOTE(review): the statement(s) between the initialisation and the return
// (presumably scaling by the character width for 16-bit strings) are not
// shown in this excerpt.
2100 size_t StringImpl::sizeInBytes() const
2102 size_t size = length();
2105 return size + sizeof(*this);