src/third_party/WebKit/Source/wtf/text/StringImpl.cpp

   1 /*
   2  * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
   3  *           (C) 1999 Antti Koivisto (koivisto@kde.org)
   4  *           (C) 2001 Dirk Mueller ( mueller@kde.org )
   5  * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.
   6  * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Library General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Library General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Library General Public License
  19  * along with this library; see the file COPYING.LIB.  If not, write to
  20  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  21  * Boston, MA 02110-1301, USA.
  22  *
  23  */
  24
  25 #include "config.h"
  26 #include "wtf/text/StringImpl.h"
  27
  28 #include "wtf/DynamicAnnotations.h"
  29 #include "wtf/LeakAnnotations.h"
  30 #include "wtf/MainThread.h"
  31 #include "wtf/OwnPtr.h"
  32 #include "wtf/PartitionAlloc.h"
  33 #include "wtf/PassOwnPtr.h"
  34 #include "wtf/StdLibExtras.h"
  35 #include "wtf/WTF.h"
  36 #include "wtf/text/AtomicString.h"
  37 #include "wtf/text/StringBuffer.h"
  38 #include "wtf/text/StringHash.h"
  39 #include "wtf/unicode/CharacterNames.h"
  40 #include <unicode/translit.h>
  41 #include <unicode/unistr.h>
  42
  43 #ifdef STRING_STATS
  44 #include "wtf/DataLog.h"
  45 #include "wtf/HashMap.h"
  46 #include "wtf/HashSet.h"
  47 #include "wtf/ProcessID.h"
  48 #include "wtf/RefCounted.h"
  49 #include "wtf/ThreadingPrimitives.h"
  50 #include <unistd.h>
  51 #endif
  52
  53 using namespace std;
  54
  55 namespace WTF {
  56
  57 using namespace Unicode;
  58
  59 COMPILE_ASSERT(sizeof(StringImpl) == 3 * sizeof(int), StringImpl_should_stay_small);
  60
  61 #ifdef STRING_STATS
  62
  63 static Mutex& statsMutex()
  64 {
  65     DEFINE_STATIC_LOCAL(Mutex, mutex, ());
  66     return mutex;
  67 }
  68
  69 static HashSet<void*>& liveStrings()
  70 {
  71     // Notice that we can't use HashSet<StringImpl*> because then HashSet would dedup identical strings.
  72     DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ());
  73     return strings;
  74 }
  75
  76 void addStringForStats(StringImpl* string)
  77 {
  78     MutexLocker locker(statsMutex());
  79     liveStrings().add(string);
  80 }
  81
  82 void removeStringForStats(StringImpl* string)
  83 {
  84     MutexLocker locker(statsMutex());
  85     liveStrings().remove(string);
  86 }
  87
  88 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet)
  89 {
  90     const unsigned kMaxSnippetLength = 64;
  91     snippet.clear();
  92
  93     size_t expectedLength = std::min(string->length(), kMaxSnippetLength);
  94     if (expectedLength == kMaxSnippetLength)
  95         expectedLength += 3; // For the "...".
  96     ++expectedLength; // For the terminating '\0'.
  97     snippet.reserveCapacity(expectedLength);
  98
  99     size_t i;
 100     for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) {
 101         UChar c = (*string)[i];
 102         if (isASCIIPrintable(c))
 103             snippet.append(c);
 104         else
 105             snippet.append('?');
 106     }
 107     if (i < string->length()) {
 108         snippet.append('.');
 109         snippet.append('.');
 110         snippet.append('.');
 111     }
 112     snippet.append('\0');
 113 }
 114
 115 static bool isUnnecessarilyWide(const StringImpl* string)
 116 {
 117     if (string->is8Bit())
 118         return false;
 119     UChar c = 0;
 120     for (unsigned i = 0; i < string->length(); ++i)
 121         c |= (*string)[i] >> 8;
 122     return !c;
 123 }
 124
 125 class PerStringStats : public RefCounted<PerStringStats> {
 126 public:
 127     static PassRefPtr<PerStringStats> create()
 128     {
 129         return adoptRef(new PerStringStats);
 130     }
 131
 132     void add(const StringImpl* string)
 133     {
 134         ++m_numberOfCopies;
 135         if (!m_length) {
 136             m_length = string->length();
 137             fillWithSnippet(string, m_snippet);
 138         }
 139         if (string->isAtomic())
 140             ++m_numberOfAtomicCopies;
 141         if (isUnnecessarilyWide(string))
 142             m_unnecessarilyWide = true;
 143     }
 144
 145     size_t totalCharacters() const
 146     {
 147         return m_numberOfCopies * m_length;
 148     }
 149
 150     void print()
 151     {
 152         const char* status = "ok";
 153         if (m_unnecessarilyWide)
 154             status = "16";
 155         dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, m_length, m_snippet.data());
 156     }
 157
 158     bool m_unnecessarilyWide;
 159     unsigned m_numberOfCopies;
 160     unsigned m_length;
 161     unsigned m_numberOfAtomicCopies;
 162     Vector<char> m_snippet;
 163
 164 private:
 165     PerStringStats()
 166         : m_unnecessarilyWide(false)
 167         , m_numberOfCopies(0)
 168         , m_length(0)
 169         , m_numberOfAtomicCopies(0)
 170     {
 171     }
 172 };
 173
 174 bool operator<(const RefPtr<PerStringStats>& a, const RefPtr<PerStringStats>& b)
 175 {
 176     if (a->m_unnecessarilyWide != b->m_unnecessarilyWide)
 177         return !a->m_unnecessarilyWide && b->m_unnecessarilyWide;
 178     if (a->totalCharacters() != b->totalCharacters())
 179         return a->totalCharacters() < b->totalCharacters();
 180     if (a->m_numberOfCopies != b->m_numberOfCopies)
 181         return a->m_numberOfCopies < b->m_numberOfCopies;
 182     if (a->m_length != b->m_length)
 183         return a->m_length < b->m_length;
 184     return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies;
 185 }
 186
 187 static void printLiveStringStats(void*)
 188 {
 189     MutexLocker locker(statsMutex());
 190     HashSet<void*>& strings = liveStrings();
 191
 192     HashMap<StringImpl*, RefPtr<PerStringStats> > stats;
 193     for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); ++iter) {
 194         StringImpl* string = static_cast<StringImpl*>(*iter);
 195         HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator entry = stats.find(string);
 196         RefPtr<PerStringStats> value = entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create()) : entry->value;
 197         value->add(string);
 198         stats.set(string, value.release());
 199     }
 200
 201     Vector<RefPtr<PerStringStats> > all;
 202     for (HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator iter = stats.begin(); iter != stats.end(); ++iter)
 203         all.append(iter->value);
 204
 205     std::sort(all.begin(), all.end());
 206     std::reverse(all.begin(), all.end());
 207     for (size_t i = 0; i < 20 && i < all.size(); ++i)
 208         all[i]->print();
 209 }
 210
 211 StringStats StringImpl::m_stringStats;
 212
 213 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printStringStatsFrequency;
 214
 215 void StringStats::removeString(StringImpl* string)
 216 {
 217     unsigned length = string->length();
 218     --m_totalNumberStrings;
 219
 220     if (string->is8Bit()) {
 221         --m_number8BitStrings;
 222         m_total8BitData -= length;
 223     } else {
 224         --m_number16BitStrings;
 225         m_total16BitData -= length;
 226     }
 227
 228     if (!--s_stringRemovesTillPrintStats) {
 229         s_stringRemovesTillPrintStats = s_printStringStatsFrequency;
 230         printStats();
 231     }
 232 }
 233
 234 void StringStats::printStats()
 235 {
 236     dataLogF("String stats for process id %d:\n", getCurrentProcessID());
 237
 238     unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData;
 239     double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
 240     double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (double)m_number8BitStrings : 0.0;
 241     dataLogF("%8u (%5.2f%%) 8 bit        %12llu chars  %12llu bytes  avg length %6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, average8bitLength);
 242
 243     double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
 244     double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0;
 245     dataLogF("%8u (%5.2f%%) 16 bit       %12llu chars  %12llu bytes  avg length %6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength);
 246
 247     double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0;
 248     unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2;
 249     dataLogF("%8u Total                 %12llu chars  %12llu bytes  avg length %6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLength);
 250     unsigned long long totalSavedBytes = m_total8BitData;
 251     double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (double)(totalDataBytes + totalSavedBytes) : 0.0;
 252     dataLogF("         Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings);
 253
 254     unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl);
 255     double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100;
 256     dataLogF("         StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, overheadPercent);
 257
 258     callOnMainThread(printLiveStringStats, 0);
 259 }
 260 #endif
 261
 262 void* StringImpl::operator new(size_t size)
 263 {
 264     ASSERT(size == sizeof(StringImpl));
 265     return partitionAllocGeneric(Partitions::getBufferPartition(), size);
 266 }
 267
 268 void StringImpl::operator delete(void* ptr)
 269 {
 270     partitionFreeGeneric(Partitions::getBufferPartition(), ptr);
 271 }
 272
 273 inline StringImpl::~StringImpl()
 274 {
 275     ASSERT(!isStatic());
 276
 277     STRING_STATS_REMOVE_STRING(this);
 278
 279     if (isAtomic())
 280         AtomicString::remove(this);
 281 }
 282
 283 void StringImpl::destroyIfNotStatic()
 284 {
 285     if (!isStatic())
 286         delete this;
 287 }
 288
 289 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data)
 290 {
 291     if (!length) {
 292         data = 0;
 293         return empty();
 294     }
 295
 296     // Allocate a single buffer large enough to contain the StringImpl
 297     // struct as well as the data which it contains. This removes one
 298     // heap allocation from this call.
 299     StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), allocationSize<LChar>(length)));
 300
 301     data = reinterpret_cast<LChar*>(string + 1);
 302     return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
 303 }
 304
 305 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
 306 {
 307     if (!length) {
 308         data = 0;
 309         return empty();
 310     }
 311
 312     // Allocate a single buffer large enough to contain the StringImpl
 313     // struct as well as the data which it contains. This removes one
 314     // heap allocation from this call.
 315     StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), allocationSize<UChar>(length)));
 316
 317     data = reinterpret_cast<UChar*>(string + 1);
 318     return adoptRef(new (string) StringImpl(length));
 319 }
 320
 321 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length)
 322 {
 323     ASSERT(originalString->hasOneRef());
 324
 325     if (!length)
 326         return empty();
 327
 328     bool is8Bit = originalString->is8Bit();
 329     // Same as createUninitialized() except here we use realloc.
 330     size_t size = is8Bit ? allocationSize<LChar>(length) : allocationSize<UChar>(length);
 331     originalString->~StringImpl();
 332     StringImpl* string = static_cast<StringImpl*>(partitionReallocGeneric(Partitions::getBufferPartition(), originalString.leakRef(), size));
 333     if (is8Bit)
 334         return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
 335     return adoptRef(new (string) StringImpl(length));
 336 }
 337
 338 static StaticStringsTable& staticStrings()
 339 {
 340     DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ());
 341     return staticStrings;
 342 }
 343
 344 #if ENABLE(ASSERT)
 345 static bool s_allowCreationOfStaticStrings = true;
 346 #endif
 347
 348 const StaticStringsTable& StringImpl::allStaticStrings()
 349 {
 350     return staticStrings();
 351 }
 352
 353 void StringImpl::freezeStaticStrings()
 354 {
 355     ASSERT(isMainThread());
 356
 357 #if ENABLE(ASSERT)
 358     s_allowCreationOfStaticStrings = false;
 359 #endif
 360 }
 361
 362 unsigned StringImpl::m_highestStaticStringLength = 0;
 363
 364 StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsigned hash)
 365 {
 366     ASSERT(s_allowCreationOfStaticStrings);
 367     ASSERT(string);
 368     ASSERT(length);
 369
 370     StaticStringsTable::const_iterator it = staticStrings().find(hash);
 371     if (it != staticStrings().end()) {
 372         ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar)));
 373         return it->value;
 374     }
 375
 376     // Allocate a single buffer large enough to contain the StringImpl
 377     // struct as well as the data which it contains. This removes one
 378     // heap allocation from this call.
 379     RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
 380     size_t size = sizeof(StringImpl) + length * sizeof(LChar);
 381
 382     WTF_ANNOTATE_SCOPED_MEMORY_LEAK;
 383     StringImpl* impl = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size));
 384
 385     LChar* data = reinterpret_cast<LChar*>(impl + 1);
 386     impl = new (impl) StringImpl(length, hash, StaticString);
 387     memcpy(data, string, length * sizeof(LChar));
 388 #if ENABLE(ASSERT)
 389     impl->assertHashIsCorrect();
 390 #endif
 391
 392     ASSERT(isMainThread());
 393     m_highestStaticStringLength = std::max(m_highestStaticStringLength, length);
 394     staticStrings().add(hash, impl);
 395     WTF_ANNOTATE_BENIGN_RACE(impl,
 396         "Benign race on the reference counter of a static string created by StringImpl::createStatic");
 397
 398     return impl;
 399 }
 400
 401 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
 402 {
 403     if (!characters || !length)
 404         return empty();
 405
 406     UChar* data;
 407     RefPtr<StringImpl> string = createUninitialized(length, data);
 408     memcpy(data, characters, length * sizeof(UChar));
 409     return string.release();
 410 }
 411
 412 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned length)
 413 {
 414     if (!characters || !length)
 415         return empty();
 416
 417     LChar* data;
 418     RefPtr<StringImpl> string = createUninitialized(length, data);
 419     memcpy(data, characters, length * sizeof(LChar));
 420     return string.release();
 421 }
 422
 423 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length)
 424 {
 425     if (!characters || !length)
 426         return empty();
 427
 428     LChar* data;
 429     RefPtr<StringImpl> string = createUninitialized(length, data);
 430
 431     for (size_t i = 0; i < length; ++i) {
 432         if (characters[i] & 0xff00)
 433             return create(characters, length);
 434         data[i] = static_cast<LChar>(characters[i]);
 435     }
 436
 437     return string.release();
 438 }
 439
 440 PassRefPtr<StringImpl> StringImpl::create(const LChar* string)
 441 {
 442     if (!string)
 443         return empty();
 444     size_t length = strlen(reinterpret_cast<const char*>(string));
 445     RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());
 446     return create(string, length);
 447 }
 448
 449 bool StringImpl::containsOnlyWhitespace()
 450 {
 451     // FIXME: The definition of whitespace here includes a number of characters
 452     // that are not whitespace from the point of view of RenderText; I wonder if
 453     // that's a problem in practice.
 454     if (is8Bit()) {
 455         for (unsigned i = 0; i < m_length; ++i) {
 456             UChar c = characters8()[i];
 457             if (!isASCIISpace(c))
 458                 return false;
 459         }
 460
 461         return true;
 462     }
 463
 464     for (unsigned i = 0; i < m_length; ++i) {
 465         UChar c = characters16()[i];
 466         if (!isASCIISpace(c))
 467             return false;
 468     }
 469     return true;
 470 }
 471
 472 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
 473 {
 474     if (start >= m_length)
 475         return empty();
 476     unsigned maxLength = m_length - start;
 477     if (length >= maxLength) {
 478         if (!start)
 479             return this;
 480         length = maxLength;
 481     }
 482     if (is8Bit())
 483         return create(characters8() + start, length);
 484
 485     return create(characters16() + start, length);
 486 }
 487
 488 UChar32 StringImpl::characterStartingAt(unsigned i)
 489 {
 490     if (is8Bit())
 491         return characters8()[i];
 492     if (U16_IS_SINGLE(characters16()[i]))
 493         return characters16()[i];
 494     if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && U16_IS_TRAIL(characters16()[i + 1]))
 495         return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]);
 496     return 0;
 497 }
 498
 499 PassRefPtr<StringImpl> StringImpl::lower()
 500 {
 501     // Note: This is a hot function in the Dromaeo benchmark, specifically the
 502     // no-op code path up through the first 'return' statement.
 503
 504     // First scan the string for uppercase and non-ASCII characters:
 505     bool noUpper = true;
 506     UChar ored = 0;
 507     if (is8Bit()) {
 508         const LChar* end = characters8() + m_length;
 509         for (const LChar* chp = characters8(); chp != end; ++chp) {
 510             if (UNLIKELY(isASCIIUpper(*chp)))
 511                 noUpper = false;
 512             ored |= *chp;
 513         }
 514         // Nothing to do if the string is all ASCII with no uppercase.
 515         if (noUpper && !(ored & ~0x7F))
 516             return this;
 517
 518         RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
 519         int32_t length = m_length;
 520
 521         LChar* data8;
 522         RefPtr<StringImpl> newImpl = createUninitialized(length, data8);
 523
 524         if (!(ored & ~0x7F)) {
 525             for (int32_t i = 0; i < length; ++i)
 526                 data8[i] = toASCIILower(characters8()[i]);
 527
 528             return newImpl.release();
 529         }
 530
 531         // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
 532         for (int32_t i = 0; i < length; ++i)
 533             data8[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
 534
 535         return newImpl.release();
 536     }
 537
 538     const UChar* end = characters16() + m_length;
 539     for (const UChar* chp = characters16(); chp != end; ++chp) {
 540         if (UNLIKELY(isASCIIUpper(*chp)))
 541             noUpper = false;
 542         ored |= *chp;
 543     }
 544     // Nothing to do if the string is all ASCII with no uppercase.
 545     if (noUpper && !(ored & ~0x7F))
 546         return this;
 547
 548     RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
 549     int32_t length = m_length;
 550
 551     if (!(ored & ~0x7F)) {
 552         UChar* data16;
 553         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
 554
 555         for (int32_t i = 0; i < length; ++i) {
 556             UChar c = characters16()[i];
 557             data16[i] = toASCIILower(c);
 558         }
 559         return newImpl.release();
 560     }
 561
 562     // Do a slower implementation for cases that include non-ASCII characters.
 563     UChar* data16;
 564     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
 565
 566     bool error;
 567     int32_t realLength = Unicode::toLower(data16, length, characters16(), m_length, &error);
 568     if (!error && realLength == length)
 569         return newImpl.release();
 570
 571     newImpl = createUninitialized(realLength, data16);
 572     Unicode::toLower(data16, realLength, characters16(), m_length, &error);
 573     if (error)
 574         return this;
 575     return newImpl.release();
 576 }
 577
 578 PassRefPtr<StringImpl> StringImpl::upper()
 579 {
 580     // This function could be optimized for no-op cases the way lower() is,
 581     // but in empirical testing, few actual calls to upper() are no-ops, so
 582     // it wouldn't be worth the extra time for pre-scanning.
 583
 584     RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
 585     int32_t length = m_length;
 586
 587     if (is8Bit()) {
 588         LChar* data8;
 589         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
 590
 591         // Do a faster loop for the case where all the characters are ASCII.
 592         LChar ored = 0;
 593         for (int i = 0; i < length; ++i) {
 594             LChar c = characters8()[i];
 595             ored |= c;
 596             data8[i] = toASCIIUpper(c);
 597         }
 598         if (!(ored & ~0x7F))
 599             return newImpl.release();
 600
 601         // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
 602         int numberSharpSCharacters = 0;
 603
 604         // There are two special cases.
 605         //  1. latin-1 characters when converted to upper case are 16 bit characters.
 606         //  2. Lower case sharp-S converts to "SS" (two characters)
 607         for (int32_t i = 0; i < length; ++i) {
 608             LChar c = characters8()[i];
 609             if (UNLIKELY(c == smallLetterSharpS))
 610                 ++numberSharpSCharacters;
 611             UChar upper = Unicode::toUpper(c);
 612             if (UNLIKELY(upper > 0xff)) {
 613                 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
 614                 goto upconvert;
 615             }
 616             data8[i] = static_cast<LChar>(upper);
 617         }
 618
 619         if (!numberSharpSCharacters)
 620             return newImpl.release();
 621
 622         // We have numberSSCharacters sharp-s characters, but none of the other special characters.
 623         newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
 624
 625         LChar* dest = data8;
 626
 627         for (int32_t i = 0; i < length; ++i) {
 628             LChar c = characters8()[i];
 629             if (c == smallLetterSharpS) {
 630                 *dest++ = 'S';
 631                 *dest++ = 'S';
 632             } else
 633                 *dest++ = static_cast<LChar>(Unicode::toUpper(c));
 634         }
 635
 636         return newImpl.release();
 637     }
 638
 639 upconvert:
 640     RefPtr<StringImpl> upconverted = upconvertedString();
 641     const UChar* source16 = upconverted->characters16();
 642
 643     UChar* data16;
 644     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
 645
 646     // Do a faster loop for the case where all the characters are ASCII.
 647     UChar ored = 0;
 648     for (int i = 0; i < length; ++i) {
 649         UChar c = source16[i];
 650         ored |= c;
 651         data16[i] = toASCIIUpper(c);
 652     }
 653     if (!(ored & ~0x7F))
 654         return newImpl.release();
 655
 656     // Do a slower implementation for cases that include non-ASCII characters.
 657     bool error;
 658     int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &error);
 659     if (!error && realLength == length)
 660         return newImpl;
 661     newImpl = createUninitialized(realLength, data16);
 662     Unicode::toUpper(data16, realLength, source16, m_length, &error);
 663     if (error)
 664         return this;
 665     return newImpl.release();
 666 }
 667
 668 static bool inline localeIdMatchesLang(const AtomicString& localeId, const char* lang)
 669 {
 670     if (equalIgnoringCase(localeId, lang))
 671         return true;
 672     static char localeIdPrefix[4];
 673     static const char delimeter[4] = "-_@";
 674
 675     size_t langLength = strlen(lang);
 676     RELEASE_ASSERT(langLength >= 2 && langLength <= 3);
 677     strncpy(localeIdPrefix, lang, langLength);
 678     for (int i = 0; i < 3; ++i) {
 679         localeIdPrefix[langLength] = delimeter[i];
 680         // case-insensitive comparison
 681         if (localeId.impl() && localeId.impl()->startsWith(localeIdPrefix, langLength + 1, false))
 682             return true;
 683     }
 684     return false;
 685 }
 686
 687 typedef int32_t (*icuCaseConverter)(UChar*, int32_t, const UChar*, int32_t, const char*, UErrorCode*);
 688
 689 static PassRefPtr<StringImpl> caseConvert(const UChar* source16, size_t length, icuCaseConverter converter, const char* locale, StringImpl* originalString)
 690 {
 691     UChar* data16;
 692     int32_t targetLength = length;
 693     RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16);
 694     do {
 695         UErrorCode status = U_ZERO_ERROR;
 696         targetLength = converter(data16, targetLength, source16, length, locale, &status);
 697         if (U_SUCCESS(status)) {
 698             output->truncateAssumingIsolated(targetLength);
 699             return output.release();
 700         }
 701         if (status != U_BUFFER_OVERFLOW_ERROR)
 702             return originalString;
 703         // Expand the buffer.
 704         output = StringImpl::createUninitialized(targetLength, data16);
 705     } while (true);
 706 }
 707
 708 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier)
 709 {
 710     // Use the more-optimized code path most of the time.
 711     // Only Turkic (tr and az) languages and Lithuanian requires
 712     // locale-specific lowercasing rules. Even though CLDR has el-Lower,
 713     // it's identical to the locale-agnostic lowercasing. Context-dependent
 714     // handling of Greek capital sigma is built into the common lowercasing
 715     // function in ICU.
 716     const char* localeForConversion = 0;
 717     if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az"))
 718         localeForConversion = "tr";
 719     else if (localeIdMatchesLang(localeIdentifier, "lt"))
 720         localeForConversion = "lt";
 721     else
 722         return lower();
 723
 724     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
 725         CRASH();
 726     int length = m_length;
 727
 728     RefPtr<StringImpl> upconverted = upconvertedString();
 729     const UChar* source16 = upconverted->characters16();
 730     return caseConvert(source16, length, u_strToLower, localeForConversion, this);
 731 }
 732
 733 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier)
 734 {
 735     // Use the more-optimized code path most of the time.
 736     // Only Turkic (tr and az) languages and Greek require locale-specific
 737     // lowercasing rules.
 738     icu::UnicodeString transliteratorId;
 739     const char* localeForConversion = 0;
 740     if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az"))
 741         localeForConversion = "tr";
 742     else if (localeIdMatchesLang(localeIdentifier, "el"))
 743         transliteratorId = UNICODE_STRING_SIMPLE("el-Upper");
 744     else if (localeIdMatchesLang(localeIdentifier, "lt"))
 745         localeForConversion = "lt";
 746     else
 747         return upper();
 748
 749     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
 750         CRASH();
 751     int length = m_length;
 752
 753     RefPtr<StringImpl> upconverted = upconvertedString();
 754     const UChar* source16 = upconverted->characters16();
 755
 756     if (localeForConversion)
 757         return caseConvert(source16, length, u_strToUpper, localeForConversion, this);
 758
 759     // TODO(jungshik): Cache transliterator if perf penaly warrants it for Greek.
 760     UErrorCode status = U_ZERO_ERROR;
 761     OwnPtr<icu::Transliterator> translit =
 762         adoptPtr(icu::Transliterator::createInstance(transliteratorId, UTRANS_FORWARD, status));
 763     if (U_FAILURE(status))
 764         return upper();
 765
 766     // target will be copy-on-write.
 767     icu::UnicodeString target(false, source16, length);
 768     translit->transliterate(target);
 769
 770     return create(target.getBuffer(), target.length());
 771 }
 772
 773 PassRefPtr<StringImpl> StringImpl::fill(UChar character)
 774 {
 775     if (!(character & ~0x7F)) {
 776         LChar* data;
 777         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
 778         for (unsigned i = 0; i < m_length; ++i)
 779             data[i] = character;
 780         return newImpl.release();
 781     }
 782     UChar* data;
 783     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
 784     for (unsigned i = 0; i < m_length; ++i)
 785         data[i] = character;
 786     return newImpl.release();
 787 }
 788
 789 PassRefPtr<StringImpl> StringImpl::foldCase()
 790 {
 791     RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
 792     int32_t length = m_length;
 793
 794     if (is8Bit()) {
 795         // Do a faster loop for the case where all the characters are ASCII.
 796         LChar* data;
 797         RefPtr <StringImpl>newImpl = createUninitialized(m_length, data);
 798         LChar ored = 0;
 799
 800         for (int32_t i = 0; i < length; ++i) {
 801             LChar c = characters8()[i];
 802             data[i] = toASCIILower(c);
 803             ored |= c;
 804         }
 805
 806         if (!(ored & ~0x7F))
 807             return newImpl.release();
 808
 809         // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
 810         for (int32_t i = 0; i < length; ++i)
 811             data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
 812
 813         return newImpl.release();
 814     }
 815
 816     // Do a faster loop for the case where all the characters are ASCII.
 817     UChar* data;
 818     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
 819     UChar ored = 0;
 820     for (int32_t i = 0; i < length; ++i) {
 821         UChar c = characters16()[i];
 822         ored |= c;
 823         data[i] = toASCIILower(c);
 824     }
 825     if (!(ored & ~0x7F))
 826         return newImpl.release();
 827
 828     // Do a slower implementation for cases that include non-ASCII characters.
 829     bool error;
 830     int32_t realLength = Unicode::foldCase(data, length, characters16(), m_length, &error);
 831     if (!error && realLength == length)
 832         return newImpl.release();
 833     newImpl = createUninitialized(realLength, data);
 834     Unicode::foldCase(data, realLength, characters16(), m_length, &error);
 835     if (error)
 836         return this;
 837     return newImpl.release();
 838 }
 839
 840 template <class UCharPredicate>
 841 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate)
 842 {
 843     if (!m_length)
 844         return empty();
 845
 846     unsigned start = 0;
 847     unsigned end = m_length - 1;
 848
 849     // skip white space from start
 850     while (start <= end && predicate(is8Bit() ? characters8()[start] : characters16()[start]))
 851         ++start;
 852
 853     // only white space
 854     if (start > end)
 855         return empty();
 856
 857     // skip white space from end
 858     while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end]))
 859         --end;
 860
 861     if (!start && end == m_length - 1)
 862         return this;
 863     if (is8Bit())
 864         return create(characters8() + start, end + 1 - start);
 865     return create(characters16() + start, end + 1 - start);
 866 }
 867
 868 class UCharPredicate {
 869 public:
 870     inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(function) { }
 871
 872     inline bool operator()(UChar ch) const
 873     {
 874         return m_function(ch);
 875     }
 876
 877 private:
 878     const CharacterMatchFunctionPtr m_function;
 879 };
 880
 881 class SpaceOrNewlinePredicate {
 882 public:
 883     inline bool operator()(UChar ch) const
 884     {
 885         return isSpaceOrNewline(ch);
 886     }
 887 };
 888
 889 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
 890 {
 891     return stripMatchedCharacters(SpaceOrNewlinePredicate());
 892 }
 893
 894 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace)
 895 {
 896     return stripMatchedCharacters(UCharPredicate(isWhiteSpace));
 897 }
 898
 899 template <typename CharType>
 900 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType* characters, CharacterMatchFunctionPtr findMatch)
 901 {
 902     const CharType* from = characters;
 903     const CharType* fromend = from + m_length;
 904
 905     // Assume the common case will not remove any characters
 906     while (from != fromend && !findMatch(*from))
 907         ++from;
 908     if (from == fromend)
 909         return this;
 910
 911     StringBuffer<CharType> data(m_length);
 912     CharType* to = data.characters();
 913     unsigned outc = from - characters;
 914
 915     if (outc)
 916         memcpy(to, characters, outc * sizeof(CharType));
 917
 918     while (true) {
 919         while (from != fromend && findMatch(*from))
 920             ++from;
 921         while (from != fromend && !findMatch(*from))
 922             to[outc++] = *from++;
 923         if (from == fromend)
 924             break;
 925     }
 926
 927     data.shrink(outc);
 928
 929     return data.release();
 930 }
 931
 932 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
 933 {
 934     if (is8Bit())
 935         return removeCharacters(characters8(), findMatch);
 936     return removeCharacters(characters16(), findMatch);
 937 }
 938
 939 template <typename CharType, class UCharPredicate>
 940 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate, StripBehavior stripBehavior)
 941 {
 942     StringBuffer<CharType> data(m_length);
 943
 944     const CharType* from = getCharacters<CharType>();
 945     const CharType* fromend = from + m_length;
 946     int outc = 0;
 947     bool changedToSpace = false;
 948
 949     CharType* to = data.characters();
 950
 951     if (stripBehavior == StripExtraWhiteSpace) {
 952         while (true) {
 953             while (from != fromend && predicate(*from)) {
 954                 if (*from != ' ')
 955                     changedToSpace = true;
 956                 ++from;
 957             }
 958             while (from != fromend && !predicate(*from))
 959                 to[outc++] = *from++;
 960             if (from != fromend)
 961                 to[outc++] = ' ';
 962             else
 963                 break;
 964         }
 965
 966         if (outc > 0 && to[outc - 1] == ' ')
 967             --outc;
 968     } else {
 969         for (; from != fromend; ++from) {
 970             if (predicate(*from)) {
 971                 if (*from != ' ')
 972                     changedToSpace = true;
 973                 to[outc++] = ' ';
 974             } else {
 975                 to[outc++] = *from;
 976             }
 977         }
 978     }
 979
 980     if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
 981         return this;
 982
 983     data.shrink(outc);
 984
 985     return data.release();
 986 }
 987
 988 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavior)
 989 {
 990     if (is8Bit())
 991         return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlinePredicate(), stripBehavior);
 992     return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePredicate(), stripBehavior);
 993 }
 994
 995 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace, StripBehavior stripBehavior)
 996 {
 997     if (is8Bit())
 998         return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicate(isWhiteSpace), stripBehavior);
 999     return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(isWhiteSpace), stripBehavior);
1000 }
1001
1002 int StringImpl::toIntStrict(bool* ok, int base)
1003 {
1004     if (is8Bit())
1005         return charactersToIntStrict(characters8(), m_length, ok, base);
1006     return charactersToIntStrict(characters16(), m_length, ok, base);
1007 }
1008
1009 unsigned StringImpl::toUIntStrict(bool* ok, int base)
1010 {
1011     if (is8Bit())
1012         return charactersToUIntStrict(characters8(), m_length, ok, base);
1013     return charactersToUIntStrict(characters16(), m_length, ok, base);
1014 }
1015
1016 int64_t StringImpl::toInt64Strict(bool* ok, int base)
1017 {
1018     if (is8Bit())
1019         return charactersToInt64Strict(characters8(), m_length, ok, base);
1020     return charactersToInt64Strict(characters16(), m_length, ok, base);
1021 }
1022
1023 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
1024 {
1025     if (is8Bit())
1026         return charactersToUInt64Strict(characters8(), m_length, ok, base);
1027     return charactersToUInt64Strict(characters16(), m_length, ok, base);
1028 }
1029
1030 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
1031 {
1032     if (is8Bit())
1033         return charactersToIntPtrStrict(characters8(), m_length, ok, base);
1034     return charactersToIntPtrStrict(characters16(), m_length, ok, base);
1035 }
1036
1037 int StringImpl::toInt(bool* ok)
1038 {
1039     if (is8Bit())
1040         return charactersToInt(characters8(), m_length, ok);
1041     return charactersToInt(characters16(), m_length, ok);
1042 }
1043
1044 unsigned StringImpl::toUInt(bool* ok)
1045 {
1046     if (is8Bit())
1047         return charactersToUInt(characters8(), m_length, ok);
1048     return charactersToUInt(characters16(), m_length, ok);
1049 }
1050
1051 int64_t StringImpl::toInt64(bool* ok)
1052 {
1053     if (is8Bit())
1054         return charactersToInt64(characters8(), m_length, ok);
1055     return charactersToInt64(characters16(), m_length, ok);
1056 }
1057
1058 uint64_t StringImpl::toUInt64(bool* ok)
1059 {
1060     if (is8Bit())
1061         return charactersToUInt64(characters8(), m_length, ok);
1062     return charactersToUInt64(characters16(), m_length, ok);
1063 }
1064
1065 intptr_t StringImpl::toIntPtr(bool* ok)
1066 {
1067     if (is8Bit())
1068         return charactersToIntPtr(characters8(), m_length, ok);
1069     return charactersToIntPtr(characters16(), m_length, ok);
1070 }
1071
1072 double StringImpl::toDouble(bool* ok)
1073 {
1074     if (is8Bit())
1075         return charactersToDouble(characters8(), m_length, ok);
1076     return charactersToDouble(characters16(), m_length, ok);
1077 }
1078
1079 float StringImpl::toFloat(bool* ok)
1080 {
1081     if (is8Bit())
1082         return charactersToFloat(characters8(), m_length, ok);
1083     return charactersToFloat(characters16(), m_length, ok);
1084 }
1085
1086 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length)
1087 {
1088     while (length--) {
1089         LChar bc = *b++;
1090         if (foldCase(*a++) != foldCase(bc))
1091             return false;
1092     }
1093     return true;
1094 }
1095
1096 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length)
1097 {
1098     while (length--) {
1099         LChar bc = *b++;
1100         if (foldCase(*a++) != foldCase(bc))
1101             return false;
1102     }
1103     return true;
1104 }
1105
1106 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
1107 {
1108     if (is8Bit())
1109         return WTF::find(characters8(), m_length, matchFunction, start);
1110     return WTF::find(characters16(), m_length, matchFunction, start);
1111 }
1112
1113 size_t StringImpl::find(const LChar* matchString, unsigned index)
1114 {
1115     // Check for null or empty string to match against
1116     if (!matchString)
1117         return kNotFound;
1118     size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1119     RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1120     unsigned matchLength = matchStringLength;
1121     if (!matchLength)
1122         return min(index, length());
1123
1124     // Optimization 1: fast case for strings of length 1.
1125     if (matchLength == 1)
1126         return WTF::find(characters16(), length(), *matchString, index);
1127
1128     // Check index & matchLength are in range.
1129     if (index > length())
1130         return kNotFound;
1131     unsigned searchLength = length() - index;
1132     if (matchLength > searchLength)
1133         return kNotFound;
1134     // delta is the number of additional times to test; delta == 0 means test only once.
1135     unsigned delta = searchLength - matchLength;
1136
1137     const UChar* searchCharacters = characters16() + index;
1138
1139     // Optimization 2: keep a running hash of the strings,
1140     // only call equal if the hashes match.
1141     unsigned searchHash = 0;
1142     unsigned matchHash = 0;
1143     for (unsigned i = 0; i < matchLength; ++i) {
1144         searchHash += searchCharacters[i];
1145         matchHash += matchString[i];
1146     }
1147
1148     unsigned i = 0;
1149     // keep looping until we match
1150     while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
1151         if (i == delta)
1152             return kNotFound;
1153         searchHash += searchCharacters[i + matchLength];
1154         searchHash -= searchCharacters[i];
1155         ++i;
1156     }
1157     return index + i;
1158 }
1159
1160 template<typename CharType>
1161 ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters, const LChar* matchString, unsigned index, unsigned searchLength, unsigned matchLength)
1162 {
1163     // delta is the number of additional times to test; delta == 0 means test only once.
1164     unsigned delta = searchLength - matchLength;
1165
1166     unsigned i = 0;
1167     while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
1168         if (i == delta)
1169             return kNotFound;
1170         ++i;
1171     }
1172     return index + i;
1173 }
1174
1175 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index)
1176 {
1177     // Check for null or empty string to match against
1178     if (!matchString)
1179         return kNotFound;
1180     size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1181     RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1182     unsigned matchLength = matchStringLength;
1183     if (!matchLength)
1184         return min(index, length());
1185
1186     // Check index & matchLength are in range.
1187     if (index > length())
1188         return kNotFound;
1189     unsigned searchLength = length() - index;
1190     if (matchLength > searchLength)
1191         return kNotFound;
1192
1193     if (is8Bit())
1194         return findIgnoringCaseInternal(characters8() + index, matchString, index, searchLength, matchLength);
1195     return findIgnoringCaseInternal(characters16() + index, matchString, index, searchLength, matchLength);
1196 }
1197
1198 template <typename SearchCharacterType, typename MatchCharacterType>
1199 ALWAYS_INLINE static size_t findInternal(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
1200 {
1201     // Optimization: keep a running hash of the strings,
1202     // only call equal() if the hashes match.
1203
1204     // delta is the number of additional times to test; delta == 0 means test only once.
1205     unsigned delta = searchLength - matchLength;
1206
1207     unsigned searchHash = 0;
1208     unsigned matchHash = 0;
1209
1210     for (unsigned i = 0; i < matchLength; ++i) {
1211         searchHash += searchCharacters[i];
1212         matchHash += matchCharacters[i];
1213     }
1214
1215     unsigned i = 0;
1216     // keep looping until we match
1217     while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) {
1218         if (i == delta)
1219             return kNotFound;
1220         searchHash += searchCharacters[i + matchLength];
1221         searchHash -= searchCharacters[i];
1222         ++i;
1223     }
1224     return index + i;
1225 }
1226
1227 size_t StringImpl::find(StringImpl* matchString)
1228 {
1229     // Check for null string to match against
1230     if (UNLIKELY(!matchString))
1231         return kNotFound;
1232     unsigned matchLength = matchString->length();
1233
1234     // Optimization 1: fast case for strings of length 1.
1235     if (matchLength == 1) {
1236         if (is8Bit()) {
1237             if (matchString->is8Bit())
1238                 return WTF::find(characters8(), length(), matchString->characters8()[0]);
1239             return WTF::find(characters8(), length(), matchString->characters16()[0]);
1240         }
1241         if (matchString->is8Bit())
1242             return WTF::find(characters16(), length(), matchString->characters8()[0]);
1243         return WTF::find(characters16(), length(), matchString->characters16()[0]);
1244     }
1245
1246     // Check matchLength is in range.
1247     if (matchLength > length())
1248         return kNotFound;
1249
1250     // Check for empty string to match against
1251     if (UNLIKELY(!matchLength))
1252         return 0;
1253
1254     if (is8Bit()) {
1255         if (matchString->is8Bit())
1256             return findInternal(characters8(), matchString->characters8(), 0, length(), matchLength);
1257         return findInternal(characters8(), matchString->characters16(), 0, length(), matchLength);
1258     }
1259
1260     if (matchString->is8Bit())
1261         return findInternal(characters16(), matchString->characters8(), 0, length(), matchLength);
1262
1263     return findInternal(characters16(), matchString->characters16(), 0, length(), matchLength);
1264 }
1265
1266 size_t StringImpl::find(StringImpl* matchString, unsigned index)
1267 {
1268     // Check for null or empty string to match against
1269     if (UNLIKELY(!matchString))
1270         return kNotFound;
1271
1272     unsigned matchLength = matchString->length();
1273
1274     // Optimization 1: fast case for strings of length 1.
1275     if (matchLength == 1) {
1276         if (is8Bit())
1277             return WTF::find(characters8(), length(), (*matchString)[0], index);
1278         return WTF::find(characters16(), length(), (*matchString)[0], index);
1279     }
1280
1281     if (UNLIKELY(!matchLength))
1282         return min(index, length());
1283
1284     // Check index & matchLength are in range.
1285     if (index > length())
1286         return kNotFound;
1287     unsigned searchLength = length() - index;
1288     if (matchLength > searchLength)
1289         return kNotFound;
1290
1291     if (is8Bit()) {
1292         if (matchString->is8Bit())
1293             return findInternal(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
1294         return findInternal(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
1295     }
1296
1297     if (matchString->is8Bit())
1298         return findInternal(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
1299
1300     return findInternal(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
1301 }
1302
1303 template <typename SearchCharacterType, typename MatchCharacterType>
1304 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
1305 {
1306     // delta is the number of additional times to test; delta == 0 means test only once.
1307     unsigned delta = searchLength - matchLength;
1308
1309     unsigned i = 0;
1310     // keep looping until we match
1311     while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
1312         if (i == delta)
1313             return kNotFound;
1314         ++i;
1315     }
1316     return index + i;
1317 }
1318
1319 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
1320 {
1321     // Check for null or empty string to match against
1322     if (!matchString)
1323         return kNotFound;
1324     unsigned matchLength = matchString->length();
1325     if (!matchLength)
1326         return min(index, length());
1327
1328     // Check index & matchLength are in range.
1329     if (index > length())
1330         return kNotFound;
1331     unsigned searchLength = length() - index;
1332     if (matchLength > searchLength)
1333         return kNotFound;
1334
1335     if (is8Bit()) {
1336         if (matchString->is8Bit())
1337             return findIgnoringCaseInner(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
1338         return findIgnoringCaseInner(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
1339     }
1340
1341     if (matchString->is8Bit())
1342         return findIgnoringCaseInner(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
1343
1344     return findIgnoringCaseInner(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
1345 }
1346
1347 size_t StringImpl::findNextLineStart(unsigned index)
1348 {
1349     if (is8Bit())
1350         return WTF::findNextLineStart(characters8(), m_length, index);
1351     return WTF::findNextLineStart(characters16(), m_length, index);
1352 }
1353
1354 size_t StringImpl::count(LChar c) const
1355 {
1356     int count = 0;
1357     if (is8Bit()) {
1358         for (size_t i = 0; i < m_length; ++i)
1359             count += characters8()[i] == c;
1360     } else {
1361         for (size_t i = 0; i < m_length; ++i)
1362             count += characters16()[i] == c;
1363     }
1364     return count;
1365 }
1366
1367 size_t StringImpl::reverseFind(UChar c, unsigned index)
1368 {
1369     if (is8Bit())
1370         return WTF::reverseFind(characters8(), m_length, c, index);
1371     return WTF::reverseFind(characters16(), m_length, c, index);
1372 }
1373
1374 template <typename SearchCharacterType, typename MatchCharacterType>
1375 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1376 {
1377     // Optimization: keep a running hash of the strings,
1378     // only call equal if the hashes match.
1379
1380     // delta is the number of additional times to test; delta == 0 means test only once.
1381     unsigned delta = min(index, length - matchLength);
1382
1383     unsigned searchHash = 0;
1384     unsigned matchHash = 0;
1385     for (unsigned i = 0; i < matchLength; ++i) {
1386         searchHash += searchCharacters[delta + i];
1387         matchHash += matchCharacters[i];
1388     }
1389
1390     // keep looping until we match
1391     while (searchHash != matchHash || !equal(searchCharacters + delta, matchCharacters, matchLength)) {
1392         if (!delta)
1393             return kNotFound;
1394         --delta;
1395         searchHash -= searchCharacters[delta + matchLength];
1396         searchHash += searchCharacters[delta];
1397     }
1398     return delta;
1399 }
1400
1401 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
1402 {
1403     // Check for null or empty string to match against
1404     if (!matchString)
1405         return kNotFound;
1406     unsigned matchLength = matchString->length();
1407     unsigned ourLength = length();
1408     if (!matchLength)
1409         return min(index, ourLength);
1410
1411     // Optimization 1: fast case for strings of length 1.
1412     if (matchLength == 1) {
1413         if (is8Bit())
1414             return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], index);
1415         return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], index);
1416     }
1417
1418     // Check index & matchLength are in range.
1419     if (matchLength > ourLength)
1420         return kNotFound;
1421
1422     if (is8Bit()) {
1423         if (matchString->is8Bit())
1424             return reverseFindInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
1425         return reverseFindInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
1426     }
1427
1428     if (matchString->is8Bit())
1429         return reverseFindInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
1430
1431     return reverseFindInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
1432 }
1433
1434 template <typename SearchCharacterType, typename MatchCharacterType>
1435 ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1436 {
1437     // delta is the number of additional times to test; delta == 0 means test only once.
1438     unsigned delta = min(index, length - matchLength);
1439
1440     // keep looping until we match
1441     while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) {
1442         if (!delta)
1443             return kNotFound;
1444         --delta;
1445     }
1446     return delta;
1447 }
1448
1449 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index)
1450 {
1451     // Check for null or empty string to match against
1452     if (!matchString)
1453         return kNotFound;
1454     unsigned matchLength = matchString->length();
1455     unsigned ourLength = length();
1456     if (!matchLength)
1457         return min(index, ourLength);
1458
1459     // Check index & matchLength are in range.
1460     if (matchLength > ourLength)
1461         return kNotFound;
1462
1463     if (is8Bit()) {
1464         if (matchString->is8Bit())
1465             return reverseFindIgnoringCaseInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
1466         return reverseFindIgnoringCaseInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
1467     }
1468
1469     if (matchString->is8Bit())
1470         return reverseFindIgnoringCaseInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
1471
1472     return reverseFindIgnoringCaseInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
1473 }
1474
1475 ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned startOffset, const char* matchString, unsigned matchLength, bool caseSensitive)
1476 {
1477     ASSERT(stringImpl);
1478     ASSERT(matchLength <= stringImpl->length());
1479     ASSERT(startOffset + matchLength <= stringImpl->length());
1480
1481     if (caseSensitive) {
1482         if (stringImpl->is8Bit())
1483             return equal(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1484         return equal(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1485     }
1486     if (stringImpl->is8Bit())
1487         return equalIgnoringCase(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1488     return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1489 }
1490
1491 bool StringImpl::startsWith(UChar character) const
1492 {
1493     return m_length && (*this)[0] == character;
1494 }
1495
1496 bool StringImpl::startsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
1497 {
1498     ASSERT(matchLength);
1499     if (matchLength > length())
1500         return false;
1501     return equalInner(this, 0, matchString, matchLength, caseSensitive);
1502 }
1503
1504 bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive)
1505 {
1506     ASSERT(matchString);
1507     if (m_length >= matchString->m_length) {
1508         unsigned start = m_length - matchString->m_length;
1509         return (caseSensitive ? find(matchString, start) : findIgnoringCase(matchString, start)) == start;
1510     }
1511     return false;
1512 }
1513
1514 bool StringImpl::endsWith(UChar character) const
1515 {
1516     return m_length && (*this)[m_length - 1] == character;
1517 }
1518
1519 bool StringImpl::endsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
1520 {
1521     ASSERT(matchLength);
1522     if (matchLength > length())
1523         return false;
1524     unsigned startOffset = length() - matchLength;
1525     return equalInner(this, startOffset, matchString, matchLength, caseSensitive);
1526 }
1527
1528 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
1529 {
1530     if (oldC == newC)
1531         return this;
1532
1533     if (find(oldC) == kNotFound)
1534         return this;
1535
1536     unsigned i;
1537     if (is8Bit()) {
1538         if (newC <= 0xff) {
1539             LChar* data;
1540             LChar oldChar = static_cast<LChar>(oldC);
1541             LChar newChar = static_cast<LChar>(newC);
1542
1543             RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1544
1545             for (i = 0; i != m_length; ++i) {
1546                 LChar ch = characters8()[i];
1547                 if (ch == oldChar)
1548                     ch = newChar;
1549                 data[i] = ch;
1550             }
1551             return newImpl.release();
1552         }
1553
1554         // There is the possibility we need to up convert from 8 to 16 bit,
1555         // create a 16 bit string for the result.
1556         UChar* data;
1557         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1558
1559         for (i = 0; i != m_length; ++i) {
1560             UChar ch = characters8()[i];
1561             if (ch == oldC)
1562                 ch = newC;
1563             data[i] = ch;
1564         }
1565
1566         return newImpl.release();
1567     }
1568
1569     UChar* data;
1570     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1571
1572     for (i = 0; i != m_length; ++i) {
1573         UChar ch = characters16()[i];
1574         if (ch == oldC)
1575             ch = newC;
1576         data[i] = ch;
1577     }
1578     return newImpl.release();
1579 }
1580
1581 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
1582 {
1583     position = min(position, length());
1584     lengthToReplace = min(lengthToReplace, length() - position);
1585     unsigned lengthToInsert = str ? str->length() : 0;
1586     if (!lengthToReplace && !lengthToInsert)
1587         return this;
1588
1589     RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max() - lengthToInsert));
1590
1591     if (is8Bit() && (!str || str->is8Bit())) {
1592         LChar* data;
1593         RefPtr<StringImpl> newImpl =
1594         createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1595         memcpy(data, characters8(), position * sizeof(LChar));
1596         if (str)
1597             memcpy(data + position, str->characters8(), lengthToInsert * sizeof(LChar));
1598         memcpy(data + position + lengthToInsert, characters8() + position + lengthToReplace,
1599                (length() - position - lengthToReplace) * sizeof(LChar));
1600         return newImpl.release();
1601     }
1602     UChar* data;
1603     RefPtr<StringImpl> newImpl =
1604         createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1605     if (is8Bit())
1606         for (unsigned i = 0; i < position; ++i)
1607             data[i] = characters8()[i];
1608     else
1609         memcpy(data, characters16(), position * sizeof(UChar));
1610     if (str) {
1611         if (str->is8Bit())
1612             for (unsigned i = 0; i < lengthToInsert; ++i)
1613                 data[i + position] = str->characters8()[i];
1614         else
1615             memcpy(data + position, str->characters16(), lengthToInsert * sizeof(UChar));
1616     }
1617     if (is8Bit()) {
1618         for (unsigned i = 0; i < length() - position - lengthToReplace; ++i)
1619             data[i + position + lengthToInsert] = characters8()[i + position + lengthToReplace];
1620     } else {
1621         memcpy(data + position + lengthToInsert, characters16() + position + lengthToReplace,
1622             (length() - position - lengthToReplace) * sizeof(UChar));
1623     }
1624     return newImpl.release();
1625 }
1626
1627 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
1628 {
1629     if (!replacement)
1630         return this;
1631
1632     if (replacement->is8Bit())
1633         return replace(pattern, replacement->characters8(), replacement->length());
1634
1635     return replace(pattern, replacement->characters16(), replacement->length());
1636 }
1637
1638 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement, unsigned repStrLength)
1639 {
1640     ASSERT(replacement);
1641
1642     size_t srcSegmentStart = 0;
1643     unsigned matchCount = 0;
1644
1645     // Count the matches.
1646     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1647         ++matchCount;
1648         ++srcSegmentStart;
1649     }
1650
1651     // If we have 0 matches then we don't have to do any more work.
1652     if (!matchCount)
1653         return this;
1654
1655     RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1656
1657     unsigned replaceSize = matchCount * repStrLength;
1658     unsigned newSize = m_length - matchCount;
1659     RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1660
1661     newSize += replaceSize;
1662
1663     // Construct the new data.
1664     size_t srcSegmentEnd;
1665     unsigned srcSegmentLength;
1666     srcSegmentStart = 0;
1667     unsigned dstOffset = 0;
1668
1669     if (is8Bit()) {
1670         LChar* data;
1671         RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1672
1673         while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1674             srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1675             memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1676             dstOffset += srcSegmentLength;
1677             memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar));
1678             dstOffset += repStrLength;
1679             srcSegmentStart = srcSegmentEnd + 1;
1680         }
1681
1682         srcSegmentLength = m_length - srcSegmentStart;
1683         memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1684
1685         ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1686
1687         return newImpl.release();
1688     }
1689
1690     UChar* data;
1691     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1692
1693     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1694         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1695         memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1696
1697         dstOffset += srcSegmentLength;
1698         for (unsigned i = 0; i < repStrLength; ++i)
1699             data[i + dstOffset] = replacement[i];
1700
1701         dstOffset += repStrLength;
1702         srcSegmentStart = srcSegmentEnd + 1;
1703     }
1704
1705     srcSegmentLength = m_length - srcSegmentStart;
1706     memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1707
1708     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1709
1710     return newImpl.release();
1711 }
1712
1713 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement, unsigned repStrLength)
1714 {
1715     ASSERT(replacement);
1716
1717     size_t srcSegmentStart = 0;
1718     unsigned matchCount = 0;
1719
1720     // Count the matches.
1721     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1722         ++matchCount;
1723         ++srcSegmentStart;
1724     }
1725
1726     // If we have 0 matches then we don't have to do any more work.
1727     if (!matchCount)
1728         return this;
1729
1730     RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1731
1732     unsigned replaceSize = matchCount * repStrLength;
1733     unsigned newSize = m_length - matchCount;
1734     RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1735
1736     newSize += replaceSize;
1737
1738     // Construct the new data.
1739     size_t srcSegmentEnd;
1740     unsigned srcSegmentLength;
1741     srcSegmentStart = 0;
1742     unsigned dstOffset = 0;
1743
1744     if (is8Bit()) {
1745         UChar* data;
1746         RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1747
1748         while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1749             srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1750             for (unsigned i = 0; i < srcSegmentLength; ++i)
1751                 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1752
1753             dstOffset += srcSegmentLength;
1754             memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1755
1756             dstOffset += repStrLength;
1757             srcSegmentStart = srcSegmentEnd + 1;
1758         }
1759
1760         srcSegmentLength = m_length - srcSegmentStart;
1761         for (unsigned i = 0; i < srcSegmentLength; ++i)
1762             data[i + dstOffset] = characters8()[i + srcSegmentStart];
1763
1764         ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1765
1766         return newImpl.release();
1767     }
1768
1769     UChar* data;
1770     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1771
1772     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1773         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1774         memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1775
1776         dstOffset += srcSegmentLength;
1777         memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1778
1779         dstOffset += repStrLength;
1780         srcSegmentStart = srcSegmentEnd + 1;
1781     }
1782
1783     srcSegmentLength = m_length - srcSegmentStart;
1784     memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1785
1786     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1787
1788     return newImpl.release();
1789 }
1790
1791 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
1792 {
1793     if (!pattern || !replacement)
1794         return this;
1795
1796     unsigned patternLength = pattern->length();
1797     if (!patternLength)
1798         return this;
1799
1800     unsigned repStrLength = replacement->length();
1801     size_t srcSegmentStart = 0;
1802     unsigned matchCount = 0;
1803
1804     // Count the matches.
1805     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1806         ++matchCount;
1807         srcSegmentStart += patternLength;
1808     }
1809
1810     // If we have 0 matches, we don't have to do any more work
1811     if (!matchCount)
1812         return this;
1813
1814     unsigned newSize = m_length - matchCount * patternLength;
1815     RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1816
1817     RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * repStrLength));
1818
1819     newSize += matchCount * repStrLength;
1820
1821
1822     // Construct the new data
1823     size_t srcSegmentEnd;
1824     unsigned srcSegmentLength;
1825     srcSegmentStart = 0;
1826     unsigned dstOffset = 0;
1827     bool srcIs8Bit = is8Bit();
1828     bool replacementIs8Bit = replacement->is8Bit();
1829
1830     // There are 4 cases:
1831     // 1. This and replacement are both 8 bit.
1832     // 2. This and replacement are both 16 bit.
1833     // 3. This is 8 bit and replacement is 16 bit.
1834     // 4. This is 16 bit and replacement is 8 bit.
1835     if (srcIs8Bit && replacementIs8Bit) {
1836         // Case 1
1837         LChar* data;
1838         RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1839         while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1840             srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1841             memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1842             dstOffset += srcSegmentLength;
1843             memcpy(data + dstOffset, replacement->characters8(), repStrLength * sizeof(LChar));
1844             dstOffset += repStrLength;
1845             srcSegmentStart = srcSegmentEnd + patternLength;
1846         }
1847
1848         srcSegmentLength = m_length - srcSegmentStart;
1849         memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1850
1851         ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1852
1853         return newImpl.release();
1854     }
1855
1856     UChar* data;
1857     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1858     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1859         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1860         if (srcIs8Bit) {
1861             // Case 3.
1862             for (unsigned i = 0; i < srcSegmentLength; ++i)
1863                 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1864         } else {
1865             // Case 2 & 4.
1866             memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1867         }
1868         dstOffset += srcSegmentLength;
1869         if (replacementIs8Bit) {
1870             // Cases 2 & 3.
1871             for (unsigned i = 0; i < repStrLength; ++i)
1872                 data[i + dstOffset] = replacement->characters8()[i];
1873         } else {
1874             // Case 4
1875             memcpy(data + dstOffset, replacement->characters16(), repStrLength * sizeof(UChar));
1876         }
1877         dstOffset += repStrLength;
1878         srcSegmentStart = srcSegmentEnd + patternLength;
1879     }
1880
1881     srcSegmentLength = m_length - srcSegmentStart;
1882     if (srcIs8Bit) {
1883         // Case 3.
1884         for (unsigned i = 0; i < srcSegmentLength; ++i)
1885             data[i + dstOffset] = characters8()[i + srcSegmentStart];
1886     } else {
1887         // Cases 2 & 4.
1888         memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1889     }
1890
1891     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1892
1893     return newImpl.release();
1894 }
1895
1896 PassRefPtr<StringImpl> StringImpl::upconvertedString()
1897 {
1898     if (is8Bit())
1899         return String::make16BitFrom8BitSource(characters8(), m_length).releaseImpl();
1900     return this;
1901 }
1902
1903 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b)
1904 {
1905     unsigned aLength = a->length();
1906     unsigned bLength = b->length();
1907     if (aLength != bLength)
1908         return false;
1909
1910     if (a->is8Bit()) {
1911         if (b->is8Bit())
1912             return equal(a->characters8(), b->characters8(), aLength);
1913
1914         return equal(a->characters8(), b->characters16(), aLength);
1915     }
1916
1917     if (b->is8Bit())
1918         return equal(a->characters16(), b->characters8(), aLength);
1919
1920     return equal(a->characters16(), b->characters16(), aLength);
1921 }
1922
1923 bool equal(const StringImpl* a, const StringImpl* b)
1924 {
1925     if (a == b)
1926         return true;
1927     if (!a || !b)
1928         return false;
1929     if (a->isAtomic() && b->isAtomic())
1930         return false;
1931
1932     return stringImplContentEqual(a, b);
1933 }
1934
1935 template <typename CharType>
1936 inline bool equalInternal(const StringImpl* a, const CharType* b, unsigned length)
1937 {
1938     if (!a)
1939         return !b;
1940     if (!b)
1941         return false;
1942
1943     if (a->length() != length)
1944         return false;
1945     if (a->is8Bit())
1946         return equal(a->characters8(), b, length);
1947     return equal(a->characters16(), b, length);
1948 }
1949
1950 bool equal(const StringImpl* a, const LChar* b, unsigned length)
1951 {
1952     return equalInternal(a, b, length);
1953 }
1954
1955 bool equal(const StringImpl* a, const UChar* b, unsigned length)
1956 {
1957     return equalInternal(a, b, length);
1958 }
1959
1960 bool equal(const StringImpl* a, const LChar* b)
1961 {
1962     if (!a)
1963         return !b;
1964     if (!b)
1965         return !a;
1966
1967     unsigned length = a->length();
1968
1969     if (a->is8Bit()) {
1970         const LChar* aPtr = a->characters8();
1971         for (unsigned i = 0; i != length; ++i) {
1972             LChar bc = b[i];
1973             LChar ac = aPtr[i];
1974             if (!bc)
1975                 return false;
1976             if (ac != bc)
1977                 return false;
1978         }
1979
1980         return !b[length];
1981     }
1982
1983     const UChar* aPtr = a->characters16();
1984     for (unsigned i = 0; i != length; ++i) {
1985         LChar bc = b[i];
1986         if (!bc)
1987             return false;
1988         if (aPtr[i] != bc)
1989             return false;
1990     }
1991
1992     return !b[length];
1993 }
1994
1995 bool equalNonNull(const StringImpl* a, const StringImpl* b)
1996 {
1997     ASSERT(a && b);
1998     if (a == b)
1999         return true;
2000
2001     return stringImplContentEqual(a, b);
2002 }
2003
2004 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b)
2005 {
2006     if (a == b)
2007         return true;
2008     if (!a || !b)
2009         return false;
2010
2011     return CaseFoldingHash::equal(a, b);
2012 }
2013
2014 bool equalIgnoringCase(const StringImpl* a, const LChar* b)
2015 {
2016     if (!a)
2017         return !b;
2018     if (!b)
2019         return !a;
2020
2021     unsigned length = a->length();
2022
2023     // Do a faster loop for the case where all the characters are ASCII.
2024     UChar ored = 0;
2025     bool equal = true;
2026     if (a->is8Bit()) {
2027         const LChar* as = a->characters8();
2028         for (unsigned i = 0; i != length; ++i) {
2029             LChar bc = b[i];
2030             if (!bc)
2031                 return false;
2032             UChar ac = as[i];
2033             ored |= ac;
2034             equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2035         }
2036
2037         // Do a slower implementation for cases that include non-ASCII characters.
2038         if (ored & ~0x7F) {
2039             equal = true;
2040             for (unsigned i = 0; i != length; ++i)
2041                 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2042         }
2043
2044         return equal && !b[length];
2045     }
2046
2047     const UChar* as = a->characters16();
2048     for (unsigned i = 0; i != length; ++i) {
2049         LChar bc = b[i];
2050         if (!bc)
2051             return false;
2052         UChar ac = as[i];
2053         ored |= ac;
2054         equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2055     }
2056
2057     // Do a slower implementation for cases that include non-ASCII characters.
2058     if (ored & ~0x7F) {
2059         equal = true;
2060         for (unsigned i = 0; i != length; ++i) {
2061             equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2062         }
2063     }
2064
2065     return equal && !b[length];
2066 }
2067
2068 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b)
2069 {
2070     ASSERT(a && b);
2071     if (a == b)
2072         return true;
2073
2074     unsigned length = a->length();
2075     if (length != b->length())
2076         return false;
2077
2078     if (a->is8Bit()) {
2079         if (b->is8Bit())
2080             return equalIgnoringCase(a->characters8(), b->characters8(), length);
2081
2082         return equalIgnoringCase(b->characters16(), a->characters8(), length);
2083     }
2084
2085     if (b->is8Bit())
2086         return equalIgnoringCase(a->characters16(), b->characters8(), length);
2087
2088     return equalIgnoringCase(a->characters16(), b->characters16(), length);
2089 }
2090
2091 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
2092 {
2093     if (!a && b && !b->length())
2094         return true;
2095     if (!b && a && !a->length())
2096         return true;
2097     return equal(a, b);
2098 }
2099
2100 size_t StringImpl::sizeInBytes() const
2101 {
2102     size_t size = length();
2103     if (!is8Bit())
2104         size *= 2;
2105     return size + sizeof(*this);
2106 }
2107
2108 } // namespace WTF