Upstream version 5.34.104.0

[platform/framework/web/crosswalk.git] / src / third_party / WebKit / Source / wtf / text / StringImpl.cpp
diff --git a/src/third_party/WebKit/Source/wtf/text/StringImpl.cpp b/src/third_party/WebKit/Source/wtf/text/StringImpl.cpp

index d1c3627..7628ebf 100644 (file)
--- a/src/third_party/WebKit/Source/wtf/text/StringImpl.cpp
+++ b/src/third_party/WebKit/Source/wtf/text/StringImpl.cpp
@@ -28,18 +28,25 @@
  #include "wtf/DynamicAnnotations.h"
  #include "wtf/LeakAnnotations.h"
  #include "wtf/MainThread.h"
+#include "wtf/OwnPtr.h"
  #include "wtf/PartitionAlloc.h"
+#include "wtf/PassOwnPtr.h"
  #include "wtf/StdLibExtras.h"
  #include "wtf/WTF.h"
  #include "wtf/text/AtomicString.h"
  #include "wtf/text/StringBuffer.h"
  #include "wtf/text/StringHash.h"
  #include "wtf/unicode/CharacterNames.h"
+#include <unicode/translit.h>
+#include <unicode/unistr.h>
  
  #ifdef STRING_STATS
  #include "wtf/DataLog.h"
+#include "wtf/HashMap.h"
+#include "wtf/HashSet.h"
  #include "wtf/ProcessID.h"
  #include "wtf/RefCounted.h"
+#include "wtf/ThreadingPrimitives.h"
  #include <unistd.h>
  #endif
  
@@ -289,9 +296,7 @@ PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*&
      // Allocate a single buffer large enough to contain the StringImpl
      // struct as well as the data which it contains. This removes one
      // heap allocation from this call.
-    RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
-    size_t size = sizeof(StringImpl) + length * sizeof(LChar);
-    StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size));
+    StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), allocationSize<LChar>(length)));
  
      data = reinterpret_cast<LChar*>(string + 1);
      return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
@@ -307,57 +312,32 @@ PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*&
      // Allocate a single buffer large enough to contain the StringImpl
      // struct as well as the data which it contains. This removes one
      // heap allocation from this call.
-    RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar)));
-    size_t size = sizeof(StringImpl) + length * sizeof(UChar);
-    StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size));
+    StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), allocationSize<UChar>(length)));
  
      data = reinterpret_cast<UChar*>(string + 1);
      return adoptRef(new (string) StringImpl(length));
  }
  
-PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length, LChar*& data)
+PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length)
  {
-    ASSERT(originalString->is8Bit());
      ASSERT(originalString->hasOneRef());
  
-    if (!length) {
-        data = 0;
+    if (!length)
          return empty();
-    }
  
+    bool is8Bit = originalString->is8Bit(); // read the width now; the object's destructor runs a few lines below
      // Same as createUninitialized() except here we use realloc.
-    RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
-    size_t size = sizeof(StringImpl) + length * sizeof(LChar);
+    size_t size = is8Bit ? allocationSize<LChar>(length) : allocationSize<UChar>(length);
      originalString->~StringImpl();
      StringImpl* string = static_cast<StringImpl*>(partitionReallocGeneric(Partitions::getBufferPartition(), originalString.leakRef(), size));
-
-    data = reinterpret_cast<LChar*>(string + 1);
-    return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
-}
-
-PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length, UChar*& data)
-{
-    ASSERT(!originalString->is8Bit());
-    ASSERT(originalString->hasOneRef());
-
-    if (!length) {
-        data = 0;
-        return empty();
-    }
-
-    // Same as createUninitialized() except here we use realloc.
-    RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar)));
-    size_t size = sizeof(StringImpl) + length * sizeof(UChar);
-    originalString->~StringImpl();
-    StringImpl* string = static_cast<StringImpl*>(partitionReallocGeneric(Partitions::getBufferPartition(), originalString.leakRef(), size));
-
-    data = reinterpret_cast<UChar*>(string + 1);
+    if (is8Bit) // reconstruct the header in place with the same character width the original had
+        return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
      return adoptRef(new (string) StringImpl(length));
  }
  
-static Vector<StringImpl*>& staticStrings()
+static StaticStringsTable& staticStrings() // file-local accessor for the table that createStatic() looks up and adds to
  {
-    DEFINE_STATIC_LOCAL(Vector<StringImpl*>, staticStrings, ());
+    DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ());
      return staticStrings;
  }
  
@@ -365,7 +345,7 @@ static Vector<StringImpl*>& staticStrings()
  static bool s_allowCreationOfStaticStrings = true;
  #endif
  
-const Vector<StringImpl*>& StringImpl::allStaticStrings()
+const StaticStringsTable& StringImpl::allStaticStrings() // const view of the table returned by staticStrings()
  {
      return staticStrings();
  }
@@ -377,21 +357,28 @@ void StringImpl::freezeStaticStrings()
  #ifndef NDEBUG
      s_allowCreationOfStaticStrings = false;
  #endif
-
-    staticStrings().shrinkToFit();
  }
  
+unsigned StringImpl::m_highestStaticStringLength = 0;
+
  StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsigned hash)
  {
      ASSERT(s_allowCreationOfStaticStrings);
      ASSERT(string);
      ASSERT(length);
  
+    StaticStringsTable::const_iterator it = staticStrings().find(hash);
+    if (it != staticStrings().end()) {
+        ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar)));
+        return it->value;
+    }
+
      // Allocate a single buffer large enough to contain the StringImpl
      // struct as well as the data which it contains. This removes one
      // heap allocation from this call.
      RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
      size_t size = sizeof(StringImpl) + length * sizeof(LChar);
+
      WTF_ANNOTATE_SCOPED_MEMORY_LEAK;
      StringImpl* impl = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size));
  
@@ -403,7 +390,8 @@ StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsign
  #endif
  
      ASSERT(isMainThread());
-    staticStrings().append(impl);
+    m_highestStaticStringLength = std::max(m_highestStaticStringLength, length);
+    staticStrings().add(hash, impl);
      WTF_ANNOTATE_BENIGN_RACE(impl,
          "Benign race on the reference counter of a static string created by StringImpl::createStatic");
  
@@ -677,72 +665,109 @@ upconvert:
      return newImpl.release();
  }
  
-PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier)
+static bool inline localeIdMatchesLang(const AtomicString& localeId, const char* lang)
  {
-    // Use the more-optimized code path most of the time.
-    // Note the assumption here that the only locale-specific lowercasing is
-    // in the "tr" and "az" locales.
-    // FIXME: Could possibly optimize further by looking for the specific sequences
-    // that have locale-specific lowercasing. There are only three of them.
-    if (!(localeIdentifier == "tr" || localeIdentifier == "az"))
-        return lower();
+    if (equalIgnoringCase(localeId, lang))
+        return true;
+    static char localeIdPrefix[4];
+    static const char delimeter[4] = "-_@";
+
+    size_t langLength = strlen(lang);
+    RELEASE_ASSERT(langLength >= 2 && langLength <= 3);
+    strncpy(localeIdPrefix, lang, langLength);
+    for (int i = 0; i < 3; ++i) {
+        localeIdPrefix[langLength] = delimeter[i];
+        // case-insensitive comparison
+        if (localeId.impl() && localeId.impl()->startsWith(localeIdPrefix, langLength + 1, false))
+            return true;
+    }
+    return false;
+}
  
-    if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
-        CRASH();
-    int length = m_length;
+typedef int32_t (*icuCaseConverter)(UChar*, int32_t, const UChar*, int32_t, const char*, UErrorCode*);
  
-    // Below, we pass in the hardcoded locale "tr". Passing that is more efficient than
-    // allocating memory just to turn localeIdentifier into a C string, and there is no
-    // difference between the uppercasing for "tr" and "az" locales.
-    RefPtr<StringImpl> upconverted = upconvertedString();
-    const UChar* source16 = upconverted->characters16();
+static PassRefPtr<StringImpl> caseConvert(const UChar* source16, size_t length, icuCaseConverter converter, const char* locale, StringImpl* originalString)
+{
      UChar* data16;
-    RefPtr<StringImpl> newString = createUninitialized(length, data16);
+    int32_t targetLength = length; // first attempt: output buffer sized to the input length
+    RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16);
      do {
          UErrorCode status = U_ZERO_ERROR;
-        int realLength = u_strToLower(data16, length, source16, length, "tr", &status);
+        targetLength = converter(data16, targetLength, source16, length, locale, &status);
          if (U_SUCCESS(status)) {
-            newString->truncateAssumingIsolated(realLength);
-            return newString.release();
+            output->truncateAssumingIsolated(targetLength);
+            return output.release();
          }
          if (status != U_BUFFER_OVERFLOW_ERROR)
-            return this;
+            return originalString; // any failure other than buffer overflow: hand back the unconverted string
          // Expand the buffer.
-        newString = createUninitialized(realLength, data16);
+        output = StringImpl::createUninitialized(targetLength, data16); // retry with the length the converter just returned
      } while (true);
  }
  
+PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier)
+{
+    // Use the more-optimized code path most of the time.
+    // Only Turkic (tr and az) languages and Lithuanian require
+    // locale-specific lowercasing rules. Even though CLDR has el-Lower,
+    // it's identical to the locale-agnostic lowercasing. Context-dependent
+    // handling of Greek capital sigma is built into the common lowercasing
+    // function in ICU.
+    const char* localeForConversion = 0;
+    if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az"))
+        localeForConversion = "tr"; // az is converted with the "tr" rules as well
+    else if (localeIdMatchesLang(localeIdentifier, "lt"))
+        localeForConversion = "lt";
+    else
+        return lower();
+
+    if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
+        CRASH();
+    int length = m_length;
+
+    RefPtr<StringImpl> upconverted = upconvertedString();
+    const UChar* source16 = upconverted->characters16();
+    return caseConvert(source16, length, u_strToLower, localeForConversion, this);
+}
+
  PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier)
  {
      // Use the more-optimized code path most of the time.
-    // Note the assumption here that the only locale-specific uppercasing is of the
-    // letter "i" in the "tr" and "az" locales.
-    if (!(localeIdentifier == "tr" || localeIdentifier == "az") || find('i') == kNotFound)
+    // Only Turkic (tr and az), Greek (el) and Lithuanian (lt) get
+    // locale-specific uppercasing here; every other locale uses upper().
+    icu::UnicodeString transliteratorId;
+    const char* localeForConversion = 0;
+    if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az"))
+        localeForConversion = "tr"; // az is converted with the "tr" rules as well
+    else if (localeIdMatchesLang(localeIdentifier, "el"))
+        transliteratorId = UNICODE_STRING_SIMPLE("el-Upper");
+    else if (localeIdMatchesLang(localeIdentifier, "lt"))
+        localeForConversion = "lt";
+    else
          return upper();
  
      if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
          CRASH();
      int length = m_length;
  
-    // Below, we pass in the hardcoded locale "tr". Passing that is more efficient than
-    // allocating memory just to turn localeIdentifier into a C string, and there is no
-    // difference between the uppercasing for "tr" and "az" locales.
      RefPtr<StringImpl> upconverted = upconvertedString();
      const UChar* source16 = upconverted->characters16();
-    UChar* data16;
-    RefPtr<StringImpl> newString = createUninitialized(length, data16);
-    do {
-        UErrorCode status = U_ZERO_ERROR;
-        int realLength = u_strToUpper(data16, length, source16, length, "tr", &status);
-        if (U_SUCCESS(status)) {
-            newString->truncateAssumingIsolated(realLength);
-            return newString.release();
-        }
-        if (status != U_BUFFER_OVERFLOW_ERROR)
-            return this;
-        // Expand the buffer.
-        newString = createUninitialized(realLength, data16);
-    } while (true);
+
+    if (localeForConversion)
+        return caseConvert(source16, length, u_strToUpper, localeForConversion, this);
+
+    // TODO(jungshik): Cache transliterator if perf penalty warrants it for Greek.
+    UErrorCode status = U_ZERO_ERROR;
+    OwnPtr<icu::Transliterator> translit =
+        adoptPtr(icu::Transliterator::createInstance(transliteratorId, UTRANS_FORWARD, status));
+    if (U_FAILURE(status))
+        return upper(); // no transliterator available: fall back to the locale-agnostic upper()
+
+    // target will be copy-on-write.
+    icu::UnicodeString target(false, source16, length);
+    translit->transliterate(target);
+
+    return create(target.getBuffer(), target.length());
  }
  
  PassRefPtr<StringImpl> StringImpl::fill(UChar character)