optimize handling of surrogate pairs in toLower()/toUpper()
authorKonstantin Ritt <ritt.ks@gmail.com>
Tue, 18 Oct 2011 17:12:19 +0000 (19:12 +0200)
committerQt by Nokia <qt-info@nokia.com>
Tue, 21 Feb 2012 21:31:00 +0000 (22:31 +0100)
high surrogate part never changes on upper/lower casing
(which tested explicitly in the qunicodetables generator),
thus we can copy the high surrogate part in the loop

Change-Id: I24642b35eea7596c6ad494f2a7bc670f10b759a8
Merge-request: 70
Reviewed-by: Oswald Buddenhagen <oswald.buddenhagen@nokia.com>
Reviewed-by: Olivier
Reviewed-by: Olivier Goffart <ogoffart@woboq.com>
src/corelib/tools/qstring.cpp

index ca34096..a141179 100644 (file)
@@ -4071,6 +4071,7 @@ QString QString::simplified() const
             if (from == fromEnd)
                 goto done;
         } while (!ch.isSpace());
+
     }
   done:
     *to++ = ch;
@@ -4891,29 +4892,33 @@ QString QString::toLower() const
     const ushort *p = d->data();
     if (!p)
         return *this;
-    if (!d->size)
-        return *this;
 
-    const ushort *e = d->data() + d->size;
-
-    // this avoids one out of bounds check in the loop
-    if (QChar(*p).isLowSurrogate())
-        ++p;
+    const ushort *e = p + d->size;
+    // this avoids out of bounds check in the loop
+    while (e != p && QChar::isHighSurrogate(*(e - 1)))
+        --e;
 
+    const QUnicodeTables::Properties *prop;
     while (p != e) {
-        uint c = *p;
-        if (QChar(c).isLowSurrogate() && QChar(*(p - 1)).isHighSurrogate())
-            c = QChar::surrogateToUcs4(*(p - 1), c);
-        const QUnicodeTables::Properties *prop = qGetProp(c);
+        if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) {
+            ushort high = *p++;
+            prop = qGetProp(QChar::surrogateToUcs4(high, *p));
+        } else {
+            prop = qGetProp(*p);
+        }
         if (prop->lowerCaseDiff || prop->lowerCaseSpecial) {
+            if (QChar::isLowSurrogate(*p))
+                --p; // safe; diff is 0 for surrogates
             QString s(d->size, Qt::Uninitialized);
             memcpy(s.d->data(), d->data(), (p - d->data())*sizeof(ushort));
             ushort *pp = s.d->data() + (p - d->data());
-            while (p < e) {
-                uint c = *p;
-                if (QChar(c).isLowSurrogate() && QChar(*(p - 1)).isHighSurrogate())
-                    c = QChar::surrogateToUcs4(*(p - 1), c);
-                prop = qGetProp(c);
+            while (p != e) {
+                if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) {
+                    *pp = *p++;
+                    prop = qGetProp(QChar::surrogateToUcs4(*pp++, *p));
+                } else {
+                    prop = qGetProp(*p);
+                }
                 if (prop->lowerCaseSpecial) {
                     int pos = pp - s.d->data();
                     s.resize(s.d->size + SPECIAL_CASE_MAX_LEN);
@@ -4926,6 +4931,11 @@ QString QString::toLower() const
                 }
                 ++p;
             }
+
+            // this restores high surrogate parts eaten above, if any
+            while (e != d->data() + d->size)
+                *pp++ = *e++;
+
             s.truncate(pp - s.d->data());
             return s;
         }
@@ -4979,35 +4989,38 @@ QString QString::toCaseFolded() const
 
     \sa toLower(), QLocale::toLower()
 */
-
 QString QString::toUpper() const
 {
     const ushort *p = d->data();
     if (!p)
         return *this;
-    if (!d->size)
-        return *this;
 
-    const ushort *e = d->data() + d->size;
-
-    // this avoids one out of bounds check in the loop
-    if (QChar(*p).isLowSurrogate())
-        ++p;
+    const ushort *e = p + d->size;
+    // this avoids out of bounds check in the loop
+    while (e != p && QChar::isHighSurrogate(*(e - 1)))
+        --e;
 
+    const QUnicodeTables::Properties *prop;
     while (p != e) {
-        uint c = *p;
-        if (QChar(c).isLowSurrogate() && QChar(*(p - 1)).isHighSurrogate())
-            c = QChar::surrogateToUcs4(*(p - 1), c);
-        const QUnicodeTables::Properties *prop = qGetProp(c);
+        if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) {
+            ushort high = *p++;
+            prop = qGetProp(QChar::surrogateToUcs4(high, *p));
+        } else {
+            prop = qGetProp(*p);
+        }
         if (prop->upperCaseDiff || prop->upperCaseSpecial) {
+            if (QChar::isLowSurrogate(*p))
+                --p; // safe; diff is 0 for surrogates
             QString s(d->size, Qt::Uninitialized);
             memcpy(s.d->data(), d->data(), (p - d->data())*sizeof(ushort));
             ushort *pp = s.d->data() + (p - d->data());
-            while (p < e) {
-                uint c = *p;
-                if (QChar(c).isLowSurrogate() && QChar(*(p - 1)).isHighSurrogate())
-                    c = QChar::surrogateToUcs4(*(p - 1), c);
-                prop = qGetProp(c);
+            while (p != e) {
+                if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) {
+                    *pp = *p++;
+                    prop = qGetProp(QChar::surrogateToUcs4(*pp++, *p));
+                } else {
+                    prop = qGetProp(*p);
+                }
                 if (prop->upperCaseSpecial) {
                     int pos = pp - s.d->data();
                     s.resize(s.d->size + SPECIAL_CASE_MAX_LEN);
@@ -5020,6 +5033,11 @@ QString QString::toUpper() const
                 }
                 ++p;
             }
+
+            // this restores high surrogate parts eaten above, if any
+            while (e != d->data() + d->size)
+                *pp++ = *e++;
+
             s.truncate(pp - s.d->data());
             return s;
         }