omit unassigned (and too recent) codepoints from the text Normalization process
author Ritt Konstantin <ritt.ks@gmail.com>
Sat, 16 Jul 2011 00:45:28 +0000 (04:45 +0400)
committer Qt by Nokia <qt-info@nokia.com>
Thu, 1 Sep 2011 14:14:18 +0000 (16:14 +0200)
http://www.unicode.org/reports/tr15/#Guaranteeing_Process_Stability:
> handle any code points that were not defined in the earlier version
> as if they were unassigned: such code points will not decompose or compose,
> and their Canonical_Combining_Class value will be zero.

since QChar::Unicode_Unassigned value is 0, it's less than any other
QChar::UnicodeVersion value and must be handled explicitly

Change-Id: I6df025b4173d407660adae77ec5eeb98d15cb8ce
Reviewed-on: http://codereview.qt.nokia.com/4084
Reviewed-by: Qt Sanity Bot <qt_sanity_bot@ovi.com>
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
src/corelib/tools/qchar.cpp
src/corelib/tools/qstring.cpp

index 736bc63..b68da9d 100644 (file)
@@ -1402,7 +1402,8 @@ static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion
                 ucs4 = QChar::surrogateToUcs4(high, ucs4);
             }
         }
-        if (QChar::unicodeVersion(ucs4) > version)
+        const QChar::UnicodeVersion v = QChar::unicodeVersion(ucs4);
+        if (v > version || v == QChar::Unicode_Unassigned)
             continue;
         int length;
         int tag;
@@ -1462,7 +1463,7 @@ static ushort ligatureHelper(ushort u1, ushort u2)
     return 0;
 }
 
-static void composeHelper(QString *str, int from)
+static void composeHelper(QString *str, QChar::UnicodeVersion version, int from)
 {
     QString &s = *str;
 
@@ -1482,7 +1483,14 @@ static void composeHelper(QString *str, int from)
                 ++pos;
             }
         }
-        int combining = QChar::combiningClass(uc);
+        const QUnicodeTables::Properties *p = qGetProp(uc);
+        if (p->unicodeVersion > version || p->unicodeVersion == QChar::Unicode_Unassigned) {
+            starter = -1; // to prevent starter == pos - 1
+            lastCombining = 0;
+            ++pos;
+            continue;
+        }
+        int combining = p->combiningClass;
         if (starter == pos - 1 || combining > lastCombining) {
             // allowed to form ligature with S
             QChar ligature = ligatureHelper(s.at(starter).unicode(), uc);
@@ -1529,7 +1537,7 @@ static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, in
         ushort c2 = 0;
         {
             const QUnicodeTables::Properties *p = qGetProp(u2);
-            if ((QChar::UnicodeVersion)p->unicodeVersion <= version)
+            if (p->unicodeVersion <= version && p->unicodeVersion != QChar::Unicode_Unassigned)
                 c2 = p->combiningClass;
         }
         if (c2 == 0) {
@@ -1540,7 +1548,7 @@ static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, in
         ushort c1 = 0;
         {
             const QUnicodeTables::Properties *p = qGetProp(u1);
-            if ((QChar::UnicodeVersion)p->unicodeVersion <= version)
+            if (p->unicodeVersion <= version && p->unicodeVersion != QChar::Unicode_Unassigned)
                 c1 = p->combiningClass;
         }
 
index 78b1b59..c56c050 100644 (file)
@@ -6230,7 +6230,7 @@ void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::
     if (mode == QString::NormalizationForm_D || mode == QString::NormalizationForm_KD)
         return;
 
-    composeHelper(data, from);
+    composeHelper(data, version, from);
 }