workaround issue where casing diff overflows signed short
authorKonstantin Ritt <ritt.ks@gmail.com>
Mon, 23 Apr 2012 15:11:57 +0000 (18:11 +0300)
committerQt by Nokia <qt-info@nokia.com>
Tue, 24 Apr 2012 19:45:00 +0000 (21:45 +0200)
there are two such codepoints were added in the Unicode 5.1:
  U+1D79  LATIN SMALL LETTER INSULAR G
  U+A77D  LATIN CAPITAL LETTER INSULAR G
two more of them were added in the Unicode 6.0:
  U+0265  LATIN SMALL LETTER TURNED H
  U+A78D  LATIN CAPITAL LETTER TURNED H
and two more were added in the Unicode 6.1:
  U+0266  LATIN SMALL LETTER H WITH HOOK
  U+A7AA  LATIN CAPITAL LETTER H WITH HOOK

we map them like special cases with length == 1
(note: all are in BMP which is checked explicitly in the generator)

Change-Id: I8a34164eb3ee2e575b7799cc12d4b96ad5bcd9c6
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
src/corelib/tools/qchar.cpp
src/corelib/tools/qstring.cpp
src/corelib/tools/qunicodetables_p.h
util/unicode/main.cpp

index 3034e43..74df59a 100644 (file)
@@ -1053,33 +1053,48 @@ template <typename T>
 static inline T toLowerCase_helper(T uc)
 {
     const QUnicodeTables::Properties *p = qGetProp(uc);
-    if (!p->lowerCaseSpecial)
-        return uc + p->lowerCaseDiff;
-    return uc;
+    if (p->lowerCaseSpecial) {
+        const ushort *specialCase = specialCaseMap + p->lowerCaseDiff;
+        if (*specialCase == 1)
+            return specialCase[1];
+    }
+    return uc + p->lowerCaseDiff;
 }
 
 template <typename T>
 static inline T toUpperCase_helper(T uc)
 {
     const QUnicodeTables::Properties *p = qGetProp(uc);
-    if (!p->upperCaseSpecial)
-        return uc + p->upperCaseDiff;
-    return uc;
+    if (p->upperCaseSpecial) {
+        const ushort *specialCase = specialCaseMap + p->upperCaseDiff;
+        if (*specialCase == 1)
+            return specialCase[1];
+    }
+    return uc + p->upperCaseDiff;
 }
 
 template <typename T>
 static inline T toTitleCase_helper(T uc)
 {
     const QUnicodeTables::Properties *p = qGetProp(uc);
-    if (!p->titleCaseSpecial)
-        return uc + p->titleCaseDiff;
-    return uc;
+    if (p->titleCaseSpecial) {
+        const ushort *specialCase = specialCaseMap + p->titleCaseDiff;
+        if (*specialCase == 1)
+            return specialCase[1];
+    }
+    return uc + p->titleCaseDiff;
 }
 
 template <typename T>
 static inline T toCaseFolded_helper(T uc)
 {
-    return uc + qGetProp(uc)->caseFoldDiff;
+    const QUnicodeTables::Properties *p = qGetProp(uc);
+    if (p->caseFoldSpecial) {
+        const ushort *specialCase = specialCaseMap + p->caseFoldDiff;
+        if (*specialCase == 1)
+            return specialCase[1];
+    }
+    return uc + p->caseFoldDiff;
 }
 
 /*!
index 65b52f3..39747aa 100644 (file)
@@ -5263,7 +5263,20 @@ QString QString::toCaseFolded() const
                     prop = qGetProp(*p);
                 }
                 if (prop->caseFoldSpecial) {
+                    const ushort *specialCase = specialCaseMap + prop->caseFoldDiff;
+                    ushort length = *specialCase++;
+#if 0
+                    int pos = pp - s.d->data;
+                    s.resize(s.d->size + length - 1);
+                    pp = s.d->data + pos;
+                    while (length--)
+                        *pp++ = *specialCase++;
+#else
                     //### we currently don't support full case foldings
+                    Q_ASSERT(length == 1);
+                    Q_UNUSED(length)
+                    *pp++ = *specialCase;
+#endif
                 } else {
                     *pp++ = *p + prop->caseFoldDiff;
                 }
index 0b81150..d8f8340 100644 (file)
@@ -76,7 +76,7 @@ namespace QUnicodeTables {
         ushort lowerCaseSpecial : 1;
         ushort upperCaseSpecial : 1;
         ushort titleCaseSpecial : 1;
-        ushort caseFoldSpecial  : 1; /* currently unused */
+        ushort caseFoldSpecial  : 1;
         signed short mirrorDiff    : 16;
         signed short lowerCaseDiff : 16;
         signed short upperCaseDiff : 16;
index d769176..488026d 100644 (file)
@@ -383,7 +383,7 @@ static const char *property_string =
     "        ushort lowerCaseSpecial : 1;\n"
     "        ushort upperCaseSpecial : 1;\n"
     "        ushort titleCaseSpecial : 1;\n"
-    "        ushort caseFoldSpecial  : 1; /* currently unused */\n"
+    "        ushort caseFoldSpecial  : 1;\n"
     "        signed short mirrorDiff    : 16;\n"
     "        signed short lowerCaseDiff : 16;\n"
     "        signed short upperCaseDiff : 16;\n"
@@ -785,10 +785,16 @@ static void readUnicodeData()
             int upperCase = properties[UD_UpperCase].toInt(&ok, 16);
             Q_ASSERT(ok);
             int diff = upperCase - codepoint;
-            if (qAbs(diff) >= (1<<14))
-                qWarning() << "upperCaseDiff exceeded (" << hex << codepoint << "->" << upperCase << ")";
-            data.p.upperCaseDiff = diff;
-            maxUpperCaseDiff = qMax(maxUpperCaseDiff, qAbs(diff));
+            if (qAbs(diff) >= (1<<14)) {
+                qWarning() << "upperCaseDiff exceeded (" << hex << codepoint << "->" << upperCase << "); map it for special case";
+                // if the condition below doesn't hold anymore we need to modify our special upper casing code in qchar.cpp
+                Q_ASSERT(!QChar::requiresSurrogates(codepoint) && !QChar::requiresSurrogates(upperCase));
+                data.p.upperCaseSpecial = true;
+                data.p.upperCaseDiff = appendToSpecialCaseMap(QList<int>() << upperCase);
+            } else {
+                data.p.upperCaseDiff = diff;
+                maxUpperCaseDiff = qMax(maxUpperCaseDiff, qAbs(diff));
+            }
             if (QChar::requiresSurrogates(codepoint) || QChar::requiresSurrogates(upperCase)) {
                 // if the conditions below doesn't hold anymore we need to modify our upper casing code
                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(upperCase));
@@ -799,10 +805,16 @@ static void readUnicodeData()
             int lowerCase = properties[UD_LowerCase].toInt(&ok, 16);
             Q_ASSERT(ok);
             int diff = lowerCase - codepoint;
-            if (qAbs(diff) >= (1<<14))
-                qWarning() << "lowerCaseDiff exceeded (" << hex << codepoint << "->" << lowerCase << ")";
-            data.p.lowerCaseDiff = diff;
-            maxLowerCaseDiff = qMax(maxLowerCaseDiff, qAbs(diff));
+            if (qAbs(diff) >= (1<<14)) {
+                qWarning() << "lowerCaseDiff exceeded (" << hex << codepoint << "->" << lowerCase << "); map it for special case";
+                // if the condition below doesn't hold anymore we need to modify our special lower casing code in qchar.cpp
+                Q_ASSERT(!QChar::requiresSurrogates(codepoint) && !QChar::requiresSurrogates(lowerCase));
+                data.p.lowerCaseSpecial = true;
+                data.p.lowerCaseDiff = appendToSpecialCaseMap(QList<int>() << lowerCase);
+            } else {
+                data.p.lowerCaseDiff = diff;
+                maxLowerCaseDiff = qMax(maxLowerCaseDiff, qAbs(diff));
+            }
             if (QChar::requiresSurrogates(codepoint) || QChar::requiresSurrogates(lowerCase)) {
                 // if the conditions below doesn't hold anymore we need to modify our lower casing code
                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(lowerCase));
@@ -816,10 +828,16 @@ static void readUnicodeData()
             int titleCase = properties[UD_TitleCase].toInt(&ok, 16);
             Q_ASSERT(ok);
             int diff = titleCase - codepoint;
-            if (qAbs(diff) >= (1<<14))
-                qWarning() << "titleCaseDiff exceeded (" << hex << codepoint << "->" << titleCase << ")";
-            data.p.titleCaseDiff = diff;
-            maxTitleCaseDiff = qMax(maxTitleCaseDiff, qAbs(diff));
+            if (qAbs(diff) >= (1<<14)) {
+                qWarning() << "titleCaseDiff exceeded (" << hex << codepoint << "->" << titleCase << "); map it for special case";
+                // if the condition below doesn't hold anymore we need to modify our special title casing code in qchar.cpp
+                Q_ASSERT(!QChar::requiresSurrogates(codepoint) && !QChar::requiresSurrogates(titleCase));
+                data.p.titleCaseSpecial = true;
+                data.p.titleCaseDiff = appendToSpecialCaseMap(QList<int>() << titleCase);
+            } else {
+                data.p.titleCaseDiff = diff;
+                maxTitleCaseDiff = qMax(maxTitleCaseDiff, qAbs(diff));
+            }
             if (QChar::requiresSurrogates(codepoint) || QChar::requiresSurrogates(titleCase)) {
                 // if the conditions below doesn't hold anymore we need to modify our title casing code
                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(titleCase));
@@ -1355,10 +1373,16 @@ static void readCaseFolding()
         if (foldMap.size() == 1) {
             int caseFolded = foldMap.at(0);
             int diff = caseFolded - codepoint;
-            if (qAbs(diff) >= (1<<14))
-                qWarning() << "caseFoldDiff exceeded (" << hex << codepoint << "->" << caseFolded << ")";
-            ud.p.caseFoldDiff = diff;
-            maxCaseFoldDiff = qMax(maxCaseFoldDiff, qAbs(diff));
+            if (qAbs(diff) >= (1<<14)) {
+                qWarning() << "caseFoldDiff exceeded (" << hex << codepoint << "->" << caseFolded << "); map it for special case";
+                // if the condition below doesn't hold anymore we need to modify our special case folding code in qchar.cpp
+                Q_ASSERT(!QChar::requiresSurrogates(codepoint) && !QChar::requiresSurrogates(caseFolded));
+                ud.p.caseFoldSpecial = true;
+                ud.p.caseFoldDiff = appendToSpecialCaseMap(foldMap);
+            } else {
+                ud.p.caseFoldDiff = diff;
+                maxCaseFoldDiff = qMax(maxCaseFoldDiff, qAbs(diff));
+            }
             if (QChar::requiresSurrogates(codepoint) || QChar::requiresSurrogates(caseFolded)) {
                 // if the conditions below doesn't hold anymore we need to modify our case folding code
                 Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(caseFolded));