QRegExp: fix \i \I \c \C \p \P escape sequences
authorGiuseppe D'Angelo <dangelog@gmail.com>
Wed, 29 Feb 2012 00:02:53 +0000 (00:02 +0000)
committerQt by Nokia <qt-info@nokia.com>
Wed, 29 Feb 2012 23:09:57 +0000 (00:09 +0100)
Those escape sequences have a special meaning in the XML Schema 1.1
regular expressions, but not in Perl-compatible ones.

An escape sequence that has no special meaning should match the
escaped character itself; this patch fixes QRegExp's behaviour in
that regard (previously, it added a character class matching
nothing).

Change-Id: I983f923baa7c2ec19938b96353f3a205e6c06d58
Reviewed-by: Andy Shaw <andy.shaw@digia.com>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
src/corelib/tools/qregexp.cpp
tests/auto/corelib/tools/qregexp/tst_qregexp.cpp

index d7bcd0e..e55144e 100644 (file)
@@ -3015,6 +3015,8 @@ int QRegExpEngine::getEscape()
         if (xmlSchemaExtensions) {
             yyCharClass->setNegative(!yyCharClass->negative());
             // fall through
+        } else {
+            break;
         }
     case 'i':
         if (xmlSchemaExtensions) {
@@ -3045,12 +3047,16 @@ int QRegExpEngine::getEscape()
             yyCharClass->addRange(0xf900, 0xfdcf);
             yyCharClass->addRange(0xfdf0, 0xfffd);
             yyCharClass->addRange((ushort)0x10000, (ushort)0xeffff);
+            return Tok_CharClass;
+        } else {
+            break;
         }
-        return Tok_CharClass;
     case 'C':
         if (xmlSchemaExtensions) {
             yyCharClass->setNegative(!yyCharClass->negative());
             // fall through
+        } else {
+            break;
         }
     case 'c':
         if (xmlSchemaExtensions) {
@@ -3087,12 +3093,16 @@ int QRegExpEngine::getEscape()
             yyCharClass->addRange((ushort)0x10000, (ushort)0xeffff);
             yyCharClass->addRange(0x0300, 0x036f);
             yyCharClass->addRange(0x203f, 0x2040);
+            return Tok_CharClass;
+        } else {
+            break;
         }
-        return Tok_CharClass;
     case 'P':
         if (xmlSchemaExtensions) {
             yyCharClass->setNegative(!yyCharClass->negative());
             // fall through
+        } else {
+            break;
         }
     case 'p':
         if (xmlSchemaExtensions) {
@@ -3246,8 +3256,10 @@ int QRegExpEngine::getEscape()
             } else {
                 error(RXERR_CATEGORY);
             }
+            return Tok_CharClass;
+        } else {
+            break;
         }
-        return Tok_CharClass;
 #endif
 #ifndef QT_NO_REGEXP_ESCAPE
     case 'x':
@@ -3265,20 +3277,21 @@ int QRegExpEngine::getEscape()
         return Tok_Char | val;
 #endif
     default:
-        if (prevCh >= '1' && prevCh <= '9') {
+        break;
+    }
+    if (prevCh >= '1' && prevCh <= '9') {
 #ifndef QT_NO_REGEXP_BACKREF
-            val = prevCh - '0';
-            while (yyCh >= '0' && yyCh <= '9') {
-                val = (val * 10) + (yyCh - '0');
-                yyCh = getChar();
-            }
-            return Tok_BackRef | val;
+        val = prevCh - '0';
+        while (yyCh >= '0' && yyCh <= '9') {
+            val = (val * 10) + (yyCh - '0');
+            yyCh = getChar();
+        }
+        return Tok_BackRef | val;
 #else
-            error(RXERR_DISABLED);
+        error(RXERR_DISABLED);
 #endif
-        }
-        return Tok_Char | prevCh;
     }
+    return Tok_Char | prevCh;
 }
 
 #ifndef QT_NO_REGEXP_INTERVAL
index a697e23..2995c18 100644 (file)
@@ -81,6 +81,7 @@ private slots:
     void interval();
     void validityCheck_data();
     void validityCheck();
+    void escapeSequences();
 };
 
 // Testing get/set functions
@@ -1373,6 +1374,29 @@ void tst_QRegExp::validityCheck()
     QCOMPARE(rx2.cap(), QString(""));
 }
 
+void tst_QRegExp::escapeSequences()
+{
+    QString perlSyntaxSpecialChars("0123456789afnrtvbBdDwWsSx\\|[]{}()^$?+*");
+    QString w3cXmlSchema11SyntaxSpecialChars("cCiIpP"); // as well as the perl ones
+    for (int i = ' '; i <= 127; ++i) {
+        QLatin1Char c(i);
+        if (perlSyntaxSpecialChars.indexOf(c) == -1) {
+            QRegExp rx(QString("\\%1").arg(c), Qt::CaseSensitive, QRegExp::RegExp);
+            // we'll never have c == 'a' since it's a special character
+            QString s = QString("aaa%1aaa").arg(c);
+            QCOMPARE(rx.indexIn(s), 3);
+
+            rx.setPatternSyntax(QRegExp::RegExp2);
+            QCOMPARE(rx.indexIn(s), 3);
+
+            if (w3cXmlSchema11SyntaxSpecialChars.indexOf(c) == -1) {
+                rx.setPatternSyntax(QRegExp::W3CXmlSchema11);
+                QCOMPARE(rx.indexIn(s), 3);
+            }
+        }
+    }
+}
+
 
 QTEST_APPLESS_MAIN(tst_QRegExp)
 #include "tst_qregexp.moc"