Introduce QUrl::DecodeReserved and reorder the enums
authorThiago Macieira <thiago.macieira@intel.com>
Fri, 30 Mar 2012 16:32:19 +0000 (13:32 -0300)
committerQt by Nokia <qt-info@nokia.com>
Wed, 11 Apr 2012 21:32:04 +0000 (23:32 +0200)
DecodeReserved applies to all characters between 0x21 and 0x7E that
aren't unreserved, a delimiter, or the percent sign itself.

Change-Id: Ie64bddb6b814dfa3bb8380e3aa24de1bb3645a65
Reviewed-by: Shane Kearns <shane.kearns@accenture.com>
src/corelib/io/qurl.h
src/corelib/io/qurlrecode.cpp
tests/auto/corelib/io/qurl/tst_qurl.cpp

index 4eaf652..27b81df 100644 (file)
@@ -140,10 +140,11 @@ public:
     enum ComponentFormattingOption {
         FullyEncoded = 0x000000,
         DecodeSpaces = 0x100000,
-        DecodeDelimiters = 0x200000 | 0x400000,
-        DecodeUnicode = 0x800000,
+        DecodeUnicode = 0x200000,
+        DecodeDelimiters = 0x400000 | 0x800000,
+        DecodeReserved = 0x1000000,
 
-        PrettyDecoded = DecodeSpaces | DecodeDelimiters | DecodeUnicode,
+        PrettyDecoded = DecodeSpaces | DecodeDelimiters | DecodeReserved | DecodeUnicode,
         MostDecoded = PrettyDecoded
     };
     Q_DECLARE_FLAGS(ComponentFormattingOptions, ComponentFormattingOption)
index 50adfa0..02fced5 100644 (file)
@@ -109,6 +109,116 @@ static const uchar defaultActionTable[96] = {
     2  // BSKP
 };
 
+// mask tables, in negative polarity
+// 0x00 if it belongs to this category
+// 0xff if it doesn't
+
+static const uchar delimsMask[96] = {
+    0xff, // space
+    0x00, // '!' (sub-delim)
+    0xff, // '"'
+    0x00, // '#' (gen-delim)
+    0x00, // '$' (gen-delim)
+    0xff, // '%' (percent)
+    0x00, // '&' (gen-delim)
+    0x00, // "'" (sub-delim)
+    0x00, // '(' (sub-delim)
+    0x00, // ')' (sub-delim)
+    0x00, // '*' (sub-delim)
+    0x00, // '+' (sub-delim)
+    0x00, // ',' (sub-delim)
+    0xff, // '-' (unreserved)
+    0xff, // '.' (unreserved)
+    0x00, // '/' (gen-delim)
+
+    0xff, 0xff, 0xff, 0xff, 0xff,  // '0' to '4' (unreserved)
+    0xff, 0xff, 0xff, 0xff, 0xff,  // '5' to '9' (unreserved)
+    0x00, // ':' (gen-delim)
+    0x00, // ';' (sub-delim)
+    0xff, // '<'
+    0x00, // '=' (sub-delim)
+    0xff, // '>'
+    0x00, // '?' (gen-delim)
+
+    0x00, // '@' (gen-delim)
+    0xff, 0xff, 0xff, 0xff, 0xff,  // 'A' to 'E' (unreserved)
+    0xff, 0xff, 0xff, 0xff, 0xff,  // 'F' to 'J' (unreserved)
+    0xff, 0xff, 0xff, 0xff, 0xff,  // 'K' to 'O' (unreserved)
+    0xff, 0xff, 0xff, 0xff, 0xff,  // 'P' to 'T' (unreserved)
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  // 'U' to 'Z' (unreserved)
+    0x00, // '[' (gen-delim)
+    0xff, // '\'
+    0x00, // ']' (gen-delim)
+    0xff, // '^'
+    0xff, // '_' (unreserved)
+
+    0xff, // '`'
+    0xff, 0xff, 0xff, 0xff, 0xff,  // 'a' to 'e' (unreserved)
+    0xff, 0xff, 0xff, 0xff, 0xff,  // 'f' to 'j' (unreserved)
+    0xff, 0xff, 0xff, 0xff, 0xff,  // 'k' to 'o' (unreserved)
+    0xff, 0xff, 0xff, 0xff, 0xff,  // 'p' to 't' (unreserved)
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  // 'u' to 'z' (unreserved)
+    0xff, // '{'
+    0xff, // '|'
+    0xff, // '}'
+    0xff, // '~' (unreserved)
+
+    0xff  // BSKP
+};
+
+static const uchar reservedMask[96] = {
+    0xff, // space
+    0xff, // '!' (sub-delim)
+    0x00, // '"'
+    0xff, // '#' (gen-delim)
+    0xff, // '$' (gen-delim)
+    0xff, // '%' (percent)
+    0xff, // '&' (gen-delim)
+    0xff, // "'" (sub-delim)
+    0xff, // '(' (sub-delim)
+    0xff, // ')' (sub-delim)
+    0xff, // '*' (sub-delim)
+    0xff, // '+' (sub-delim)
+    0xff, // ',' (sub-delim)
+    0xff, // '-' (unreserved)
+    0xff, // '.' (unreserved)
+    0xff, // '/' (gen-delim)
+
+    0xff, 0xff, 0xff, 0xff, 0xff,  // '0' to '4' (unreserved)
+    0xff, 0xff, 0xff, 0xff, 0xff,  // '5' to '9' (unreserved)
+    0xff, // ':' (gen-delim)
+    0xff, // ';' (sub-delim)
+    0x00, // '<'
+    0xff, // '=' (sub-delim)
+    0x00, // '>'
+    0xff, // '?' (gen-delim)
+
+    0xff, // '@' (gen-delim)
+    0xff, 0xff, 0xff, 0xff, 0xff,  // 'A' to 'E' (unreserved)
+    0xff, 0xff, 0xff, 0xff, 0xff,  // 'F' to 'J' (unreserved)
+    0xff, 0xff, 0xff, 0xff, 0xff,  // 'K' to 'O' (unreserved)
+    0xff, 0xff, 0xff, 0xff, 0xff,  // 'P' to 'T' (unreserved)
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  // 'U' to 'Z' (unreserved)
+    0xff, // '[' (gen-delim)
+    0x00, // '\'
+    0xff, // ']' (gen-delim)
+    0x00, // '^'
+    0xff, // '_' (unreserved)
+
+    0x00, // '`'
+    0xff, 0xff, 0xff, 0xff, 0xff,  // 'a' to 'e' (unreserved)
+    0xff, 0xff, 0xff, 0xff, 0xff,  // 'f' to 'j' (unreserved)
+    0xff, 0xff, 0xff, 0xff, 0xff,  // 'k' to 'o' (unreserved)
+    0xff, 0xff, 0xff, 0xff, 0xff,  // 'p' to 't' (unreserved)
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff,  // 'u' to 'z' (unreserved)
+    0x00, // '{'
+    0x00, // '|'
+    0x00, // '}'
+    0xff, // '~' (unreserved)
+
+    0xff  // BSKP
+};
+
 static inline bool isHex(ushort c)
 {
     return (c >= 'a' && c <= 'f') ||
@@ -464,12 +574,19 @@ non_trivial:
     return 0;
 }
 
+template <size_t N>
+static void maskTable(uchar (&table)[N], const uchar (&mask)[N])
+{
+    for (size_t i = 0; i < N; ++i)
+        table[i] &= mask[i];
+}
+
 Q_AUTOTEST_EXPORT int
 qt_urlRecode(QString &appendTo, const QChar *begin, const QChar *end,
              QUrl::ComponentFormattingOptions encoding, const ushort *tableModifications)
 {
     uchar actionTable[sizeof defaultActionTable];
-    if (encoding & QUrl::DecodeDelimiters) {
+    if (encoding & QUrl::DecodeDelimiters && encoding & QUrl::DecodeReserved) {
         // reset the table
         memset(actionTable, DecodeCharacter, sizeof actionTable);
         if (!(encoding & QUrl::DecodeSpaces))
@@ -480,6 +597,10 @@ qt_urlRecode(QString &appendTo, const QChar *begin, const QChar *end,
         actionTable[0x7F - ' '] = EncodeCharacter;
     } else {
         memcpy(actionTable, defaultActionTable, sizeof actionTable);
+        if (encoding & QUrl::DecodeDelimiters)
+            maskTable(actionTable, delimsMask);
+        if (encoding & QUrl::DecodeReserved)
+            maskTable(actionTable, reservedMask);
         if (encoding & QUrl::DecodeSpaces)
             actionTable[0] = DecodeCharacter; // decode
     }
index aae970e..7deb0fc 100644 (file)
@@ -2713,9 +2713,9 @@ void tst_QUrl::componentEncodings_data()
                                             << QString() << "%21%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D" << QString()
                                             << "?%21%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D";
 
-    // other characters:  '"' / "<" / ">" / "^" / "\" / "{" / "|" "}"
+    // reserved characters:  '"' / "<" / ">" / "^" / "\" / "{" / "|" "}"
     // the RFC does not allow them undecoded anywhere, but we do
-    QTest::newRow("encoded-others") << QUrl("x://\"<>^\\{|}:\"<>^\\{|}@host/\"<>^\\{|}?\"<>^\\{|}#\"<>^\\{|}")
+    QTest::newRow("encoded-reserved") << QUrl("x://\"<>^\\{|}:\"<>^\\{|}@host/\"<>^\\{|}?\"<>^\\{|}#\"<>^\\{|}")
                                     << int(QUrl::FullyEncoded)
                                     << "%22%3C%3E%5E%5C%7B%7C%7D" << "%22%3C%3E%5E%5C%7B%7C%7D"
                                     << "%22%3C%3E%5E%5C%7B%7C%7D:%22%3C%3E%5E%5C%7B%7C%7D"
@@ -2724,9 +2724,9 @@ void tst_QUrl::componentEncodings_data()
                                     << "%22%3C%3E%5E%5C%7B%7C%7D"
                                     << "x://%22%3C%3E%5E%5C%7B%7C%7D:%22%3C%3E%5E%5C%7B%7C%7D@host/%22%3C%3E%5E%5C%7B%7C%7D"
                                        "?%22%3C%3E%5E%5C%7B%7C%7D#%22%3C%3E%5E%5C%7B%7C%7D";
-    QTest::newRow("decoded-others") << QUrl("x://%22%3C%3E%5E%5C%7B%7C%7D:%22%3C%3E%5E%5C%7B%7C%7D@host"
+    QTest::newRow("decoded-reserved") << QUrl("x://%22%3C%3E%5E%5C%7B%7C%7D:%22%3C%3E%5E%5C%7B%7C%7D@host"
                                             "/%22%3C%3E%5E%5C%7B%7C%7D?%22%3C%3E%5E%5C%7B%7C%7D#%22%3C%3E%5E%5C%7B%7C%7D")
-                                    << int(QUrl::DecodeDelimiters)
+                                    << int(QUrl::DecodeReserved)
                                     << "\"<>^\\{|}" << "\"<>^\\{|}" << "\"<>^\\{|}:\"<>^\\{|}"
                                     << "host" << "\"<>^\\{|}:\"<>^\\{|}@host"
                                     << "/\"<>^\\{|}" << "\"<>^\\{|}" << "\"<>^\\{|}"