Bug 566530 - Handle (illegal) unencoded spaces in URIs
author Dan Winship <danw@src.gnome.org>
Mon, 23 Mar 2009 21:39:40 +0000 (21:39 +0000)
committer Dan Winship <danw@src.gnome.org>
Mon, 23 Mar 2009 21:39:40 +0000 (21:39 +0000)
* libsoup/soup-uri.c (uri_normalized_copy): optionally fix up
parts with unencoded spaces in them.
(soup_uri_new_with_base): tell uri_normalized_copy() to fix up
spaces in the path and query components

* tests/uri-parsing.c (abs_tests): test parsing and unparsing a
URI with an unencoded space in it.

* tests/redirect-test.c (tests, server_callback): add a test of
redirecting to a URI with an unencoded space in it.

svn path=/trunk/; revision=1252

ChangeLog
libsoup/soup-uri.c
tests/redirect-test.c
tests/uri-parsing.c

index 5581c49..3477375 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2009-03-23  Dan Winship  <danw@gnome.org>
+
+       Bug 566530 - Handle (illegal) unencoded spaces in URIs
+
+       * libsoup/soup-uri.c (uri_normalized_copy): optionally fix up
+       parts with unencoded spaces in them.
+       (soup_uri_new_with_base): tell uri_normalized_copy() to fix up
+       spaces in the path and query components
+
+       * tests/uri-parsing.c (abs_tests): test parsing and unparsing a
+       URI with an unencoded space in it.
+
+       * tests/redirect-test.c (tests, server_callback): add a test of
+       redirecting to a URI with an unencoded space in it.
+
 2009-03-15  Dan Winship  <danw@gnome.org>
 
        * configure.in: 2.26.0
index ff923ad..2743576 100644 (file)
@@ -93,7 +93,7 @@
 
 static void append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars);
 static char *uri_decoded_copy (const char *str, int length);
-static char *uri_normalized_copy (const char *str, int length, const char *unescape_extra);
+static char *uri_normalized_copy (const char *str, int length, const char *unescape_extra, gboolean fixup);
 
 const char *_SOUP_URI_SCHEME_HTTP, *_SOUP_URI_SCHEME_HTTPS;
 
@@ -152,7 +152,7 @@ soup_uri_new_with_base (SoupURI *base, const char *uri_string)
        end = hash = strchr (uri_string, '#');
        if (hash && hash[1]) {
                uri->fragment = uri_normalized_copy (hash + 1, strlen (hash + 1),
-                                                    NULL);
+                                                    NULL, FALSE);
                if (!uri->fragment) {
                        soup_uri_free (uri);
                        return NULL;
@@ -249,7 +249,7 @@ soup_uri_new_with_base (SoupURI *base, const char *uri_string)
                if (question[1]) {
                        uri->query = uri_normalized_copy (question + 1,
                                                          end - (question + 1),
-                                                         NULL);
+                                                         NULL, TRUE);
                        if (!uri->query) {
                                soup_uri_free (uri);
                                return NULL;
@@ -260,7 +260,7 @@ soup_uri_new_with_base (SoupURI *base, const char *uri_string)
 
        if (end != uri_string) {
                uri->path = uri_normalized_copy (uri_string, end - uri_string,
-                                                NULL);
+                                                NULL, TRUE);
                if (!uri->path) {
                        soup_uri_free (uri);
                        return NULL;
@@ -654,10 +654,12 @@ soup_uri_decode (const char *part)
 }
 
 static char *
-uri_normalized_copy (const char *part, int length, const char *unescape_extra)
+uri_normalized_copy (const char *part, int length,
+                    const char *unescape_extra, gboolean fixup)
 {
        unsigned char *s, *d, c;
        char *normalized = g_strndup (part, length);
+       gboolean need_fixup = FALSE;
 
        s = d = (unsigned char *)normalized;
        do {
@@ -678,10 +680,27 @@ uri_normalized_copy (const char *part, int length, const char *unescape_extra)
                                *d++ = g_ascii_toupper (*s++);
                                *d++ = g_ascii_toupper (*s);
                        }
-               } else
+               } else {
+                       if (*s == ' ')
+                               need_fixup = TRUE;
                        *d++ = *s;
+               }
        } while (*s++);
 
+       if (fixup && need_fixup) {
+               char *tmp, *sp;
+               /* This code is lame, but so are people who put
+                * unencoded spaces in URLs!
+                */
+               while ((sp = strchr (normalized, ' '))) {
+                       tmp = g_strdup_printf ("%.*s%%20%s",
+                                              (int)(sp - normalized),
+                                              normalized, sp + 1);
+                       g_free (normalized);
+                       normalized = tmp;
+               };
+       }
+
        return normalized;
 }
 
@@ -709,7 +728,7 @@ uri_normalized_copy (const char *part, int length, const char *unescape_extra)
 char *
 soup_uri_normalize (const char *part, const char *unescape_extra)
 {
-       return uri_normalized_copy (part, strlen (part), unescape_extra);
+       return uri_normalized_copy (part, strlen (part), unescape_extra, FALSE);
 }
 
 
index 5bb37e9..e863068 100644 (file)
@@ -93,6 +93,12 @@ static struct {
            { "GET", "/", 200 },
            { NULL } } },
        { { { "POST", "/307", 307 },
+           { NULL } } },
+
+       /* Test behavior with recoverably-bad Location header
+        */
+       { { { "GET", "/bad", 302 },
+           { "GET", "/bad%20with%20spaces", 200 },
            { NULL } } }
 };
 static const int n_tests = G_N_ELEMENTS (tests);
@@ -206,6 +212,19 @@ server_callback (SoupServer *server, SoupMessage *msg,
        char *remainder;
        guint status_code;
 
+       if (g_str_has_prefix (path, "/bad")) {
+               if (!strcmp (path, "/bad")) {
+                       soup_message_set_status (msg, SOUP_STATUS_FOUND);
+                       soup_message_headers_replace (msg->response_headers,
+                                                     "Location",
+                                                     "/bad with spaces");
+               } else if (!strcmp (path, "/bad with spaces"))
+                       soup_message_set_status (msg, SOUP_STATUS_OK);
+               else
+                       soup_message_set_status (msg, SOUP_STATUS_NOT_FOUND);
+               return;
+       }
+
        if (!strcmp (path, "/")) {
                if (msg->method != SOUP_METHOD_GET &&
                    msg->method != SOUP_METHOD_HEAD) {
index 52d5271..407ddfd 100644 (file)
@@ -57,7 +57,11 @@ static struct {
        { "http://[::FFFF:129.144.52.38]:80/index.html",
          "http://[::FFFF:129.144.52.38]/index.html" },
        { "http://[2010:836B:4179::836B:4179]",
-         "http://[2010:836B:4179::836B:4179]/" }
+         "http://[2010:836B:4179::836B:4179]/" },
+
+       /* Try to recover certain kinds of invalid URIs */
+       { "http://host/path with spaces",
+         "http://host/path%20with%20spaces" }
 };
 static int num_abs_tests = G_N_ELEMENTS(abs_tests);