SoupContentDecoder: deal with broken apache Content-Encoding
authorDan Winship <danw@gnome.org>
Sun, 22 May 2011 14:01:30 +0000 (10:01 -0400)
committerDan Winship <danw@gnome.org>
Sun, 22 May 2011 14:03:26 +0000 (10:03 -0400)
When sending a .gz file, Apache will (by default) incorrectly claim
"Content-Encoding: gzip", causing us to do the wrong thing with it.
Work around this bug in the same way Firefox does.

https://bugzilla.gnome.org/show_bug.cgi?id=613361

libsoup/soup-content-decoder.c
tests/coding-test.c

index d7ddd77..3ab240c 100644 (file)
@@ -131,6 +131,16 @@ soup_content_decoder_got_headers_cb (SoupMessage *msg, SoupContentDecoder *decod
        if (!header)
                return;
 
+       /* Workaround for an apache bug (bgo 613361) */
+       if (!g_ascii_strcasecmp (header, "gzip")) {
+               const char *content_type = soup_message_headers_get_content_type (msg->response_headers, NULL);
+
+               if (content_type &&
+                   (!g_ascii_strcasecmp (content_type, "application/gzip") ||
+                    !g_ascii_strcasecmp (content_type, "application/x-gzip")))
+                       return;
+       }
+
        /* OK, really, no one is ever going to use more than one
         * encoding, but we'll be robust.
         */
index f3bd852..14b046b 100644 (file)
@@ -26,14 +26,19 @@ server_callback (SoupServer *server, SoupMessage *msg,
                 const char *path, GHashTable *query,
                 SoupClientContext *context, gpointer data)
 {
-       const char *accept_encoding, *junk, *noencode;
+       const char *accept_encoding, *options;
        GSList *codings;
        char *file = NULL, *contents;
        gsize length;
 
+       options = soup_message_headers_get_one (msg->request_headers,
+                                               "X-Test-Options");
+       if (!options)
+               options = "";
+
        accept_encoding = soup_message_headers_get_list (msg->request_headers,
                                                         "Accept-Encoding");
-       if (accept_encoding)
+       if (accept_encoding && !soup_header_contains (options, "force-encode"))
                codings = soup_header_parse_quality_list (accept_encoding, NULL);
        else
                codings = NULL;
@@ -51,16 +56,6 @@ server_callback (SoupServer *server, SoupMessage *msg,
        }
        soup_header_free_list (codings);
 
-       noencode = soup_message_headers_get_one (msg->request_headers,
-                                                "X-No-Encode");
-       if (noencode) {
-               /* Force it to send the ungzipped version, even though
-                * we already added "Content-Encoding: gzip"
-                */
-               g_free (file);
-               file = NULL;
-       }
-
        if (!file)
                file = g_strdup_printf (SRCDIR "/resources%s", path);
        if (!g_file_get_contents (file, &contents, &length, NULL)) {
@@ -74,15 +69,30 @@ server_callback (SoupServer *server, SoupMessage *msg,
        }
        g_free (file);
 
+       if (soup_header_contains (options, "force-encode")) {
+               soup_message_headers_replace (msg->response_headers,
+                                             "Content-Encoding",
+                                             "gzip");
+       }
+
+       /* Content-Type matches the "real" format, not the sent format */
+       if (g_str_has_suffix (path, ".gz")) {
+               soup_message_headers_append (msg->response_headers,
+                                            "Content-Type",
+                                            "application/gzip");
+       } else {
+               soup_message_headers_append (msg->response_headers,
+                                            "Content-Type",
+                                            "text/plain");
+       }
+
        soup_message_set_status (msg, SOUP_STATUS_OK);
        soup_message_body_append (msg->response_body,
                                  SOUP_MEMORY_TAKE, contents, length);
 
-       junk = soup_message_headers_get_one (msg->request_headers,
-                                            "X-Trailing-Junk");
-       if (junk) {
+       if (soup_header_contains (options, "trailing-junk")) {
                soup_message_body_append (msg->response_body, SOUP_MEMORY_COPY,
-                                         junk, strlen (junk));
+                                         options, strlen (options));
        }
 }
 
@@ -90,9 +100,9 @@ static void
 do_coding_test (void)
 {
        SoupSession *session;
-       SoupMessage *msg, *msgz, *msgj, *msgn;
+       SoupMessage *msg, *msgz, *msgj, *msge;
        SoupURI *uri;
-       const char *coding;
+       const char *coding, *type;
 
        session = soup_test_session_new (SOUP_TYPE_SESSION_ASYNC, NULL);
        uri = soup_uri_new_with_base (base_uri, "/mbox");
@@ -116,7 +126,12 @@ do_coding_test (void)
                debug_printf (1, "  SOUP_MESSAGE_CONTENT_DECODED set!\n");
                errors++;
        }
-
+       type = soup_message_headers_get_one (msg->response_headers, "Content-Type");
+       if (!type || g_ascii_strcasecmp (type, "text/plain") != 0) {
+               debug_printf (1, "  Unexpected Content-Type: %s\n",
+                             type ? type : "(none)");
+               errors++;
+       }
 
        debug_printf (1, "GET /mbox, Accept-Encoding: gzip\n");
        soup_session_add_feature_by_type (session, SOUP_TYPE_CONTENT_DECODER);
@@ -137,6 +152,12 @@ do_coding_test (void)
                debug_printf (1, "  SOUP_MESSAGE_CONTENT_DECODED not set!\n");
                errors++;
        }
+       type = soup_message_headers_get_one (msgz->response_headers, "Content-Type");
+       if (!type || g_ascii_strcasecmp (type, "text/plain") != 0) {
+               debug_printf (1, "  Unexpected Content-Type: %s\n",
+                             type ? type : "(none)");
+               errors++;
+       }
 
        if (msg->response_body->length != msgz->response_body->length) {
                debug_printf (1, "  Message length mismatch: %lu (plain) vs %lu (compressed)\n",
@@ -154,7 +175,7 @@ do_coding_test (void)
        debug_printf (1, "GET /mbox, Accept-Encoding: gzip, plus trailing junk\n");
        msgj = soup_message_new_from_uri ("GET", uri);
        soup_message_headers_append (msgj->request_headers,
-                                    "X-Trailing-Junk", "junk!");
+                                    "X-Test-Options", "trailing-junk");
        soup_session_send_message (session, msgj);
        if (!SOUP_STATUS_IS_SUCCESSFUL (msgj->status_code)) {
                debug_printf (1, "  Unexpected status %d %s\n",
@@ -171,6 +192,12 @@ do_coding_test (void)
                debug_printf (1, "  SOUP_MESSAGE_CONTENT_DECODED not set!\n");
                errors++;
        }
+       type = soup_message_headers_get_one (msgj->response_headers, "Content-Type");
+       if (!type || g_ascii_strcasecmp (type, "text/plain") != 0) {
+               debug_printf (1, "  Unexpected Content-Type: %s\n",
+                             type ? type : "(none)");
+               errors++;
+       }
 
        if (msg->response_body->length != msgj->response_body->length) {
                debug_printf (1, "  Message length mismatch: %lu (plain) vs %lu (compressed w/ junk)\n",
@@ -186,16 +213,16 @@ do_coding_test (void)
 
 
        debug_printf (1, "GET /mbox, Accept-Encoding: gzip, with server error\n");
-       msgn = soup_message_new_from_uri ("GET", uri);
-       soup_message_headers_append (msgn->request_headers,
-                                    "X-No-Encode", "true");
-       soup_session_send_message (session, msgn);
-       if (!SOUP_STATUS_IS_SUCCESSFUL (msgn->status_code)) {
+       msge = soup_message_new_from_uri ("GET", uri);
+       soup_message_headers_append (msge->request_headers,
+                                    "X-Test-Options", "force-encode");
+       soup_session_send_message (session, msge);
+       if (!SOUP_STATUS_IS_SUCCESSFUL (msge->status_code)) {
                debug_printf (1, "  Unexpected status %d %s\n",
-                             msgn->status_code, msgn->reason_phrase);
+                             msge->status_code, msge->reason_phrase);
                errors++;
        }
-       coding = soup_message_headers_get_one (msgn->response_headers, "Content-Encoding");
+       coding = soup_message_headers_get_one (msge->response_headers, "Content-Encoding");
        if (!coding || g_ascii_strcasecmp (coding, "gzip") != 0) {
                debug_printf (1, "  Unexpected Content-Encoding: %s\n",
                              coding ? coding : "(none)");
@@ -204,21 +231,28 @@ do_coding_test (void)
        /* Since the content wasn't actually gzip-encoded, decoding it
         * should have failed and so the flag won't be set.
         */
-       if (soup_message_get_flags (msgn) & SOUP_MESSAGE_CONTENT_DECODED) {
+       if (soup_message_get_flags (msge) & SOUP_MESSAGE_CONTENT_DECODED) {
                debug_printf (1, "  SOUP_MESSAGE_CONTENT_DECODED set!\n");
                errors++;
        }
+       type = soup_message_headers_get_one (msge->response_headers, "Content-Type");
+       if (!type || g_ascii_strcasecmp (type, "text/plain") != 0) {
+               debug_printf (1, "  Unexpected Content-Type: %s\n",
+                             type ? type : "(none)");
+               errors++;
+       }
+
        /* Failed content-decoding should have left the body untouched
         * from what the server sent... which happens to be the
         * uncompressed data.
         */
-       if (msg->response_body->length != msgn->response_body->length) {
+       if (msg->response_body->length != msge->response_body->length) {
                debug_printf (1, "  Message length mismatch: %lu (plain) vs %lu (mis-encoded)\n",
                              (gulong)msg->response_body->length,
-                             (gulong)msgn->response_body->length);
+                             (gulong)msge->response_body->length);
                errors++;
        } else if (memcmp (msg->response_body->data,
-                          msgn->response_body->data,
+                          msge->response_body->data,
                           msg->response_body->length) != 0) {
                debug_printf (1, "  Message data mismatch (plain/misencoded)\n");
                errors++;
@@ -228,7 +262,79 @@ do_coding_test (void)
        g_object_unref (msg);
        g_object_unref (msgz);
        g_object_unref (msgj);
-       g_object_unref (msgn);
+       g_object_unref (msge);
+       soup_uri_free (uri);
+
+
+       uri = soup_uri_new_with_base (base_uri, "/mbox.gz");
+
+       debug_printf (1, "GET /mbox.gz, Accept-Encoding: gzip\n");
+       soup_session_add_feature_by_type (session, SOUP_TYPE_CONTENT_DECODER);
+       msgz = soup_message_new_from_uri ("GET", uri);
+       soup_session_send_message (session, msgz);
+       if (!SOUP_STATUS_IS_SUCCESSFUL (msgz->status_code)) {
+               debug_printf (1, "  Unexpected status %d %s\n",
+                             msgz->status_code, msgz->reason_phrase);
+               errors++;
+       }
+       coding = soup_message_headers_get_one (msgz->response_headers, "Content-Encoding");
+       if (coding) {
+               debug_printf (1, "  Unexpected Content-Encoding: %s\n", coding);
+               errors++;
+       }
+       type = soup_message_headers_get_one (msgz->response_headers, "Content-Type");
+       if (!type || g_ascii_strcasecmp (type, "application/gzip") != 0) {
+               debug_printf (1, "  Unexpected Content-Type: %s\n",
+                             type ? type : "(none)");
+               errors++;
+       }
+
+
+       debug_printf (1, "GET /mbox.gz, Accept-Encoding: gzip, with server error\n");
+       msge = soup_message_new_from_uri ("GET", uri);
+       soup_message_headers_append (msge->request_headers,
+                                    "X-Test-Options", "force-encode");
+       soup_session_send_message (session, msge);
+       if (!SOUP_STATUS_IS_SUCCESSFUL (msge->status_code)) {
+               debug_printf (1, "  Unexpected status %d %s\n",
+                             msge->status_code, msge->reason_phrase);
+               errors++;
+       }
+       coding = soup_message_headers_get_one (msge->response_headers, "Content-Encoding");
+       if (!coding || g_ascii_strcasecmp (coding, "gzip") != 0) {
+               debug_printf (1, "  Unexpected Content-Encoding: %s\n",
+                             coding ? coding : "(none)");
+               errors++;
+       }
+       /* SoupContentDecoder should have recognized the bug and thus
+        * not decoded it
+        */
+       if (soup_message_get_flags (msge) & SOUP_MESSAGE_CONTENT_DECODED) {
+               debug_printf (1, "  SOUP_MESSAGE_CONTENT_DECODED set!\n");
+               errors++;
+       }
+       type = soup_message_headers_get_one (msge->response_headers, "Content-Type");
+       if (!type || g_ascii_strcasecmp (type, "application/gzip") != 0) {
+               debug_printf (1, "  Unexpected Content-Type: %s\n",
+                             type ? type : "(none)");
+               errors++;
+       }
+
+       if (msgz->response_body->length != msge->response_body->length) {
+               debug_printf (1, "  Message length mismatch: %lu (.gz) vs %lu (mis-encoded)\n",
+                             (gulong)msgz->response_body->length,
+                             (gulong)msge->response_body->length);
+               errors++;
+       } else if (memcmp (msgz->response_body->data,
+                          msge->response_body->data,
+                          msgz->response_body->length) != 0) {
+               debug_printf (1, "  Message data mismatch (gz/misencoded)\n");
+               errors++;
+       }
+
+
+       g_object_unref (msgz);
+       g_object_unref (msge);
        soup_uri_free (uri);
 
        soup_test_session_abort_unref (session);