From d94e3e1abfecad4c2f64b44db854352d946d7551 Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Mon, 4 Apr 2011 13:18:36 -0400 Subject: [PATCH] soup-headers: Update UTF-8 header param handling for RFC 5987 Update the handling of UTF-8 header parameters for the changes/clarifications to RFC 2231 published in RFC 5987: * Decode iso-8859-1 params in addition to UTF-8 ones * An encoded UTF-8 param should override an unencoded param of the same name, regardless of which order they appear in --- libsoup/soup-headers.c | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/libsoup/soup-headers.c b/libsoup/soup-headers.c index ecc3aec..33f6f6f 100644 --- a/libsoup/soup-headers.c +++ b/libsoup/soup-headers.c @@ -664,15 +664,21 @@ decode_quoted_string (char *quoted_string) } static gboolean -decode_rfc2231 (char *encoded_string) +decode_rfc5987 (char *encoded_string) { char *q, *decoded; + gboolean iso_8859_1 = FALSE; q = strchr (encoded_string, '\''); if (!q) return FALSE; if (g_ascii_strncasecmp (encoded_string, "UTF-8", - q - encoded_string) != 0) + q - encoded_string) == 0) + ; + else if (g_ascii_strncasecmp (encoded_string, "iso-8859-1", + q - encoded_string) == 0) + iso_8859_1 = TRUE; + else return FALSE; q = strchr (q + 1, '\''); @@ -680,7 +686,23 @@ decode_rfc2231 (char *encoded_string) return FALSE; decoded = soup_uri_decode (q + 1); - /* strlen(decoded) <= strlen(q + 1) < strlen(encoded_string) */ + if (iso_8859_1) { + char *utf8 = g_convert_with_fallback (decoded, -1, "UTF-8", + "iso-8859-1", "_", + NULL, NULL, NULL); + g_free (decoded); + if (!utf8) + return FALSE; + decoded = utf8; + } + + /* If encoded_string was UTF-8, then each 3-character %-escape + * will be converted to a single byte, and so decoded is + * shorter than encoded_string. If encoded_string was + * iso-8859-1, then each 3-character %-escape will be + * converted into at most 2 bytes in UTF-8, and so it's still + * shorter. + */ strcpy (encoded_string, decoded); g_free (decoded); return TRUE; @@ -692,6 +714,7 @@ parse_param_list (const char *header, char delim) GHashTable *params; GSList *list, *iter; char *item, *eq, *name_end, *value; + gboolean override; list = parse_list (header, delim); if (!list) @@ -703,6 +726,7 @@ parse_param_list (const char *header, char delim) for (iter = list; iter; iter = iter->next) { item = iter->data; + override = FALSE; eq = strchr (item, '='); if (eq) { @@ -719,16 +743,18 @@ parse_param_list (const char *header, char delim) if (name_end[-1] == '*' && name_end > item + 1) { name_end[-1] = '\0'; - if (!decode_rfc2231 (value)) { + if (!decode_rfc5987 (value)) { g_free (item); continue; } + override = TRUE; } else if (*value == '"') decode_quoted_string (value); } else value = NULL; - g_hash_table_insert (params, item, value); + if (override || !g_hash_table_lookup (params, item)) + g_hash_table_replace (params, item, value); } g_slist_free (list); @@ -745,7 +771,7 @@ parse_param_list (const char *header, char delim) * Tokens that don't have an associated value will still be added to * the resulting hash table, but with a %NULL value. * - * This also handles RFC2231 encoding (which in HTTP is mostly used + * This also handles RFC5987 encoding (which in HTTP is mostly used * for giving UTF8-encoded filenames in the Content-Disposition * header). * @@ -771,7 +797,7 @@ soup_header_parse_param_list (const char *header) * Tokens that don't have an associated value will still be added to * the resulting hash table, but with a %NULL value. * - * This also handles RFC2231 encoding (which in HTTP is mostly used + * This also handles RFC5987 encoding (which in HTTP is mostly used * for giving UTF8-encoded filenames in the Content-Disposition * header). * @@ -805,7 +831,7 @@ soup_header_free_param_list (GHashTable *param_list) } static void -append_param_rfc2231 (GString *string, +append_param_rfc5987 (GString *string, const char *name, const char *value) { @@ -865,7 +891,7 @@ soup_header_g_string_append_param_quoted (GString *string, * quotes or backslashes in @value. * * Alternatively, if @value is a non-ASCII UTF-8 string, it will be - * appended using RFC2231 syntax. Although in theory this is supposed + * appended using RFC5987 syntax. Although in theory this is supposed * to work anywhere in HTTP that uses this style of parameter, in * reality, it can only be used portably with the Content-Disposition * "filename" parameter. -- 2.7.4