1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
3 * soup-headers.c: HTTP message header parsing
5 * Copyright (C) 2001-2003, Ximian, Inc.
15 #include "soup-headers.h"
20 * @str: the header string (including the Request-Line or Status-Line,
21 * but not the trailing blank line)
22 * @len: length of @str
23 * @dest: #SoupMessageHeaders to store the header values in
25 * Parses the headers of an HTTP request or response in @str and
26 * stores the results in @dest. Beware that @dest may be modified even
29 * This is a low-level method; normally you would use
30 * soup_headers_parse_request() or soup_headers_parse_response().
32 * Return value: %TRUE if the headers were parsed successfully, %FALSE otherwise
37 soup_headers_parse (const char *str, int len, SoupMessageHeaders *dest)
39 const char *headers_start;
40 char *headers_copy, *name, *name_end, *value, *value_end;
43 gboolean success = FALSE;
45 g_return_val_if_fail (str != NULL, FALSE);
46 g_return_val_if_fail (dest != NULL, FALSE);
48 /* As per RFC 2616 section 19.3, we treat '\n' as the
49 * line terminator, and '\r', if it appears, merely as
50 * ignorable trailing whitespace.
53 /* Skip over the Request-Line / Status-Line */
54 headers_start = memchr (str, '\n', len);
57 /* No '\0's in the Request-Line / Status-Line */
58 if (memchr (str, '\0', headers_start - str))
61 /* We work on a copy of the headers, which we can write '\0's
62 * into, so that we don't have to individually g_strndup and
63 * then g_free each header name and value.
65 copy_len = len - (headers_start - str);
66 headers_copy = g_malloc (copy_len + 1);
67 memcpy (headers_copy, headers_start, copy_len);
68 headers_copy[copy_len] = '\0';
69 value_end = headers_copy;
71 /* There shouldn't be any '\0's in the headers already, but
72 * this is the web we're talking about.
74 while ((p = memchr (headers_copy, '\0', copy_len))) {
75 memmove (p, p + 1, copy_len - (p - headers_copy));
79 while (*(value_end + 1)) {
81 name_end = strchr (name, ':');
83 /* Reject if there is no ':', or the header name is
84 * empty, or it contains whitespace.
88 name + strcspn (name, " \t\r\n") < name_end) {
89 /* Ignore this line. Note that if it has
90 * continuation lines, we'll end up ignoring
91 * them too since they'll start with spaces.
93 value_end = strchr (name, '\n');
99 /* Find the end of the value; i.e., an end-of-line that
100 * isn't followed by a continuation line.
102 value = name_end + 1;
103 value_end = strchr (name, '\n');
106 while (*(value_end + 1) == ' ' || *(value_end + 1) == '\t') {
107 value_end = strchr (value_end + 1, '\n');
115 /* Skip leading whitespace */
116 while (value < value_end &&
117 (*value == ' ' || *value == '\t' ||
118 *value == '\r' || *value == '\n'))
121 /* Collapse continuation lines */
122 while ((eol = strchr (value, '\n'))) {
123 /* find start of next line */
125 while (*sol == ' ' || *sol == '\t')
128 /* back up over trailing whitespace on current line */
129 while (eol[-1] == ' ' || eol[-1] == '\t' || eol[-1] == '\r')
132 /* Delete all but one SP */
134 memmove (eol + 1, sol, strlen (sol) + 1);
137 /* clip trailing whitespace */
138 eol = strchr (value, '\0');
139 while (eol > value &&
140 (eol[-1] == ' ' || eol[-1] == '\t' || eol[-1] == '\r'))
144 /* convert (illegal) '\r's to spaces */
145 for (p = strchr (value, '\r'); p; p = strchr (p, '\r'))
148 soup_message_headers_append (dest, name, value);
153 g_free (headers_copy);
158 * soup_headers_parse_request:
159 * @str: the headers (up to, but not including, the trailing blank line)
160 * @len: length of @str
161 * @req_headers: #SoupMessageHeaders to store the header values in
162 * @req_method: (out) (allow-none): if non-%NULL, will be filled in with the
164 * @req_path: (out) (allow-none): if non-%NULL, will be filled in with the
166 * @ver: (out) (allow-none): if non-%NULL, will be filled in with the HTTP
169 * Parses the headers of an HTTP request in @str and stores the
170 * results in @req_method, @req_path, @ver, and @req_headers.
172 * Beware that @req_headers may be modified even on failure.
174 * Return value: %SOUP_STATUS_OK if the headers could be parsed, or an
175 * HTTP error to be returned to the client if they could not be.
178 soup_headers_parse_request (const char *str,
180 SoupMessageHeaders *req_headers,
183 SoupHTTPVersion *ver)
185 const char *method, *method_end, *path, *path_end;
186 const char *version, *version_end, *headers;
187 unsigned long major_version, minor_version;
190 g_return_val_if_fail (str != NULL, SOUP_STATUS_MALFORMED);
192 /* RFC 2616 4.1 "servers SHOULD ignore any empty line(s)
193 * received where a Request-Line is expected."
195 while ((*str == '\r' || *str == '\n') && len > 0) {
200 return SOUP_STATUS_BAD_REQUEST;
202 /* RFC 2616 19.3 "[servers] SHOULD accept any amount of SP or
203 * HT characters between [Request-Line] fields"
206 method = method_end = str;
207 while (method_end < str + len && *method_end != ' ' && *method_end != '\t')
209 if (method_end >= str + len)
210 return SOUP_STATUS_BAD_REQUEST;
213 while (path < str + len && (*path == ' ' || *path == '\t'))
215 if (path >= str + len)
216 return SOUP_STATUS_BAD_REQUEST;
219 while (path_end < str + len && *path_end != ' ' && *path_end != '\t')
221 if (path_end >= str + len)
222 return SOUP_STATUS_BAD_REQUEST;
225 while (version < str + len && (*version == ' ' || *version == '\t'))
227 if (version + 8 >= str + len)
228 return SOUP_STATUS_BAD_REQUEST;
230 if (strncmp (version, "HTTP/", 5) != 0 ||
231 !g_ascii_isdigit (version[5]))
232 return SOUP_STATUS_BAD_REQUEST;
233 major_version = strtoul (version + 5, &p, 10);
234 if (*p != '.' || !g_ascii_isdigit (p[1]))
235 return SOUP_STATUS_BAD_REQUEST;
236 minor_version = strtoul (p + 1, &p, 10);
238 if (major_version != 1)
239 return SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED;
240 if (minor_version > 1)
241 return SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED;
243 headers = version_end;
244 while (headers < str + len && (*headers == '\r' || *headers == ' '))
246 if (headers >= str + len || *headers != '\n')
247 return SOUP_STATUS_BAD_REQUEST;
249 if (!soup_headers_parse (str, len, req_headers))
250 return SOUP_STATUS_BAD_REQUEST;
252 if (soup_message_headers_get_expectations (req_headers) &
253 SOUP_EXPECTATION_UNRECOGNIZED)
254 return SOUP_STATUS_EXPECTATION_FAILED;
256 if (minor_version == 0)
257 soup_message_headers_clean_connection_headers (req_headers);
260 *req_method = g_strndup (method, method_end - method);
262 *req_path = g_strndup (path, path_end - path);
264 *ver = (minor_version == 0) ? SOUP_HTTP_1_0 : SOUP_HTTP_1_1;
266 return SOUP_STATUS_OK;
270 * soup_headers_parse_status_line:
271 * @status_line: an HTTP Status-Line
272 * @ver: (out) (allow-none): if non-%NULL, will be filled in with the HTTP
274 * @status_code: (out) (allow-none): if non-%NULL, will be filled in with
276 * @reason_phrase: (out) (allow-none): if non-%NULL, will be filled in with
279 * Parses the HTTP Status-Line string in @status_line into @ver,
280 * @status_code, and @reason_phrase. @status_line must be terminated by
281 * either "\0" or "\r\n".
283 * Return value: %TRUE if @status_line was parsed successfully.
286 soup_headers_parse_status_line (const char *status_line,
287 SoupHTTPVersion *ver,
289 char **reason_phrase)
291 unsigned long major_version, minor_version, code;
292 const char *code_start, *code_end, *phrase_start, *phrase_end;
295 g_return_val_if_fail (status_line != NULL, FALSE);
297 if (strncmp (status_line, "HTTP/", 5) == 0 &&
298 g_ascii_isdigit (status_line[5])) {
299 major_version = strtoul (status_line + 5, &p, 10);
300 if (*p != '.' || !g_ascii_isdigit (p[1]))
302 minor_version = strtoul (p + 1, &p, 10);
303 if (major_version != 1)
305 if (minor_version > 1)
308 *ver = (minor_version == 0) ? SOUP_HTTP_1_0 : SOUP_HTTP_1_1;
309 } else if (!strncmp (status_line, "ICY", 3)) {
310 /* Shoutcast not-quite-HTTP format */
312 *ver = SOUP_HTTP_1_0;
313 p = (char *)status_line + 3;
318 while (*code_start == ' ' || *code_start == '\t')
320 code_end = code_start;
321 while (*code_end >= '0' && *code_end <= '9')
323 if (code_end != code_start + 3)
325 code = atoi (code_start);
326 if (code < 100 || code > 999)
331 phrase_start = code_end;
332 while (*phrase_start == ' ' || *phrase_start == '\t')
334 phrase_end = phrase_start + strcspn (phrase_start, "\n");
335 while (phrase_end > phrase_start &&
336 (phrase_end[-1] == '\r' || phrase_end[-1] == ' ' || phrase_end[-1] == '\t'))
339 *reason_phrase = g_strndup (phrase_start, phrase_end - phrase_start);
345 * soup_headers_parse_response:
346 * @str: the headers (up to, but not including, the trailing blank line)
347 * @len: length of @str
348 * @headers: #SoupMessageHeaders to store the header values in
349 * @ver: (out) (allow-none): if non-%NULL, will be filled in with the HTTP
351 * @status_code: (out) (allow-none): if non-%NULL, will be filled in with
353 * @reason_phrase: (out) (allow-none): if non-%NULL, will be filled in with
356 * Parses the headers of an HTTP response in @str and stores the
357 * results in @ver, @status_code, @reason_phrase, and @headers.
359 * Beware that @headers may be modified even on failure.
361 * Return value: %TRUE if the response was parsed successfully, %FALSE otherwise.
364 soup_headers_parse_response (const char *str,
366 SoupMessageHeaders *headers,
367 SoupHTTPVersion *ver,
369 char **reason_phrase)
371 SoupHTTPVersion version;
373 g_return_val_if_fail (str != NULL, FALSE);
375 /* Workaround for broken servers that send extra line breaks
376 * after a response, which we then see prepended to the next
377 * response on that connection.
379 while ((*str == '\r' || *str == '\n') && len > 0) {
386 if (!soup_headers_parse (str, len, headers))
389 if (!soup_headers_parse_status_line (str,
398 if (version == SOUP_HTTP_1_0)
399 soup_message_headers_clean_connection_headers (headers);
406 * Parsing of specific HTTP header types
410 skip_lws (const char *s)
412 while (g_ascii_isspace (*s))
418 unskip_lws (const char *s, const char *start)
420 while (s > start && g_ascii_isspace (*(s - 1)))
426 skip_delims (const char *s, char delim)
428 /* The grammar allows for multiple delimiters */
429 while (g_ascii_isspace (*s) || *s == delim)
435 skip_item (const char *s, char delim)
437 gboolean quoted = FALSE;
438 const char *start = s;
440 /* A list item ends at the last non-whitespace character
441 * before a delimiter which is not inside a quoted-string, or
442 * at the end of the string.
449 if (*s == '\\' && *(s + 1))
458 return unskip_lws (s, start);
462 parse_list (const char *header, char delim)
467 header = skip_delims (header, delim);
469 end = skip_item (header, delim);
470 list = g_slist_prepend (list, g_strndup (header, end - header));
471 header = skip_delims (end, delim);
474 return g_slist_reverse (list);
478 * soup_header_parse_list:
479 * @header: a header value
481 * Parses a header whose content is described by RFC2616 as
482 * "#something", where "something" does not itself contain commas,
483 * except as part of quoted-strings.
485 * Return value: (transfer full) (element-type utf8): a #GSList of
486 * list elements, as allocated strings
489 soup_header_parse_list (const char *header)
491 g_return_val_if_fail (header != NULL, NULL);
493 return parse_list (header, ',');
502 sort_by_qval (const void *a, const void *b)
504 QualityItem *qia = (QualityItem *)a;
505 QualityItem *qib = (QualityItem *)b;
507 if (qia->qval == qib->qval)
509 else if (qia->qval < qib->qval)
516 * soup_header_parse_quality_list:
517 * @header: a header value
518 * @unacceptable: (out) (allow-none) (transfer full) (element-type utf8): on
519 * return, will contain a list of unacceptable values
521 * Parses a header whose content is a list of items with optional
522 * "qvalue"s (e.g., Accept, Accept-Charset, Accept-Encoding,
523 * Accept-Language, TE).
525 * If @unacceptable is not %NULL, then on return, it will contain the
526 * items with qvalue 0. Either way, those items will be removed from
529 * Return value: (transfer full) (element-type utf8): a #GSList of
530 * acceptable values (as allocated strings), highest-qvalue first.
533 soup_header_parse_quality_list (const char *header, GSList **unacceptable)
537 GSList *sorted, *iter;
539 const char *param, *equal, *value;
543 g_return_val_if_fail (header != NULL, NULL);
546 *unacceptable = NULL;
548 unsorted = soup_header_parse_list (header);
549 array = g_new0 (QualityItem, g_slist_length (unsorted));
550 for (iter = unsorted, n = 0; iter; iter = iter->next) {
553 for (semi = strchr (item, ';'); semi; semi = strchr (semi + 1, ';')) {
554 param = skip_lws (semi + 1);
557 equal = skip_lws (param + 1);
558 if (!equal || *equal != '=')
560 value = skip_lws (equal + 1);
564 if (value[0] != '0' && value[0] != '1')
566 qval = (double)(value[0] - '0');
567 if (value[0] == '0' && value[1] == '.') {
568 if (g_ascii_isdigit (value[2])) {
569 qval += (double)(value[2] - '0') / 10;
570 if (g_ascii_isdigit (value[3])) {
571 qval += (double)(value[3] - '0') / 100;
572 if (g_ascii_isdigit (value[4]))
573 qval += (double)(value[4] - '0') / 1000;
584 *unacceptable = g_slist_prepend (*unacceptable,
588 array[n].item = item;
589 array[n].qval = qval;
593 g_slist_free (unsorted);
595 qsort (array, n, sizeof (QualityItem), sort_by_qval);
598 sorted = g_slist_prepend (sorted, array[n].item);
605 * soup_header_free_list: (skip)
606 * @list: a #GSList returned from soup_header_parse_list() or
607 * soup_header_parse_quality_list()
612 soup_header_free_list (GSList *list)
614 g_slist_free_full (list, g_free);
618 * soup_header_contains:
619 * @header: An HTTP header suitable for parsing with
620 * soup_header_parse_list()
623 * Parses @header to see if it contains the token @token (matched
624 * case-insensitively). Note that this can't be used with lists
627 * Return value: whether or not @header contains @token
630 soup_header_contains (const char *header, const char *token)
635 g_return_val_if_fail (header != NULL, FALSE);
636 g_return_val_if_fail (token != NULL, FALSE);
638 len = strlen (token);
640 header = skip_delims (header, ',');
642 end = skip_item (header, ',');
643 if (end - header == len &&
644 !g_ascii_strncasecmp (header, token, len))
646 header = skip_delims (end, ',');
653 decode_quoted_string (char *quoted_string)
657 src = quoted_string + 1;
659 while (*src && *src != '"') {
660 if (*src == '\\' && *(src + 1))
668 decode_rfc5987 (char *encoded_string)
671 gboolean iso_8859_1 = FALSE;
673 q = strchr (encoded_string, '\'');
676 if (g_ascii_strncasecmp (encoded_string, "UTF-8",
677 q - encoded_string) == 0)
679 else if (g_ascii_strncasecmp (encoded_string, "iso-8859-1",
680 q - encoded_string) == 0)
685 q = strchr (q + 1, '\'');
689 decoded = soup_uri_decode (q + 1);
691 char *utf8 = g_convert_with_fallback (decoded, -1, "UTF-8",
700 /* If encoded_string was UTF-8, then each 3-character %-escape
701 * will be converted to a single byte, and so decoded is
702 * shorter than encoded_string. If encoded_string was
703 * iso-8859-1, then each 3-character %-escape will be
704 * converted into at most 2 bytes in UTF-8, and so it's still
707 strcpy (encoded_string, decoded);
713 parse_param_list (const char *header, char delim, gboolean strict)
717 char *item, *eq, *name_end, *value;
718 gboolean override, duplicated;
720 params = g_hash_table_new_full (soup_str_case_hash,
724 list = parse_list (header, delim);
725 for (iter = list; iter; iter = iter->next) {
729 eq = strchr (item, '=');
731 name_end = (char *)unskip_lws (eq, item);
732 if (name_end == item) {
733 /* That's no good... */
740 value = (char *)skip_lws (eq + 1);
742 if (name_end[-1] == '*' && name_end > item + 1) {
744 if (!decode_rfc5987 (value)) {
749 } else if (*value == '"')
750 decode_quoted_string (value);
754 duplicated = g_hash_table_lookup_extended (params, item, NULL, NULL);
756 if (strict && duplicated) {
757 soup_header_free_param_list (params);
759 g_slist_foreach (iter, (GFunc)g_free, NULL);
761 } else if (override || !duplicated)
762 g_hash_table_replace (params, item, value);
772 * soup_header_parse_param_list:
773 * @header: a header value
775 * Parses a header which is a comma-delimited list of something like:
776 * <literal>token [ "=" ( token | quoted-string ) ]</literal>.
778 * Tokens that don't have an associated value will still be added to
779 * the resulting hash table, but with a %NULL value.
781 * This also handles RFC5987 encoding (which in HTTP is mostly used
782 * for giving UTF-8-encoded filenames in the Content-Disposition
785 * Return value: (element-type utf8 utf8) (transfer full): a
786 * #GHashTable of list elements, which can be freed with
787 * soup_header_free_param_list().
790 soup_header_parse_param_list (const char *header)
792 g_return_val_if_fail (header != NULL, NULL);
794 return parse_param_list (header, ',', FALSE);
798 * soup_header_parse_semi_param_list:
799 * @header: a header value
801 * Parses a header which is a semicolon-delimited list of something
802 * like: <literal>token [ "=" ( token | quoted-string ) ]</literal>.
804 * Tokens that don't have an associated value will still be added to
805 * the resulting hash table, but with a %NULL value.
807 * This also handles RFC5987 encoding (which in HTTP is mostly used
808 * for giving UTF-8-encoded filenames in the Content-Disposition
811 * Return value: (element-type utf8 utf8) (transfer full): a
812 * #GHashTable of list elements, which can be freed with
813 * soup_header_free_param_list().
818 soup_header_parse_semi_param_list (const char *header)
820 g_return_val_if_fail (header != NULL, NULL);
822 return parse_param_list (header, ';', FALSE);
826 * soup_header_parse_param_list_strict:
827 * @header: a header value
829 * A strict version of soup_header_parse_param_list()
830 * that bails out if there are duplicate parameters.
831 * Note that this function will treat RFC5987-encoded
832 * parameters as duplicated if an ASCII version is also
833 * present. For header fields that might contain
834 * RFC5987-encoded parameters, use
835 * soup_header_parse_param_list() instead.
837 * Return value: (element-type utf8 utf8) (transfer full) (nullable):
838 * a #GHashTable of list elements, which can be freed with
839 * soup_header_free_param_list() or %NULL if there are duplicate
845 soup_header_parse_param_list_strict (const char *header)
847 g_return_val_if_fail (header != NULL, NULL);
849 return parse_param_list (header, ',', TRUE);
853 * soup_header_parse_semi_param_list_strict:
854 * @header: a header value
856 * A strict version of soup_header_parse_semi_param_list()
857 * that bails out if there are duplicate parameters.
858 * Note that this function will treat RFC5987-encoded
859 * parameters as duplicated if an ASCII version is also
860 * present. For header fields that might contain
861 * RFC5987-encoded parameters, use
862 * soup_header_parse_semi_param_list() instead.
864 * Return value: (element-type utf8 utf8) (transfer full) (nullable):
865 * a #GHashTable of list elements, which can be freed with
866 * soup_header_free_param_list() or %NULL if there are duplicate
872 soup_header_parse_semi_param_list_strict (const char *header)
874 g_return_val_if_fail (header != NULL, NULL);
876 return parse_param_list (header, ';', TRUE);
880 * soup_header_free_param_list:
881 * @param_list: (element-type utf8 utf8): a #GHashTable returned from soup_header_parse_param_list()
882 * or soup_header_parse_semi_param_list()
887 soup_header_free_param_list (GHashTable *param_list)
889 g_return_if_fail (param_list != NULL);
891 g_hash_table_destroy (param_list);
895 append_param_rfc5987 (GString *string,
901 g_string_append (string, name);
902 g_string_append (string, "*=UTF-8''");
903 encoded = soup_uri_encode (value, " *'%()<>@,;:\\\"/[]?=");
904 g_string_append (string, encoded);
909 append_param_quoted (GString *string,
915 g_string_append (string, name);
916 g_string_append (string, "=\"");
918 while (*value == '\\' || *value == '"') {
919 g_string_append_c (string, '\\');
920 g_string_append_c (string, *value++);
922 len = strcspn (value, "\\\"");
923 g_string_append_len (string, value, len);
926 g_string_append_c (string, '"');
930 append_param_internal (GString *string,
933 gboolean allow_token)
936 gboolean use_token = allow_token;
938 for (v = value; *v; v++) {
940 if (g_utf8_validate (value, -1, NULL)) {
941 append_param_rfc5987 (string, name, value);
947 } else if (!soup_char_is_token (*v))
952 g_string_append (string, name);
953 g_string_append_c (string, '=');
954 g_string_append (string, value);
956 append_param_quoted (string, name, value);
960 * soup_header_g_string_append_param_quoted:
961 * @string: a #GString being used to construct an HTTP header value
962 * @name: a parameter name
963 * @value: a parameter value
965 * Appends something like <literal>@name="@value"</literal> to
966 * @string, taking care to escape any quotes or backslashes in @value.
968 * If @value is (non-ASCII) UTF-8, this will instead use RFC 5987
969 * encoding, just like soup_header_g_string_append_param().
974 soup_header_g_string_append_param_quoted (GString *string,
978 g_return_if_fail (string != NULL);
979 g_return_if_fail (name != NULL);
980 g_return_if_fail (value != NULL);
982 append_param_internal (string, name, value, FALSE);
986 * soup_header_g_string_append_param:
987 * @string: a #GString being used to construct an HTTP header value
988 * @name: a parameter name
989 * @value: a parameter value, or %NULL
991 * Appends something like <literal>@name=@value</literal> to @string,
992 * taking care to quote @value if needed, and if so, to escape any
993 * quotes or backslashes in @value.
995 * Alternatively, if @value is a non-ASCII UTF-8 string, it will be
996 * appended using RFC5987 syntax. Although in theory this is supposed
997 * to work anywhere in HTTP that uses this style of parameter, in
998 * reality, it can only be used portably with the Content-Disposition
999 * "filename" parameter.
1001 * If @value is %NULL, this will just append @name to @string.
1006 soup_header_g_string_append_param (GString *string,
1010 g_return_if_fail (string != NULL);
1011 g_return_if_fail (name != NULL);
1014 g_string_append (string, name);
1018 append_param_internal (string, name, value, TRUE);