/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 * soup-headers.c: HTTP message header parsing
 *
 * Copyright (C) 2001-2003, Ximian, Inc.
 */
11 #include "soup-headers.h"
16 * @str: the header string (including the Request-Line or Status-Line,
17 * but not the trailing blank line)
18 * @len: length of @str
19 * @dest: #SoupMessageHeaders to store the header values in
21 * Parses the headers of an HTTP request or response in @str and
22 * stores the results in @dest. Beware that @dest may be modified even
25 * This is a low-level method; normally you would use
26 * soup_headers_parse_request() or soup_headers_parse_response().
28 * Return value: success or failure
33 soup_headers_parse (const char *str, int len, SoupMessageHeaders *dest)
35 const char *headers_start;
36 char *headers_copy, *name, *name_end, *value, *value_end;
38 gboolean success = FALSE;
40 g_return_val_if_fail (str != NULL, FALSE);
41 g_return_val_if_fail (dest != NULL, FALSE);
43 /* RFC 2616 does allow NUL bytes in the headers, but httpbis
44 * is changing that, and we can't deal with them anyway.
46 if (memchr (str, '\0', len))
49 /* As per RFC 2616 section 19.3, we treat '\n' as the
50 * line terminator, and '\r', if it appears, merely as
51 * ignorable trailing whitespace.
54 /* Skip over the Request-Line / Status-Line */
55 headers_start = memchr (str, '\n', len);
59 /* We work on a copy of the headers, which we can write '\0's
60 * into, so that we don't have to individually g_strndup and
61 * then g_free each header name and value.
63 headers_copy = g_strndup (headers_start, len - (headers_start - str));
64 value_end = headers_copy;
66 while (*(value_end + 1)) {
68 name_end = strchr (name, ':');
70 /* Reject if there is no ':', or the header name is
71 * empty, or it contains whitespace.
75 name + strcspn (name, " \t\r\n") < name_end) {
76 /* Ignore this line. Note that if it has
77 * continuation lines, we'll end up ignoring
78 * them too since they'll start with spaces.
80 value_end = strchr (name, '\n');
86 /* Find the end of the value; ie, an end-of-line that
87 * isn't followed by a continuation line.
90 value_end = strchr (name, '\n');
93 while (*(value_end + 1) == ' ' || *(value_end + 1) == '\t') {
94 value_end = strchr (value_end + 1, '\n');
102 /* Skip leading whitespace */
103 while (value < value_end &&
104 (*value == ' ' || *value == '\t' ||
105 *value == '\r' || *value == '\n'))
108 /* Collapse continuation lines */
109 while ((eol = strchr (value, '\n'))) {
110 /* find start of next line */
112 while (*sol == ' ' || *sol == '\t')
115 /* back up over trailing whitespace on current line */
116 while (eol[-1] == ' ' || eol[-1] == '\t' || eol[-1] == '\r')
119 /* Delete all but one SP */
121 g_memmove (eol + 1, sol, strlen (sol) + 1);
124 /* clip trailing whitespace */
125 eol = strchr (value, '\0');
126 while (eol > value &&
127 (eol[-1] == ' ' || eol[-1] == '\t' || eol[-1] == '\r'))
131 /* convert (illegal) '\r's to spaces */
132 for (p = strchr (value, '\r'); p; p = strchr (p, '\r'))
135 soup_message_headers_append (dest, name, value);
140 g_free (headers_copy);
145 * soup_headers_parse_request:
146 * @str: the headers (up to, but not including, the trailing blank line)
147 * @len: length of @str
148 * @req_headers: #SoupMessageHeaders to store the header values in
149 * @req_method: (out) (allow-none): if non-%NULL, will be filled in with the
151 * @req_path: (out) (allow-none): if non-%NULL, will be filled in with the
153 * @ver: (out) (allow-none): if non-%NULL, will be filled in with the HTTP
156 * Parses the headers of an HTTP request in @str and stores the
157 * results in @req_method, @req_path, @ver, and @req_headers.
159 * Beware that @req_headers may be modified even on failure.
161 * Return value: %SOUP_STATUS_OK if the headers could be parsed, or an
162 * HTTP error to be returned to the client if they could not be.
165 soup_headers_parse_request (const char *str,
167 SoupMessageHeaders *req_headers,
170 SoupHTTPVersion *ver)
172 const char *method, *method_end, *path, *path_end;
173 const char *version, *version_end, *headers;
174 unsigned long major_version, minor_version;
177 g_return_val_if_fail (str != NULL, SOUP_STATUS_MALFORMED);
179 /* RFC 2616 4.1 "servers SHOULD ignore any empty line(s)
180 * received where a Request-Line is expected."
182 while ((*str == '\r' || *str == '\n') && len > 0) {
187 return SOUP_STATUS_BAD_REQUEST;
189 /* RFC 2616 19.3 "[servers] SHOULD accept any amount of SP or
190 * HT characters between [Request-Line] fields"
193 method = method_end = str;
194 while (method_end < str + len && *method_end != ' ' && *method_end != '\t')
196 if (method_end >= str + len)
197 return SOUP_STATUS_BAD_REQUEST;
200 while (path < str + len && (*path == ' ' || *path == '\t'))
202 if (path >= str + len)
203 return SOUP_STATUS_BAD_REQUEST;
206 while (path_end < str + len && *path_end != ' ' && *path_end != '\t')
208 if (path_end >= str + len)
209 return SOUP_STATUS_BAD_REQUEST;
212 while (version < str + len && (*version == ' ' || *version == '\t'))
214 if (version + 8 >= str + len)
215 return SOUP_STATUS_BAD_REQUEST;
217 if (strncmp (version, "HTTP/", 5) != 0 ||
218 !g_ascii_isdigit (version[5]))
219 return SOUP_STATUS_BAD_REQUEST;
220 major_version = strtoul (version + 5, &p, 10);
221 if (*p != '.' || !g_ascii_isdigit (p[1]))
222 return SOUP_STATUS_BAD_REQUEST;
223 minor_version = strtoul (p + 1, &p, 10);
225 if (major_version != 1)
226 return SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED;
227 if (minor_version > 1)
228 return SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED;
230 headers = version_end;
231 while (headers < str + len && (*headers == '\r' || *headers == ' '))
233 if (headers >= str + len || *headers != '\n')
234 return SOUP_STATUS_BAD_REQUEST;
236 if (!soup_headers_parse (str, len, req_headers))
237 return SOUP_STATUS_BAD_REQUEST;
239 if (soup_message_headers_get_expectations (req_headers) &
240 SOUP_EXPECTATION_UNRECOGNIZED)
241 return SOUP_STATUS_EXPECTATION_FAILED;
243 if (minor_version == 0)
244 soup_message_headers_clean_connection_headers (req_headers);
247 *req_method = g_strndup (method, method_end - method);
249 *req_path = g_strndup (path, path_end - path);
251 *ver = (minor_version == 0) ? SOUP_HTTP_1_0 : SOUP_HTTP_1_1;
253 return SOUP_STATUS_OK;
257 * soup_headers_parse_status_line:
258 * @status_line: an HTTP Status-Line
259 * @ver: (out) (allow-none): if non-%NULL, will be filled in with the HTTP
261 * @status_code: (out) (allow-none): if non-%NULL, will be filled in with
263 * @reason_phrase: (out) (allow-none): if non-%NULL, will be filled in with
266 * Parses the HTTP Status-Line string in @status_line into @ver,
267 * @status_code, and @reason_phrase. @status_line must be terminated by
268 * either "\0" or "\r\n".
270 * Return value: %TRUE if @status_line was parsed successfully.
273 soup_headers_parse_status_line (const char *status_line,
274 SoupHTTPVersion *ver,
276 char **reason_phrase)
278 unsigned long major_version, minor_version, code;
279 const char *code_start, *code_end, *phrase_start, *phrase_end;
282 g_return_val_if_fail (status_line != NULL, FALSE);
284 if (strncmp (status_line, "HTTP/", 5) == 0 &&
285 g_ascii_isdigit (status_line[5])) {
286 major_version = strtoul (status_line + 5, &p, 10);
287 if (*p != '.' || !g_ascii_isdigit (p[1]))
289 minor_version = strtoul (p + 1, &p, 10);
290 if (major_version != 1)
292 if (minor_version > 1)
295 *ver = (minor_version == 0) ? SOUP_HTTP_1_0 : SOUP_HTTP_1_1;
296 } else if (!strncmp (status_line, "ICY", 3)) {
297 /* Shoutcast not-quite-HTTP format */
299 *ver = SOUP_HTTP_1_0;
300 p = (char *)status_line + 3;
305 while (*code_start == ' ' || *code_start == '\t')
307 code_end = code_start;
308 while (*code_end >= '0' && *code_end <= '9')
310 if (code_end != code_start + 3)
312 code = atoi (code_start);
313 if (code < 100 || code > 599)
318 phrase_start = code_end;
319 while (*phrase_start == ' ' || *phrase_start == '\t')
321 phrase_end = phrase_start + strcspn (phrase_start, "\n");
322 while (phrase_end > phrase_start &&
323 (phrase_end[-1] == '\r' || phrase_end[-1] == ' ' || phrase_end[-1] == '\t'))
326 *reason_phrase = g_strndup (phrase_start, phrase_end - phrase_start);
332 * soup_headers_parse_response:
333 * @str: the headers (up to, but not including, the trailing blank line)
334 * @len: length of @str
335 * @headers: #SoupMessageHeaders to store the header values in
336 * @ver: (out) (allow-none): if non-%NULL, will be filled in with the HTTP
338 * @status_code: (out) (allow-none): if non-%NULL, will be filled in with
340 * @reason_phrase: (out) (allow-none): if non-%NULL, will be filled in with
343 * Parses the headers of an HTTP response in @str and stores the
344 * results in @ver, @status_code, @reason_phrase, and @headers.
346 * Beware that @headers may be modified even on failure.
348 * Return value: success or failure.
351 soup_headers_parse_response (const char *str,
353 SoupMessageHeaders *headers,
354 SoupHTTPVersion *ver,
356 char **reason_phrase)
358 SoupHTTPVersion version;
360 g_return_val_if_fail (str != NULL, FALSE);
362 /* Workaround for broken servers that send extra line breaks
363 * after a response, which we then see prepended to the next
364 * response on that connection.
366 while ((*str == '\r' || *str == '\n') && len > 0) {
373 if (!soup_headers_parse (str, len, headers))
376 if (!soup_headers_parse_status_line (str,
385 if (version == SOUP_HTTP_1_0)
386 soup_message_headers_clean_connection_headers (headers);
/*
 * Parsing of specific HTTP header types
 */
/* Returns the first position at or after @s that is not ASCII
 * whitespace (possibly the terminating '\0').
 */
static const char *
skip_lws (const char *s)
{
	for (; g_ascii_isspace (*s); s++)
		;
	return s;
}
/* Walks backwards from @s over ASCII whitespace, never going before
 * @start, and returns the resulting position.
 */
static const char *
unskip_lws (const char *s, const char *start)
{
	for (; s > start; s--) {
		if (!g_ascii_isspace (s[-1]))
			break;
	}
	return s;
}
/* Returns the first position at or after @s that is neither ASCII
 * whitespace nor @delim.
 */
static const char *
skip_delims (const char *s, char delim)
{
	/* The grammar allows for multiple delimiters */
	for (;;) {
		if (!g_ascii_isspace (*s) && *s != delim)
			break;
		s++;
	}
	return s;
}
422 skip_item (const char *s, char delim)
424 gboolean quoted = FALSE;
425 const char *start = s;
427 /* A list item ends at the last non-whitespace character
428 * before a delimiter which is not inside a quoted-string. Or
429 * at the end of the string.
436 if (*s == '\\' && *(s + 1))
445 return unskip_lws (s, start);
449 parse_list (const char *header, char delim)
454 header = skip_delims (header, delim);
456 end = skip_item (header, delim);
457 list = g_slist_prepend (list, g_strndup (header, end - header));
458 header = skip_delims (end, delim);
461 return g_slist_reverse (list);
465 * soup_header_parse_list:
466 * @header: a header value
468 * Parses a header whose content is described by RFC2616 as
469 * "#something", where "something" does not itself contain commas,
470 * except as part of quoted-strings.
472 * Return value: (transfer full) (element-type utf8): a #GSList of
473 * list elements, as allocated strings
476 soup_header_parse_list (const char *header)
478 g_return_val_if_fail (header != NULL, NULL);
480 return parse_list (header, ',');
489 sort_by_qval (const void *a, const void *b)
491 QualityItem *qia = (QualityItem *)a;
492 QualityItem *qib = (QualityItem *)b;
494 if (qia->qval == qib->qval)
496 else if (qia->qval < qib->qval)
503 * soup_header_parse_quality_list:
504 * @header: a header value
505 * @unacceptable: (out) (allow-none) (transfer full) (element-type utf8): on
506 * return, will contain a list of unacceptable values
508 * Parses a header whose content is a list of items with optional
509 * "qvalue"s (eg, Accept, Accept-Charset, Accept-Encoding,
510 * Accept-Language, TE).
512 * If @unacceptable is not %NULL, then on return, it will contain the
513 * items with qvalue 0. Either way, those items will be removed from
516 * Return value: (transfer full) (element-type utf8): a #GSList of
517 * acceptable values (as allocated strings), highest-qvalue first.
520 soup_header_parse_quality_list (const char *header, GSList **unacceptable)
524 GSList *sorted, *iter;
526 const char *param, *equal, *value;
530 g_return_val_if_fail (header != NULL, NULL);
533 *unacceptable = NULL;
535 unsorted = soup_header_parse_list (header);
536 array = g_new0 (QualityItem, g_slist_length (unsorted));
537 for (iter = unsorted, n = 0; iter; iter = iter->next) {
540 for (semi = strchr (item, ';'); semi; semi = strchr (semi + 1, ';')) {
541 param = skip_lws (semi + 1);
544 equal = skip_lws (param + 1);
545 if (!equal || *equal != '=')
547 value = skip_lws (equal + 1);
551 if (value[0] != '0' && value[0] != '1')
553 qval = (double)(value[0] - '0');
554 if (value[0] == '0' && value[1] == '.') {
555 if (g_ascii_isdigit (value[2])) {
556 qval += (double)(value[2] - '0') / 10;
557 if (g_ascii_isdigit (value[3])) {
558 qval += (double)(value[3] - '0') / 100;
559 if (g_ascii_isdigit (value[4]))
560 qval += (double)(value[4] - '0') / 1000;
571 *unacceptable = g_slist_prepend (*unacceptable,
575 array[n].item = item;
576 array[n].qval = qval;
580 g_slist_free (unsorted);
582 qsort (array, n, sizeof (QualityItem), sort_by_qval);
585 sorted = g_slist_prepend (sorted, array[n].item);
592 * soup_header_free_list: (skip)
593 * @list: a #GSList returned from soup_header_parse_list() or
594 * soup_header_parse_quality_list()
599 soup_header_free_list (GSList *list)
601 g_slist_free_full (list, g_free);
605 * soup_header_contains:
606 * @header: An HTTP header suitable for parsing with
607 * soup_header_parse_list()
610 * Parses @header to see if it contains the token @token (matched
611 * case-insensitively). Note that this can't be used with lists
614 * Return value: whether or not @header contains @token
617 soup_header_contains (const char *header, const char *token)
620 guint len = strlen (token);
622 g_return_val_if_fail (header != NULL, FALSE);
623 g_return_val_if_fail (token != NULL, FALSE);
625 header = skip_delims (header, ',');
627 end = skip_item (header, ',');
628 if (end - header == len &&
629 !g_ascii_strncasecmp (header, token, len))
631 header = skip_delims (end, ',');
/* Decodes a quoted-string in place.
 *
 * @quoted_string must start with its opening '"'. Characters are
 * copied down over the opening quote until the closing '"' or the end
 * of the string; a backslash followed by another character yields
 * that character. The result is '\0'-terminated.
 */
static void
decode_quoted_string (char *quoted_string)
{
	const char *in = quoted_string + 1;
	char *out = quoted_string;

	for (; *in != '\0' && *in != '"'; in++) {
		/* A trailing lone backslash is copied as-is */
		if (in[0] == '\\' && in[1] != '\0')
			in++;
		*out++ = *in;
	}
	*out = '\0';
}
653 decode_rfc5987 (char *encoded_string)
656 gboolean iso_8859_1 = FALSE;
658 q = strchr (encoded_string, '\'');
661 if (g_ascii_strncasecmp (encoded_string, "UTF-8",
662 q - encoded_string) == 0)
664 else if (g_ascii_strncasecmp (encoded_string, "iso-8859-1",
665 q - encoded_string) == 0)
670 q = strchr (q + 1, '\'');
674 decoded = soup_uri_decode (q + 1);
676 char *utf8 = g_convert_with_fallback (decoded, -1, "UTF-8",
685 /* If encoded_string was UTF-8, then each 3-character %-escape
686 * will be converted to a single byte, and so decoded is
687 * shorter than encoded_string. If encoded_string was
688 * iso-8859-1, then each 3-character %-escape will be
689 * converted into at most 2 bytes in UTF-8, and so it's still
692 strcpy (encoded_string, decoded);
698 parse_param_list (const char *header, char delim)
702 char *item, *eq, *name_end, *value;
705 params = g_hash_table_new_full (soup_str_case_hash,
709 list = parse_list (header, delim);
710 for (iter = list; iter; iter = iter->next) {
714 eq = strchr (item, '=');
716 name_end = (char *)unskip_lws (eq, item);
717 if (name_end == item) {
718 /* That's no good... */
725 value = (char *)skip_lws (eq + 1);
727 if (name_end[-1] == '*' && name_end > item + 1) {
729 if (!decode_rfc5987 (value)) {
734 } else if (*value == '"')
735 decode_quoted_string (value);
739 if (override || !g_hash_table_lookup (params, item))
740 g_hash_table_replace (params, item, value);
750 * soup_header_parse_param_list:
751 * @header: a header value
753 * Parses a header which is a comma-delimited list of something like:
754 * <literal>token [ "=" ( token | quoted-string ) ]</literal>.
756 * Tokens that don't have an associated value will still be added to
757 * the resulting hash table, but with a %NULL value.
759 * This also handles RFC5987 encoding (which in HTTP is mostly used
760 * for giving UTF8-encoded filenames in the Content-Disposition
763 * Return value: (element-type utf8 utf8) (transfer full): a
764 * #GHashTable of list elements, which can be freed with
765 * soup_header_free_param_list().
768 soup_header_parse_param_list (const char *header)
770 g_return_val_if_fail (header != NULL, NULL);
772 return parse_param_list (header, ',');
776 * soup_header_parse_semi_param_list:
777 * @header: a header value
779 * Parses a header which is a semicolon-delimited list of something
780 * like: <literal>token [ "=" ( token | quoted-string ) ]</literal>.
782 * Tokens that don't have an associated value will still be added to
783 * the resulting hash table, but with a %NULL value.
785 * This also handles RFC5987 encoding (which in HTTP is mostly used
786 * for giving UTF8-encoded filenames in the Content-Disposition
789 * Return value: (element-type utf8 utf8) (transfer full): a
790 * #GHashTable of list elements, which can be freed with
791 * soup_header_free_param_list().
796 soup_header_parse_semi_param_list (const char *header)
798 g_return_val_if_fail (header != NULL, NULL);
800 return parse_param_list (header, ';');
804 * soup_header_free_param_list:
805 * @param_list: (element-type utf8 utf8): a #GHashTable returned from soup_header_parse_param_list()
806 * or soup_header_parse_semi_param_list()
811 soup_header_free_param_list (GHashTable *param_list)
813 g_return_if_fail (param_list != NULL);
815 g_hash_table_destroy (param_list);
819 append_param_rfc5987 (GString *string,
825 g_string_append (string, name);
826 g_string_append (string, "*=UTF-8''");
827 encoded = soup_uri_encode (value, " *'%()<>@,;:\\\"/[]?=");
828 g_string_append (string, encoded);
833 append_param_quoted (GString *string,
839 g_string_append (string, name);
840 g_string_append (string, "=\"");
842 while (*value == '\\' || *value == '"') {
843 g_string_append_c (string, '\\');
844 g_string_append_c (string, *value++);
846 len = strcspn (value, "\\\"");
847 g_string_append_len (string, value, len);
850 g_string_append_c (string, '"');
854 append_param_internal (GString *string,
857 gboolean allow_token)
860 gboolean use_token = allow_token;
862 for (v = value; *v; v++) {
864 if (g_utf8_validate (value, -1, NULL)) {
865 append_param_rfc5987 (string, name, value);
871 } else if (!soup_char_is_token (*v))
876 g_string_append (string, name);
877 g_string_append_c (string, '=');
878 g_string_append (string, value);
880 append_param_quoted (string, name, value);
884 * soup_header_g_string_append_param_quoted:
885 * @string: a #GString being used to construct an HTTP header value
886 * @name: a parameter name
887 * @value: a parameter value
889 * Appends something like <literal>@name="@value"</literal> to
890 * @string, taking care to escape any quotes or backslashes in @value.
892 * If @value is (non-ASCII) UTF-8, this will instead use RFC 5987
893 * encoding, just like soup_header_g_string_append_param().
898 soup_header_g_string_append_param_quoted (GString *string,
902 g_return_if_fail (string != NULL);
903 g_return_if_fail (name != NULL);
904 g_return_if_fail (value != NULL);
906 append_param_internal (string, name, value, FALSE);
910 * soup_header_g_string_append_param:
911 * @string: a #GString being used to construct an HTTP header value
912 * @name: a parameter name
913 * @value: a parameter value, or %NULL
915 * Appends something like <literal>@name=@value</literal> to @string,
916 * taking care to quote @value if needed, and if so, to escape any
917 * quotes or backslashes in @value.
919 * Alternatively, if @value is a non-ASCII UTF-8 string, it will be
920 * appended using RFC5987 syntax. Although in theory this is supposed
921 * to work anywhere in HTTP that uses this style of parameter, in
922 * reality, it can only be used portably with the Content-Disposition
923 * "filename" parameter.
925 * If @value is %NULL, this will just append @name to @string.
930 soup_header_g_string_append_param (GString *string,
934 g_return_if_fail (string != NULL);
935 g_return_if_fail (name != NULL);
938 g_string_append (string, name);
942 append_param_internal (string, name, value, TRUE);