1 /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
3 * Additional changes are licensed under the same terms as NGINX and
4 * copyright Joyent, Inc. and other Node contributors. All rights reserved.
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #include "http_parser.h"
/* Small portability helpers. Every helper is guarded so that a definition
 * already supplied by an earlier header (for example ULLONG_MAX from
 * <limits.h>) is reused instead of being redefined. */

/* Largest value representable in 64 unsigned bits. */
#ifndef ULLONG_MAX
# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
#endif

/* Smaller of two values. Each argument may be evaluated twice, so do not
 * pass expressions with side effects. */
#ifndef MIN
# define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif

/* Element count of a true array; meaningless when applied to a pointer. */
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* Test bit `i` of the byte-array bitmap `a`: byte i / 8, bit i % 8 counted
 * from the least significant bit. Evaluates to 0 or 1. */
#ifndef BIT_AT
# define BIT_AT(a, i)                                                \
  (!!((unsigned int) (a)[(unsigned int) (i) >> 3] &                  \
   (1 << ((unsigned int) (i) & 7))))
#endif

/* a[i] when `i` lies inside array `a`, otherwise the fallback `v`. The index
 * is compared as unsigned, so negative values select the fallback too. */
#ifndef ELEM_AT
# define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
#endif
54 #define SET_ERRNO(e) \
56 parser->http_errno = (e); \
/* Accessors for the parser state cached in the local variable `p_state`.
 *
 * CURRENT_STATE() reads the cached state; UPDATE_STATE(V) stores V into it
 * (converted to `enum state`).
 *
 * UPDATE_STATE expands to a parenthesised expression with no trailing
 * semicolon, so the caller writes the `;`. That keeps it safe as the sole
 * statement of an un-braced `if`/`else` arm, where a macro that carries its
 * own semicolon would detach the `else`. */
#define CURRENT_STATE() p_state
#define UPDATE_STATE(V) (p_state = (enum state) (V))
63 parser->state = CURRENT_STATE(); \
/* Branch-prediction hints.
 *
 * With a GNU-compatible compiler the condition is normalised to 0/1 and
 * passed to __builtin_expect(); elsewhere the macros are transparent.
 * Because the two variants differ in the value they yield for non-boolean
 * input, use them only where a truth value is expected (if/while/?:). */
#ifdef __GNUC__
# define LIKELY(X) __builtin_expect(!!(X), 1)
# define UNLIKELY(X) __builtin_expect(!!(X), 0)
#else
# define LIKELY(X) (X)
# define UNLIKELY(X) (X)
#endif
79 /* Run the notify callback FOR, returning ER if it fails */
80 #define CALLBACK_NOTIFY_(FOR, ER) \
82 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
84 if (LIKELY(settings->on_##FOR)) { \
85 parser->state = CURRENT_STATE(); \
86 if (UNLIKELY(0 != settings->on_##FOR(parser))) { \
87 SET_ERRNO(HPE_CB_##FOR); \
89 UPDATE_STATE(parser->state); \
91 /* We either errored above or got paused; get out */ \
92 if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
/* Invoke notify callback FOR, treating the byte at `p` as consumed: the
 * offset handed to CALLBACK_NOTIFY_ as ER is one past the current byte. */
#define CALLBACK_NOTIFY(FOR)                                         \
  CALLBACK_NOTIFY_(FOR, (p - data) + 1)

/* Invoke notify callback FOR, leaving the byte at `p` unconsumed: the
 * offset handed to CALLBACK_NOTIFY_ as ER is that of the current byte. */
#define CALLBACK_NOTIFY_NOADVANCE(FOR)                               \
  CALLBACK_NOTIFY_(FOR, (p - data))
104 /* Run data callback FOR with LEN bytes, returning ER if it fails */
105 #define CALLBACK_DATA_(FOR, LEN, ER) \
107 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
110 if (LIKELY(settings->on_##FOR)) { \
111 parser->state = CURRENT_STATE(); \
113 settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
114 SET_ERRNO(HPE_CB_##FOR); \
116 UPDATE_STATE(parser->state); \
118 /* We either errored above or got paused; get out */ \
119 if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
/* Invoke data callback FOR with the bytes from FOR##_mark up to (but not
 * including) `p`, treating the byte at `p` as consumed: the offset handed
 * to CALLBACK_DATA_ as ER is one past the current byte. */
#define CALLBACK_DATA(FOR)                                           \
  CALLBACK_DATA_(FOR, (p - FOR##_mark), (p - data) + 1)

/* Invoke data callback FOR with the bytes from FOR##_mark up to (but not
 * including) `p`, leaving the byte at `p` unconsumed: the offset handed to
 * CALLBACK_DATA_ as ER is that of the current byte. */
#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
  CALLBACK_DATA_(FOR, (p - FOR##_mark), (p - data))
135 /* Set the mark FOR; non-destructive if mark is already set */
143 /* Don't allow the total size of the HTTP headers (including the status
144 * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
145 * embedders against denial-of-service attacks where the attacker feeds
146 * us a never-ending header that the embedder keeps buffering.
148 * This check is arguably the responsibility of embedders but we're doing
149 * it on the embedder's behalf because most won't bother and this way we
150 * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
151 * than any reasonable request or response so this should never affect
152 * day-to-day operation.
154 #define COUNT_HEADER_SIZE(V) \
156 parser->nread += (V); \
157 if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) { \
158 SET_ERRNO(HPE_HEADER_OVERFLOW); \
/* Lower-case spellings that the header matcher compares against one
 * character at a time (indexed by parser->index). */

/* Header field names. UPGRADE doubles as a Connection header token. */
#define CONNECTION        "connection"
#define CONTENT_LENGTH    "content-length"
#define PROXY_CONNECTION  "proxy-connection"
#define TRANSFER_ENCODING "transfer-encoding"
#define UPGRADE           "upgrade"

/* Header value tokens. */
#define CHUNKED           "chunked"
#define CLOSE             "close"
#define KEEP_ALIVE        "keep-alive"
174 static const char *method_strings[] =
176 #define XX(num, name, string) #string,
/* Token characters as defined by RFC 2616, folded to lower case.
 *
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * tokens[c] is 0 when byte c may not appear in a token. Otherwise it is the
 * character itself, with 'A'-'Z' mapped to 'a'-'z'. Only the first 128
 * entries are spelled out; the remainder are zero-initialised, so every
 * byte >= 0x80 is rejected.
 */
static const char tokens[256] = {
/* 0x00-0x0f: control characters */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
/* 0x10-0x1f: control characters */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
/* 0x20-0x2f: sp !  "    #    $    %    &    '     (  )  *    +    ,  -    .    / */
  0,  '!',  0,  '#', '$', '%', '&', '\'',  0,   0,  '*', '+',  0,  '-', '.',  0,
/* 0x30-0x3f: 0-9, then : ; < = > ? */
 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',  0,   0,   0,   0,   0,   0,
/* 0x40-0x4f: @, then A-O (lower-cased) */
  0,  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x50-0x5f: P-Z (lower-cased), then [ \ ] ^ _ */
 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',  0,   0,   0,  '^', '_',
/* 0x60-0x6f: `, then a-o */
 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x70-0x7f: p-z, then { | } ~ del */
 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',  0,  '|',  0,  '~',  0
};
/* Hexadecimal digit value of a byte, or -1 when the byte is not a hex digit.
 *
 * '0'-'9' map to 0-9 and both 'a'-'f' and 'A'-'F' map to 10-15. All 256
 * entries are written out explicitly: an entry left to default
 * initialisation would be 0, which cannot be told apart from the digit '0',
 * so bytes 0x80-0xFF must be marked -1 just like the other non-digits. */
static const int8_t unhex[256] = {
/* 0x00 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* 0x10 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* 0x20 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* 0x30 */  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
/* 0x40 */ -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* 0x50 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* 0x60 */ -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* 0x70 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* 0x80 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* 0x90 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* 0xa0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* 0xb0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* 0xc0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* 0xd0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* 0xe0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* 0xf0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
};
236 #if HTTP_PARSER_STRICT
243 static const uint8_t normal_url_char[32] = {
244 /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
245 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
246 /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
247 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
248 /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
249 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
250 /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
251 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
252 /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
253 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
254 /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
255 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
256 /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
257 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
258 /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
259 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
260 /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
261 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
262 /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
263 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
264 /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
265 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
266 /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
267 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
268 /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
269 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
270 /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
271 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
272 /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
273 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
274 /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
275 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
280 { s_dead = 1 /* important that this is > 0 */
289 , s_res_first_http_major
291 , s_res_first_http_minor
293 , s_res_first_status_code
297 , s_res_line_almost_done
302 , s_req_spaces_before_url
305 , s_req_schema_slash_slash
308 , s_req_server_with_at
310 , s_req_query_string_start
312 , s_req_fragment_start
319 , s_req_first_http_major
321 , s_req_first_http_minor
323 , s_req_line_almost_done
325 , s_header_field_start
327 , s_header_value_discard_ws
328 , s_header_value_discard_ws_almost_done
329 , s_header_value_discard_lws
330 , s_header_value_start
334 , s_header_almost_done
339 , s_chunk_size_almost_done
341 , s_headers_almost_done
344 /* Important: 's_headers_done' must be the last 'header' state. All
345 * states beyond this must be 'body' states. It is used for overflow
346 * checking. See the PARSING_HEADER() macro.
350 , s_chunk_data_almost_done
354 , s_body_identity_eof
/* True while `state` is a header-phase state, i.e. it does not come after
 * s_headers_done in `enum state`. The argument is parenthesised so that an
 * arbitrary expression (for example a conditional) is compared as a whole. */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
369 , h_matching_connection
370 , h_matching_proxy_connection
371 , h_matching_content_length
372 , h_matching_transfer_encoding
377 , h_transfer_encoding
380 , h_matching_transfer_encoding_chunked
381 , h_matching_connection_token_start
382 , h_matching_connection_keep_alive
383 , h_matching_connection_close
384 , h_matching_connection_upgrade
385 , h_matching_connection_token
387 , h_transfer_encoding_chunked
388 , h_connection_keep_alive
390 , h_connection_upgrade
396 , s_http_userinfo_start
399 , s_http_host_v6_start
403 , s_http_host_port_start
/* ASCII character-class helpers.
 *
 * LOWER(c) sets bit 0x20, which turns 'A'-'Z' into 'a'-'z'; it also changes
 * other bytes, so its result is only meaningful when followed by a range
 * check as in IS_ALPHA / IS_HEX. Arguments are fully parenthesised so that
 * an expression may be passed, but they can be evaluated more than once:
 * do not pass expressions with side effects. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
415 #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
416 (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
418 #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
419 (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
420 (c) == '$' || (c) == ',')
/* Character-class lookups whose definition depends on HTTP_PARSER_STRICT.
 *
 * STRICT_TOKEN(c) is always the plain `tokens` lookup. In strict builds
 * TOKEN(c) is the same lookup; otherwise TOKEN additionally accepts a space
 * (yielding ' '), IS_URL_CHAR accepts any byte with the high bit set, and
 * IS_HOST_CHAR accepts '_'.
 *
 * Every use of the argument is parenthesised so the casts and comparisons
 * apply to the whole expression passed in. The argument may be evaluated
 * more than once. */
#define STRICT_TOKEN(c)     (tokens[(unsigned char)(c)])

#if HTTP_PARSER_STRICT
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
/**
 * Verify that a char is a valid visible (printable) US-ASCII
 * character or %x80-FF
 *
 * Accepted: CR, LF, horizontal tab (9), and every byte above 31 other than
 * DEL (127). The unsigned char cast is what lets bytes 0x80-0xFF through
 * when plain char is signed. `ch` is parenthesised at each use so that an
 * expression argument is tested as a whole; it may be evaluated more than
 * once.
 **/
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))
443 #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
446 #if HTTP_PARSER_STRICT
447 # define STRICT_CHECK(cond) \
450 SET_ERRNO(HPE_STRICT); \
454 # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
456 # define STRICT_CHECK(cond)
457 # define NEW_MESSAGE() start_state
461 /* Map errno values to strings for human-readable output */
462 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
465 const char *description;
466 } http_strerror_tab[] = {
467 HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
469 #undef HTTP_STRERROR_GEN
471 int http_message_needs_eof(const http_parser *parser);
475 * This is designed to be shared by http_parser_execute() for URL validation,
476 * hence it has a state transition + byte-for-byte interface. In addition, it
477 * is meant to be embedded in http_parser_parse_url(), which does the dirty
 * work of turning state transitions into URL components for its API.
480 * This function should only be invoked with non-space characters. It is
481 * assumed that the caller cares about (and can detect) the transition between
482 * URL and non-URL states by looking for these.
485 parse_url_char(enum state s, const char ch)
487 if (ch == ' ' || ch == '\r' || ch == '\n') {
491 #if HTTP_PARSER_STRICT
492 if (ch == '\t' || ch == '\f') {
498 case s_req_spaces_before_url:
499 /* Proxied requests are followed by scheme of an absolute URI (alpha).
500 * All methods except CONNECT are followed by '/' or '*'.
503 if (ch == '/' || ch == '*') {
519 return s_req_schema_slash;
524 case s_req_schema_slash:
526 return s_req_schema_slash_slash;
531 case s_req_schema_slash_slash:
533 return s_req_server_start;
538 case s_req_server_with_at:
544 case s_req_server_start:
551 return s_req_query_string_start;
555 return s_req_server_with_at;
558 if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
565 if (IS_URL_CHAR(ch)) {
571 return s_req_query_string_start;
574 return s_req_fragment_start;
579 case s_req_query_string_start:
580 case s_req_query_string:
581 if (IS_URL_CHAR(ch)) {
582 return s_req_query_string;
587 /* allow extra '?' in query string */
588 return s_req_query_string;
591 return s_req_fragment_start;
596 case s_req_fragment_start:
597 if (IS_URL_CHAR(ch)) {
598 return s_req_fragment;
603 return s_req_fragment;
612 if (IS_URL_CHAR(ch)) {
628 /* We should never fall out of the switch above unless there's an error */
632 size_t http_parser_execute (http_parser *parser,
633 const http_parser_settings *settings,
639 const char *p = data;
640 const char *header_field_mark = 0;
641 const char *header_value_mark = 0;
642 const char *url_mark = 0;
643 const char *body_mark = 0;
644 const char *status_mark = 0;
645 enum state p_state = (enum state) parser->state;
647 const unsigned int lenient = parser->lenient_http_headers;
649 /* We're in an error state. Don't bother doing anything. */
650 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
655 switch (CURRENT_STATE()) {
656 case s_body_identity_eof:
657 /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
660 CALLBACK_NOTIFY_NOADVANCE(message_complete);
664 case s_start_req_or_res:
670 SET_ERRNO(HPE_INVALID_EOF_STATE);
676 if (CURRENT_STATE() == s_header_field)
677 header_field_mark = data;
678 if (CURRENT_STATE() == s_header_value)
679 header_value_mark = data;
680 switch (CURRENT_STATE()) {
683 case s_req_schema_slash:
684 case s_req_schema_slash_slash:
685 case s_req_server_start:
687 case s_req_server_with_at:
688 case s_req_query_string_start:
689 case s_req_query_string:
690 case s_req_fragment_start:
701 for (p=data; p != data + len; p++) {
704 if (PARSING_HEADER(CURRENT_STATE()))
705 COUNT_HEADER_SIZE(1);
708 switch (CURRENT_STATE()) {
711 /* this state is used after a 'Connection: close' message
712 * the parser will error out if it reads another message
714 if (LIKELY(ch == CR || ch == LF))
717 SET_ERRNO(HPE_CLOSED_CONNECTION);
720 case s_start_req_or_res:
722 if (ch == CR || ch == LF)
725 parser->content_length = ULLONG_MAX;
728 UPDATE_STATE(s_res_or_resp_H);
730 CALLBACK_NOTIFY(message_begin);
732 parser->type = HTTP_REQUEST;
733 UPDATE_STATE(s_start_req);
740 case s_res_or_resp_H:
742 parser->type = HTTP_RESPONSE;
743 UPDATE_STATE(s_res_HT);
745 if (UNLIKELY(ch != 'E')) {
746 SET_ERRNO(HPE_INVALID_CONSTANT);
750 parser->type = HTTP_REQUEST;
751 parser->method = HTTP_HEAD;
753 UPDATE_STATE(s_req_method);
760 parser->content_length = ULLONG_MAX;
764 UPDATE_STATE(s_res_H);
772 SET_ERRNO(HPE_INVALID_CONSTANT);
776 CALLBACK_NOTIFY(message_begin);
781 STRICT_CHECK(ch != 'T');
782 UPDATE_STATE(s_res_HT);
786 STRICT_CHECK(ch != 'T');
787 UPDATE_STATE(s_res_HTT);
791 STRICT_CHECK(ch != 'P');
792 UPDATE_STATE(s_res_HTTP);
796 STRICT_CHECK(ch != '/');
797 UPDATE_STATE(s_res_first_http_major);
800 case s_res_first_http_major:
801 if (UNLIKELY(ch < '0' || ch > '9')) {
802 SET_ERRNO(HPE_INVALID_VERSION);
806 parser->http_major = ch - '0';
807 UPDATE_STATE(s_res_http_major);
810 /* major HTTP version or dot */
811 case s_res_http_major:
814 UPDATE_STATE(s_res_first_http_minor);
819 SET_ERRNO(HPE_INVALID_VERSION);
823 parser->http_major *= 10;
824 parser->http_major += ch - '0';
826 if (UNLIKELY(parser->http_major > 999)) {
827 SET_ERRNO(HPE_INVALID_VERSION);
834 /* first digit of minor HTTP version */
835 case s_res_first_http_minor:
836 if (UNLIKELY(!IS_NUM(ch))) {
837 SET_ERRNO(HPE_INVALID_VERSION);
841 parser->http_minor = ch - '0';
842 UPDATE_STATE(s_res_http_minor);
845 /* minor HTTP version or end of request line */
846 case s_res_http_minor:
849 UPDATE_STATE(s_res_first_status_code);
853 if (UNLIKELY(!IS_NUM(ch))) {
854 SET_ERRNO(HPE_INVALID_VERSION);
858 parser->http_minor *= 10;
859 parser->http_minor += ch - '0';
861 if (UNLIKELY(parser->http_minor > 999)) {
862 SET_ERRNO(HPE_INVALID_VERSION);
869 case s_res_first_status_code:
876 SET_ERRNO(HPE_INVALID_STATUS);
879 parser->status_code = ch - '0';
880 UPDATE_STATE(s_res_status_code);
884 case s_res_status_code:
889 UPDATE_STATE(s_res_status_start);
892 UPDATE_STATE(s_res_line_almost_done);
895 UPDATE_STATE(s_header_field_start);
898 SET_ERRNO(HPE_INVALID_STATUS);
904 parser->status_code *= 10;
905 parser->status_code += ch - '0';
907 if (UNLIKELY(parser->status_code > 999)) {
908 SET_ERRNO(HPE_INVALID_STATUS);
915 case s_res_status_start:
918 UPDATE_STATE(s_res_line_almost_done);
923 UPDATE_STATE(s_header_field_start);
928 UPDATE_STATE(s_res_status);
935 UPDATE_STATE(s_res_line_almost_done);
936 CALLBACK_DATA(status);
941 UPDATE_STATE(s_header_field_start);
942 CALLBACK_DATA(status);
948 case s_res_line_almost_done:
949 STRICT_CHECK(ch != LF);
950 UPDATE_STATE(s_header_field_start);
955 if (ch == CR || ch == LF)
958 parser->content_length = ULLONG_MAX;
960 if (UNLIKELY(!IS_ALPHA(ch))) {
961 SET_ERRNO(HPE_INVALID_METHOD);
965 parser->method = (enum http_method) 0;
968 case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
969 case 'D': parser->method = HTTP_DELETE; break;
970 case 'G': parser->method = HTTP_GET; break;
971 case 'H': parser->method = HTTP_HEAD; break;
972 case 'L': parser->method = HTTP_LOCK; break;
973 case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
974 case 'N': parser->method = HTTP_NOTIFY; break;
975 case 'O': parser->method = HTTP_OPTIONS; break;
976 case 'P': parser->method = HTTP_POST;
977 /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
979 case 'R': parser->method = HTTP_REPORT; break;
980 case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
981 case 'T': parser->method = HTTP_TRACE; break;
982 case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
984 SET_ERRNO(HPE_INVALID_METHOD);
987 UPDATE_STATE(s_req_method);
989 CALLBACK_NOTIFY(message_begin);
997 if (UNLIKELY(ch == '\0')) {
998 SET_ERRNO(HPE_INVALID_METHOD);
1002 matcher = method_strings[parser->method];
1003 if (ch == ' ' && matcher[parser->index] == '\0') {
1004 UPDATE_STATE(s_req_spaces_before_url);
1005 } else if (ch == matcher[parser->index]) {
1007 } else if (parser->method == HTTP_CONNECT) {
1008 if (parser->index == 1 && ch == 'H') {
1009 parser->method = HTTP_CHECKOUT;
1010 } else if (parser->index == 2 && ch == 'P') {
1011 parser->method = HTTP_COPY;
1013 SET_ERRNO(HPE_INVALID_METHOD);
1016 } else if (parser->method == HTTP_MKCOL) {
1017 if (parser->index == 1 && ch == 'O') {
1018 parser->method = HTTP_MOVE;
1019 } else if (parser->index == 1 && ch == 'E') {
1020 parser->method = HTTP_MERGE;
1021 } else if (parser->index == 1 && ch == '-') {
1022 parser->method = HTTP_MSEARCH;
1023 } else if (parser->index == 2 && ch == 'A') {
1024 parser->method = HTTP_MKACTIVITY;
1025 } else if (parser->index == 3 && ch == 'A') {
1026 parser->method = HTTP_MKCALENDAR;
1028 SET_ERRNO(HPE_INVALID_METHOD);
1031 } else if (parser->method == HTTP_SUBSCRIBE) {
1032 if (parser->index == 1 && ch == 'E') {
1033 parser->method = HTTP_SEARCH;
1035 SET_ERRNO(HPE_INVALID_METHOD);
1038 } else if (parser->index == 1 && parser->method == HTTP_POST) {
1040 parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
1041 } else if (ch == 'U') {
1042 parser->method = HTTP_PUT; /* or HTTP_PURGE */
1043 } else if (ch == 'A') {
1044 parser->method = HTTP_PATCH;
1046 SET_ERRNO(HPE_INVALID_METHOD);
1049 } else if (parser->index == 2) {
1050 if (parser->method == HTTP_PUT) {
1052 parser->method = HTTP_PURGE;
1054 SET_ERRNO(HPE_INVALID_METHOD);
1057 } else if (parser->method == HTTP_UNLOCK) {
1059 parser->method = HTTP_UNSUBSCRIBE;
1061 SET_ERRNO(HPE_INVALID_METHOD);
1065 SET_ERRNO(HPE_INVALID_METHOD);
1068 } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
1069 parser->method = HTTP_PROPPATCH;
1071 SET_ERRNO(HPE_INVALID_METHOD);
1079 case s_req_spaces_before_url:
1081 if (ch == ' ') break;
1084 if (parser->method == HTTP_CONNECT) {
1085 UPDATE_STATE(s_req_server_start);
1088 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1089 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1090 SET_ERRNO(HPE_INVALID_URL);
1098 case s_req_schema_slash:
1099 case s_req_schema_slash_slash:
1100 case s_req_server_start:
1103 /* No whitespace allowed here */
1107 SET_ERRNO(HPE_INVALID_URL);
1110 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1111 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1112 SET_ERRNO(HPE_INVALID_URL);
1121 case s_req_server_with_at:
1123 case s_req_query_string_start:
1124 case s_req_query_string:
1125 case s_req_fragment_start:
1126 case s_req_fragment:
1130 UPDATE_STATE(s_req_http_start);
1135 parser->http_major = 0;
1136 parser->http_minor = 9;
1137 UPDATE_STATE((ch == CR) ?
1138 s_req_line_almost_done :
1139 s_header_field_start);
1143 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1144 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1145 SET_ERRNO(HPE_INVALID_URL);
1152 case s_req_http_start:
1155 UPDATE_STATE(s_req_http_H);
1160 SET_ERRNO(HPE_INVALID_CONSTANT);
1166 STRICT_CHECK(ch != 'T');
1167 UPDATE_STATE(s_req_http_HT);
1171 STRICT_CHECK(ch != 'T');
1172 UPDATE_STATE(s_req_http_HTT);
1175 case s_req_http_HTT:
1176 STRICT_CHECK(ch != 'P');
1177 UPDATE_STATE(s_req_http_HTTP);
1180 case s_req_http_HTTP:
1181 STRICT_CHECK(ch != '/');
1182 UPDATE_STATE(s_req_first_http_major);
1185 /* first digit of major HTTP version */
1186 case s_req_first_http_major:
1187 if (UNLIKELY(ch < '1' || ch > '9')) {
1188 SET_ERRNO(HPE_INVALID_VERSION);
1192 parser->http_major = ch - '0';
1193 UPDATE_STATE(s_req_http_major);
1196 /* major HTTP version or dot */
1197 case s_req_http_major:
1200 UPDATE_STATE(s_req_first_http_minor);
1204 if (UNLIKELY(!IS_NUM(ch))) {
1205 SET_ERRNO(HPE_INVALID_VERSION);
1209 parser->http_major *= 10;
1210 parser->http_major += ch - '0';
1212 if (UNLIKELY(parser->http_major > 999)) {
1213 SET_ERRNO(HPE_INVALID_VERSION);
1220 /* first digit of minor HTTP version */
1221 case s_req_first_http_minor:
1222 if (UNLIKELY(!IS_NUM(ch))) {
1223 SET_ERRNO(HPE_INVALID_VERSION);
1227 parser->http_minor = ch - '0';
1228 UPDATE_STATE(s_req_http_minor);
1231 /* minor HTTP version or end of request line */
1232 case s_req_http_minor:
1235 UPDATE_STATE(s_req_line_almost_done);
1240 UPDATE_STATE(s_header_field_start);
1244 /* XXX allow spaces after digit? */
1246 if (UNLIKELY(!IS_NUM(ch))) {
1247 SET_ERRNO(HPE_INVALID_VERSION);
1251 parser->http_minor *= 10;
1252 parser->http_minor += ch - '0';
1254 if (UNLIKELY(parser->http_minor > 999)) {
1255 SET_ERRNO(HPE_INVALID_VERSION);
1262 /* end of request line */
1263 case s_req_line_almost_done:
1265 if (UNLIKELY(ch != LF)) {
1266 SET_ERRNO(HPE_LF_EXPECTED);
1270 UPDATE_STATE(s_header_field_start);
1274 case s_header_field_start:
1277 UPDATE_STATE(s_headers_almost_done);
1282 /* they might be just sending \n instead of \r\n so this would be
1283 * the second \n to denote the end of headers*/
1284 UPDATE_STATE(s_headers_almost_done);
1291 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1298 UPDATE_STATE(s_header_field);
1302 parser->header_state = h_C;
1306 parser->header_state = h_matching_proxy_connection;
1310 parser->header_state = h_matching_transfer_encoding;
1314 parser->header_state = h_matching_upgrade;
1318 parser->header_state = h_general;
1324 case s_header_field:
1326 const char* start = p;
1327 for (; p != data + len; p++) {
1334 switch (parser->header_state) {
1340 parser->header_state = (c == 'o' ? h_CO : h_general);
1345 parser->header_state = (c == 'n' ? h_CON : h_general);
1352 parser->header_state = h_matching_connection;
1355 parser->header_state = h_matching_content_length;
1358 parser->header_state = h_general;
1365 case h_matching_connection:
1367 if (parser->index > sizeof(CONNECTION)-1
1368 || c != CONNECTION[parser->index]) {
1369 parser->header_state = h_general;
1370 } else if (parser->index == sizeof(CONNECTION)-2) {
1371 parser->header_state = h_connection;
1375 /* proxy-connection */
1377 case h_matching_proxy_connection:
1379 if (parser->index > sizeof(PROXY_CONNECTION)-1
1380 || c != PROXY_CONNECTION[parser->index]) {
1381 parser->header_state = h_general;
1382 } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1383 parser->header_state = h_connection;
1387 /* content-length */
1389 case h_matching_content_length:
1391 if (parser->index > sizeof(CONTENT_LENGTH)-1
1392 || c != CONTENT_LENGTH[parser->index]) {
1393 parser->header_state = h_general;
1394 } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1395 if (parser->flags & F_CONTENTLENGTH) {
1396 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1399 parser->header_state = h_content_length;
1400 parser->flags |= F_CONTENTLENGTH;
1404 /* transfer-encoding */
1406 case h_matching_transfer_encoding:
1408 if (parser->index > sizeof(TRANSFER_ENCODING)-1
1409 || c != TRANSFER_ENCODING[parser->index]) {
1410 parser->header_state = h_general;
1411 } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1412 parser->header_state = h_transfer_encoding;
1418 case h_matching_upgrade:
1420 if (parser->index > sizeof(UPGRADE)-1
1421 || c != UPGRADE[parser->index]) {
1422 parser->header_state = h_general;
1423 } else if (parser->index == sizeof(UPGRADE)-2) {
1424 parser->header_state = h_upgrade;
1429 case h_content_length:
1430 case h_transfer_encoding:
1432 if (ch != ' ') parser->header_state = h_general;
1436 assert(0 && "Unknown header_state");
1441 COUNT_HEADER_SIZE(p - start);
1443 if (p == data + len) {
1449 UPDATE_STATE(s_header_value_discard_ws);
1450 CALLBACK_DATA(header_field);
1454 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1458 case s_header_value_discard_ws:
1459 if (ch == ' ' || ch == '\t') break;
1462 UPDATE_STATE(s_header_value_discard_ws_almost_done);
1467 UPDATE_STATE(s_header_value_discard_lws);
1473 case s_header_value_start:
1477 UPDATE_STATE(s_header_value);
1482 switch (parser->header_state) {
1484 parser->flags |= F_UPGRADE;
1485 parser->header_state = h_general;
1488 case h_transfer_encoding:
1489 /* looking for 'Transfer-Encoding: chunked' */
1491 parser->header_state = h_matching_transfer_encoding_chunked;
1493 parser->header_state = h_general;
1497 case h_content_length:
1498 if (UNLIKELY(!IS_NUM(ch))) {
1499 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1503 parser->content_length = ch - '0';
1507 /* looking for 'Connection: keep-alive' */
1509 parser->header_state = h_matching_connection_keep_alive;
1510 /* looking for 'Connection: close' */
1511 } else if (c == 'c') {
1512 parser->header_state = h_matching_connection_close;
1513 } else if (c == 'u') {
1514 parser->header_state = h_matching_connection_upgrade;
1516 parser->header_state = h_matching_connection_token;
1520 /* Multi-value `Connection` header */
1521 case h_matching_connection_token_start:
1525 parser->header_state = h_general;
1531 case s_header_value:
1533 const char* start = p;
1534 enum header_states h_state = (enum header_states) parser->header_state;
1535 for (; p != data + len; p++) {
1538 UPDATE_STATE(s_header_almost_done);
1539 parser->header_state = h_state;
1540 CALLBACK_DATA(header_value);
1545 UPDATE_STATE(s_header_almost_done);
1546 COUNT_HEADER_SIZE(p - start);
1547 parser->header_state = h_state;
1548 CALLBACK_DATA_NOADVANCE(header_value);
1552 if (!lenient && !IS_HEADER_CHAR(ch)) {
1553 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1564 size_t limit = data + len - p;
1566 limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1568 p_cr = (const char*) memchr(p, CR, limit);
1569 p_lf = (const char*) memchr(p, LF, limit);
1571 if (p_lf != NULL && p_cr >= p_lf)
1575 } else if (UNLIKELY(p_lf != NULL)) {
1586 case h_transfer_encoding:
1587 assert(0 && "Shouldn't get here.");
1590 case h_content_length:
1594 if (ch == ' ') break;
1596 if (UNLIKELY(!IS_NUM(ch))) {
1597 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1598 parser->header_state = h_state;
1602 t = parser->content_length;
1606 /* Overflow? Test against a conservative limit for simplicity. */
1607 if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1608 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1609 parser->header_state = h_state;
1613 parser->content_length = t;
1617 /* Transfer-Encoding: chunked */
1618 case h_matching_transfer_encoding_chunked:
1620 if (parser->index > sizeof(CHUNKED)-1
1621 || c != CHUNKED[parser->index]) {
1622 h_state = h_general;
1623 } else if (parser->index == sizeof(CHUNKED)-2) {
1624 h_state = h_transfer_encoding_chunked;
1628 case h_matching_connection_token_start:
1629 /* looking for 'Connection: keep-alive' */
1631 h_state = h_matching_connection_keep_alive;
1632 /* looking for 'Connection: close' */
1633 } else if (c == 'c') {
1634 h_state = h_matching_connection_close;
1635 } else if (c == 'u') {
1636 h_state = h_matching_connection_upgrade;
1637 } else if (STRICT_TOKEN(c)) {
1638 h_state = h_matching_connection_token;
1639 } else if (c == ' ' || c == '\t') {
1642 h_state = h_general;
1646 /* looking for 'Connection: keep-alive' */
1647 case h_matching_connection_keep_alive:
1649 if (parser->index > sizeof(KEEP_ALIVE)-1
1650 || c != KEEP_ALIVE[parser->index]) {
1651 h_state = h_matching_connection_token;
1652 } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1653 h_state = h_connection_keep_alive;
1657 /* looking for 'Connection: close' */
1658 case h_matching_connection_close:
1660 if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1661 h_state = h_matching_connection_token;
1662 } else if (parser->index == sizeof(CLOSE)-2) {
1663 h_state = h_connection_close;
1667 /* looking for 'Connection: upgrade' */
1668 case h_matching_connection_upgrade:
1670 if (parser->index > sizeof(UPGRADE) - 1 ||
1671 c != UPGRADE[parser->index]) {
1672 h_state = h_matching_connection_token;
1673 } else if (parser->index == sizeof(UPGRADE)-2) {
1674 h_state = h_connection_upgrade;
1678 case h_matching_connection_token:
1680 h_state = h_matching_connection_token_start;
1685 case h_transfer_encoding_chunked:
1686 if (ch != ' ') h_state = h_general;
1689 case h_connection_keep_alive:
1690 case h_connection_close:
1691 case h_connection_upgrade:
1693 if (h_state == h_connection_keep_alive) {
1694 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1695 } else if (h_state == h_connection_close) {
1696 parser->flags |= F_CONNECTION_CLOSE;
1697 } else if (h_state == h_connection_upgrade) {
1698 parser->flags |= F_CONNECTION_UPGRADE;
1700 h_state = h_matching_connection_token_start;
1702 } else if (ch != ' ') {
1703 h_state = h_matching_connection_token;
1708 UPDATE_STATE(s_header_value);
1709 h_state = h_general;
1713 parser->header_state = h_state;
1715 COUNT_HEADER_SIZE(p - start);
1717 if (p == data + len)
1722 case s_header_almost_done:
1724 if (UNLIKELY(ch != LF)) {
1725 SET_ERRNO(HPE_LF_EXPECTED);
1729 UPDATE_STATE(s_header_value_lws);
1733 case s_header_value_lws:
1735 if (ch == ' ' || ch == '\t') {
1736 UPDATE_STATE(s_header_value_start);
1740 /* finished the header */
1741 switch (parser->header_state) {
1742 case h_connection_keep_alive:
1743 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1745 case h_connection_close:
1746 parser->flags |= F_CONNECTION_CLOSE;
1748 case h_transfer_encoding_chunked:
1749 parser->flags |= F_CHUNKED;
1751 case h_connection_upgrade:
1752 parser->flags |= F_CONNECTION_UPGRADE;
1758 UPDATE_STATE(s_header_field_start);
1762 case s_header_value_discard_ws_almost_done:
1764 STRICT_CHECK(ch != LF);
1765 UPDATE_STATE(s_header_value_discard_lws);
1769 case s_header_value_discard_lws:
1771 if (ch == ' ' || ch == '\t') {
1772 UPDATE_STATE(s_header_value_discard_ws);
1775 switch (parser->header_state) {
1776 case h_connection_keep_alive:
1777 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1779 case h_connection_close:
1780 parser->flags |= F_CONNECTION_CLOSE;
1782 case h_connection_upgrade:
1783 parser->flags |= F_CONNECTION_UPGRADE;
1785 case h_transfer_encoding_chunked:
1786 parser->flags |= F_CHUNKED;
1792 /* header value was empty */
1794 UPDATE_STATE(s_header_field_start);
1795 CALLBACK_DATA_NOADVANCE(header_value);
1800 case s_headers_almost_done:
1802 STRICT_CHECK(ch != LF);
1804 if (parser->flags & F_TRAILING) {
1805 /* End of a chunked request */
1806 UPDATE_STATE(s_message_done);
1807 CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1811 /* Cannot use chunked encoding and a content-length header together
1812 per the HTTP specification. */
1813 if ((parser->flags & F_CHUNKED) &&
1814 (parser->flags & F_CONTENTLENGTH)) {
1815 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1819 UPDATE_STATE(s_headers_done);
1821 /* Set this here so that on_headers_complete() callbacks can see it */
1823 ((parser->flags & (F_UPGRADE | F_CONNECTION_UPGRADE)) ==
1824 (F_UPGRADE | F_CONNECTION_UPGRADE) ||
1825 parser->method == HTTP_CONNECT);
1827 /* Here we call the headers_complete callback. This is somewhat
1828 * different than other callbacks because if the user returns 1, we
1829 * will interpret that as saying that this message has no body. This
1830 * is needed for the annoying case of receiving a response to a HEAD
1833 * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1834 * we have to simulate it by handling a change in errno below.
1836 if (settings->on_headers_complete) {
1837 switch (settings->on_headers_complete(parser)) {
1842 parser->flags |= F_SKIPBODY;
1846 SET_ERRNO(HPE_CB_headers_complete);
1847 RETURN(p - data); /* Error */
1851 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1858 case s_headers_done:
1860 STRICT_CHECK(ch != LF);
1864 int hasBody = parser->flags & F_CHUNKED ||
1865 (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1866 if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1867 (parser->flags & F_SKIPBODY) || !hasBody)) {
1868 /* Exit, the rest of the message is in a different protocol. */
1869 UPDATE_STATE(NEW_MESSAGE());
1870 CALLBACK_NOTIFY(message_complete);
1871 RETURN((p - data) + 1);
1874 if (parser->flags & F_SKIPBODY) {
1875 UPDATE_STATE(NEW_MESSAGE());
1876 CALLBACK_NOTIFY(message_complete);
1877 } else if (parser->flags & F_CHUNKED) {
1878 /* chunked encoding - ignore Content-Length header */
1879 UPDATE_STATE(s_chunk_size_start);
1881 if (parser->content_length == 0) {
1882 /* Content-Length header given but zero: Content-Length: 0\r\n */
1883 UPDATE_STATE(NEW_MESSAGE());
1884 CALLBACK_NOTIFY(message_complete);
1885 } else if (parser->content_length != ULLONG_MAX) {
1886 /* Content-Length header given and non-zero */
1887 UPDATE_STATE(s_body_identity);
1889 if (parser->type == HTTP_REQUEST ||
1890 !http_message_needs_eof(parser)) {
1891 /* Assume content-length 0 - read the next */
1892 UPDATE_STATE(NEW_MESSAGE());
1893 CALLBACK_NOTIFY(message_complete);
1895 /* Read body until EOF */
1896 UPDATE_STATE(s_body_identity_eof);
1904 case s_body_identity:
1906 uint64_t to_read = MIN(parser->content_length,
1907 (uint64_t) ((data + len) - p));
1909 assert(parser->content_length != 0
1910 && parser->content_length != ULLONG_MAX);
1912 /* The difference between advancing content_length and p is because
1913 * the latter will automatically advance on the next loop iteration.
1914 * Further, if content_length ends up at 0, we want to see the last
1915 * byte again for our message complete callback.
1918 parser->content_length -= to_read;
1921 if (parser->content_length == 0) {
1922 UPDATE_STATE(s_message_done);
1924 /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1926 * The alternative to doing this is to wait for the next byte to
1927 * trigger the data callback, just as in every other case. The
1928 * problem with this is that this makes it difficult for the test
1929 * harness to distinguish between complete-on-EOF and
1930 * complete-on-length. It's not clear that this distinction is
1931 * important for applications, but let's keep it for now.
1933 CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1940 /* read until EOF */
1941 case s_body_identity_eof:
1947 case s_message_done:
1948 UPDATE_STATE(NEW_MESSAGE());
1949 CALLBACK_NOTIFY(message_complete);
1950 if (parser->upgrade) {
1951 /* Exit, the rest of the message is in a different protocol. */
1952 RETURN((p - data) + 1);
1956 case s_chunk_size_start:
1958 assert(parser->nread == 1);
1959 assert(parser->flags & F_CHUNKED);
1961 unhex_val = unhex[(unsigned char)ch];
1962 if (UNLIKELY(unhex_val == -1)) {
1963 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1967 parser->content_length = unhex_val;
1968 UPDATE_STATE(s_chunk_size);
1976 assert(parser->flags & F_CHUNKED);
1979 UPDATE_STATE(s_chunk_size_almost_done);
1983 unhex_val = unhex[(unsigned char)ch];
1985 if (unhex_val == -1) {
1986 if (ch == ';' || ch == ' ') {
1987 UPDATE_STATE(s_chunk_parameters);
1991 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1995 t = parser->content_length;
1999 /* Overflow? Test against a conservative limit for simplicity. */
2000 if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
2001 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
2005 parser->content_length = t;
2009 case s_chunk_parameters:
2011 assert(parser->flags & F_CHUNKED);
2012 /* just ignore this shit. TODO check for overflow */
2014 UPDATE_STATE(s_chunk_size_almost_done);
2020 case s_chunk_size_almost_done:
2022 assert(parser->flags & F_CHUNKED);
2023 STRICT_CHECK(ch != LF);
2027 if (parser->content_length == 0) {
2028 parser->flags |= F_TRAILING;
2029 UPDATE_STATE(s_header_field_start);
2031 UPDATE_STATE(s_chunk_data);
2033 CALLBACK_NOTIFY(chunk_header);
2039 uint64_t to_read = MIN(parser->content_length,
2040 (uint64_t) ((data + len) - p));
2042 assert(parser->flags & F_CHUNKED);
2043 assert(parser->content_length != 0
2044 && parser->content_length != ULLONG_MAX);
2046 /* See the explanation in s_body_identity for why the content
2047 * length and data pointers are managed this way.
2050 parser->content_length -= to_read;
2053 if (parser->content_length == 0) {
2054 UPDATE_STATE(s_chunk_data_almost_done);
2060 case s_chunk_data_almost_done:
2061 assert(parser->flags & F_CHUNKED);
2062 assert(parser->content_length == 0);
2063 STRICT_CHECK(ch != CR);
2064 UPDATE_STATE(s_chunk_data_done);
2065 CALLBACK_DATA(body);
2068 case s_chunk_data_done:
2069 assert(parser->flags & F_CHUNKED);
2070 STRICT_CHECK(ch != LF);
2072 UPDATE_STATE(s_chunk_size_start);
2073 CALLBACK_NOTIFY(chunk_complete);
2077 assert(0 && "unhandled state");
2078 SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2083 /* Run callbacks for any marks that we have leftover after we ran out of
2084 * bytes. There should be at most one of these set, so it's OK to invoke
2085 * them in series (unset marks will not result in callbacks).
2087 * We use the NOADVANCE() variety of callbacks here because 'p' has already
2088 * overflowed 'data' and this allows us to correct for the off-by-one that
2089 * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2090 * value that's in-bounds).
2093 assert(((header_field_mark ? 1 : 0) +
2094 (header_value_mark ? 1 : 0) +
2095 (url_mark ? 1 : 0) +
2096 (body_mark ? 1 : 0) +
2097 (status_mark ? 1 : 0)) <= 1);
2099 CALLBACK_DATA_NOADVANCE(header_field);
2100 CALLBACK_DATA_NOADVANCE(header_value);
2101 CALLBACK_DATA_NOADVANCE(url);
2102 CALLBACK_DATA_NOADVANCE(body);
2103 CALLBACK_DATA_NOADVANCE(status);
2108 if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2109 SET_ERRNO(HPE_UNKNOWN);
/* Tell whether the end of the current message can only be found by the peer
 * closing the connection. In this file a non-zero result selects the
 * "read body until EOF" path once the headers are done, and
 * http_should_keep_alive() ends by returning its negation.
 *
 * NOTE(review): the statements inside the three branches below (presumably
 * the early returns) are not part of the visible text -- confirm the exact
 * values against the complete function.
 */
2116 /* Does the parser need to see an EOF to find the end of the message? */
2118 http_message_needs_eof (const http_parser *parser)
/* Requests are handled by the first test, before any status code is read. */
2120 if (parser->type == HTTP_REQUEST) {
2124 /* See RFC 2616 section 4.4 */
/* One branch covers every 1xx status, 204, 304 and any message carrying the
 * F_SKIPBODY flag. */
2125 if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2126 parser->status_code == 204 || /* No Content */
2127 parser->status_code == 304 || /* Not Modified */
2128 parser->flags & F_SKIPBODY) { /* response to a HEAD request */
/* Chunked framing, or a content_length other than ULLONG_MAX, shares the
 * last branch. ULLONG_MAX appears to be the value content_length holds when
 * no Content-Length header was given (see its use after the headers are
 * done) -- confirm. */
2132 if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
/* Report whether the connection may be reused after the current message.
 * The Connection-header flags are tested first (which flag depends on the
 * HTTP version), and the final return is the negation of
 * http_message_needs_eof().
 *
 * NOTE(review): the bodies of the two inner `if`s are not part of the
 * visible text; presumably they return 0 -- confirm.
 */
2141 http_should_keep_alive (const http_parser *parser)
/* Both major and minor must be non-zero to take this branch, so a version
 * with minor == 0 (e.g. 2.0) is handled by the "HTTP/1.0 or earlier" test
 * below -- NOTE(review): confirm that is intended. */
2143 if (parser->http_major > 0 && parser->http_minor > 0) {
/* Newer versions: the F_CONNECTION_CLOSE flag is the one consulted. */
2145 if (parser->flags & F_CONNECTION_CLOSE) {
2149 /* HTTP/1.0 or earlier */
/* Older versions: the absence of F_CONNECTION_KEEP_ALIVE is what is tested. */
2150 if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
/* A message that needs EOF to find its end yields 0 here. */
2155 return !http_message_needs_eof(parser);
/* Map an http_method value to its entry in method_strings.
 * ELEM_AT compares the index (cast to unsigned int) against the size of the
 * table, so a value outside the table yields "<unknown>" instead of an
 * out-of-bounds read. */
2160 http_method_str (enum http_method m)
2162 return ELEM_AT(method_strings, m, "<unknown>");
/* Reset `parser` so it can parse messages of kind `t`.
 * The whole struct is zero-filled; only the `data` pointer is carried over,
 * by saving it before the memset and storing it back afterwards.
 *
 * NOTE(review): parser->data is read before the struct is cleared, so a
 * caller handing in a never-initialised struct makes this read an
 * indeterminate pointer value -- confirm callers set `data` first.
 */
2167 http_parser_init (http_parser *parser, enum http_parser_type t)
2169 void *data = parser->data; /* preserve application data */
2170 memset(parser, 0, sizeof(*parser));
2171 parser->data = data;
/* Start state: s_start_req for HTTP_REQUEST, s_start_res for HTTP_RESPONSE,
 * s_start_req_or_res for any other value of `t`. */
2173 parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2174 parser->http_errno = HPE_OK;
/* Zero-fill the whole *settings struct (sizeof(*settings) bytes). */
2178 http_parser_settings_init(http_parser_settings *settings)
2180 memset(settings, 0, sizeof(*settings));
/* Return the `name` member of the http_strerror_tab entry indexed by `err`.
 * The range check is an assert only, so it is compiled out when NDEBUG is
 * defined; `err` must then already be a valid index into the table. */
2184 http_errno_name(enum http_errno err) {
2185 assert(((size_t) err) <
2186 (sizeof(http_strerror_tab) / sizeof(http_strerror_tab[0])));
2187 return http_strerror_tab[err].name;
/* Return the `description` member of the http_strerror_tab entry indexed by
 * `err`. The range check is an assert only, so it is compiled out when
 * NDEBUG is defined; `err` must then already be a valid index into the
 * table. */
2191 http_errno_description(enum http_errno err) {
2192 assert(((size_t) err) <
2193 (sizeof(http_strerror_tab) / sizeof(http_strerror_tab[0])));
2194 return http_strerror_tab[err].description;
/* One step of the host state machine driven by http_parse_host(): takes the
 * current state `s` and the next input character `ch` and returns the state
 * to move to. s_http_host_dead is the value http_parse_host() tests for
 * after every call.
 *
 * NOTE(review): the switch header, some case labels and the character tests
 * guarding several returns are not part of the visible text; the comments
 * below describe only what can be read here.
 */
2197 static enum http_host_state
2198 http_parse_host_char(enum http_host_state s, const char ch) {
/* Userinfo states share one body: one path leads to s_http_host_start (its
 * guard is not visible; presumably the '@' separator -- confirm), and
 * characters accepted by IS_USERINFO_CHAR lead to s_http_userinfo. */
2200 case s_http_userinfo:
2201 case s_http_userinfo_start:
2203 return s_http_host_start;
2206 if (IS_USERINFO_CHAR(ch)) {
2207 return s_http_userinfo;
/* Start of the host: one path enters the v6 states via s_http_host_v6_start
 * (guard not visible; presumably '[' -- confirm); IS_HOST_CHAR is the test
 * applied to ordinary host characters. */
2211 case s_http_host_start:
2213 return s_http_host_v6_start;
2216 if (IS_HOST_CHAR(ch)) {
/* Second IS_HOST_CHAR test; the case label it belongs to is not visible. */
2223 if (IS_HOST_CHAR(ch)) {
/* Once the v6 part has ended the machine can move on to the port. */
2228 case s_http_host_v6_end:
2230 return s_http_host_port_start;
/* Inside the v6 part: one path ends it (guard not visible). */
2235 case s_http_host_v6:
2237 return s_http_host_v6_end;
/* Hex digits, ':' and '.' are the characters accepted into the v6 part. */
2241 case s_http_host_v6_start:
2242 if (IS_HEX(ch) || ch == ':' || ch == '.') {
2243 return s_http_host_v6;
/* Port states share one body that leads to s_http_host_port (the character
 * test is not visible; presumably digits only -- confirm). */
2248 case s_http_host_port:
2249 case s_http_host_port_start:
2251 return s_http_host_port;
/* Last return of the function: the dead state. */
2259 return s_http_host_dead;
/* Re-scan the span previously recorded in u->field_data[UF_HOST] and split
 * it into userinfo, host and port entries of `u`.
 *
 * buf       start of the URL text; offsets stored in `u` are relative to it.
 * u         in:  UF_HOST off/len describe the span to scan. Both are read
 *                unconditionally, so they must already be valid.
 *           out: UF_HOST off/len are rebuilt; UF_PORT and UF_USERINFO get
 *                their off/len and field_set bit when such a part is seen.
 * found_at  non-zero starts the scan in s_http_userinfo_start, zero starts
 *           it in s_http_host_start.
 *
 * The caller in this file treats a non-zero result as failure.
 * NOTE(review): the return statements themselves are not part of the visible
 * text -- confirm.
 */
2263 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2264 enum http_host_state s;
/* Despite its name this is the END OFFSET of the host span (off + len), not
 * the length of buf. */
2267 size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
/* The host length is rebuilt one character at a time below. */
2269 u->field_data[UF_HOST].len = 0;
2271 s = found_at ? s_http_userinfo_start : s_http_host_start;
2273 for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2274 enum http_host_state new_s = http_parse_host_char(s, *p);
/* Dead state reported by the state machine (handling not visible here). */
2276 if (new_s == s_http_host_dead) {
/* Pattern used for every component below: when the previous state `s` is not
 * yet the component's state, this is its first character and the offset is
 * recorded; every character then adds one to the component's length. */
2282 if (s != s_http_host) {
2283 u->field_data[UF_HOST].off = p - buf;
2285 u->field_data[UF_HOST].len++;
2288 case s_http_host_v6:
2289 if (s != s_http_host_v6) {
2290 u->field_data[UF_HOST].off = p - buf;
2292 u->field_data[UF_HOST].len++;
/* Port and userinfo additionally reset their length and raise their
 * field_set bit on the first character. */
2295 case s_http_host_port:
2296 if (s != s_http_host_port) {
2297 u->field_data[UF_PORT].off = p - buf;
2298 u->field_data[UF_PORT].len = 0;
2299 u->field_set |= (1 << UF_PORT);
2301 u->field_data[UF_PORT].len++;
2304 case s_http_userinfo:
2305 if (s != s_http_userinfo) {
2306 u->field_data[UF_USERINFO].off = p - buf ;
2307 u->field_data[UF_USERINFO].len = 0;
2308 u->field_set |= (1 << UF_USERINFO);
2310 u->field_data[UF_USERINFO].len++;
/* States listed here are the ones singled out once the scan is over
 * (presumably rejected as incomplete input -- confirm). */
2319 /* Make sure we don't end somewhere unexpected */
2321 case s_http_host_start:
2322 case s_http_host_v6_start:
2323 case s_http_host_v6:
2324 case s_http_host_port_start:
2325 case s_http_userinfo:
2326 case s_http_userinfo_start:
/* Split the URL in buf[0 .. buflen) into components, recording offset/length
 * pairs in u->field_data and the matching bits in u->field_set.
 *
 * buf, buflen  the URL text; the scan loop is bounded by buflen.
 * is_connect   non-zero starts the URL state machine at s_req_server_start
 *              and requires field_set to be exactly the host and port bits.
 * u            output. Only `port` and `field_set` are reset in the visible
 *              lines, so field_data entries of fields that end up unset keep
 *              whatever they held before -- NOTE(review): confirm.
 *
 * NOTE(review): the return statements and several switch lines are not part
 * of the visible text, so the return convention is not restated here.
 */
2336 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2337 struct http_parser_url *u)
2341 enum http_parser_url_fields uf, old_uf;
2344 u->port = u->field_set = 0;
2345 s = is_connect ? s_req_server_start : s_req_spaces_before_url;
/* Feed the URL state machine one character at a time. */
2348 for (p = buf; p < buf + buflen; p++) {
2349 s = parse_url_char(s, *p);
2351 /* Figure out the next field that we're operating on */
2356 /* Skip delimiters */
2357 case s_req_schema_slash:
2358 case s_req_schema_slash_slash:
2359 case s_req_server_start:
2360 case s_req_query_string_start:
2361 case s_req_fragment_start:
2368 case s_req_server_with_at:
2380 case s_req_query_string:
2384 case s_req_fragment:
2389 assert(!"Unexpected state");
/* Same field as for the previous character: just extend its length. */
2393 /* Nothing's changed; soldier on */
2395 u->field_data[uf].len++;
/* First character of a field: record where it starts and mark it as set. */
2399 u->field_data[uf].off = p - buf;
2400 u->field_data[uf].len = 1;
2402 u->field_set |= (1 << uf);
/* NOTE(review): the test below is true when EITHER the schema bit or the
 * host bit is set, so http_parse_host() can run for input that produced a
 * schema but no host; it would then read UF_HOST off/len values this call
 * never wrote. The two comments that follow suggest the intent is to reject
 * such input -- confirm. */
2406 /* host must be present if there is a schema */
2407 /* parsing http:///toto will fail */
2408 if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2409 if (http_parse_host(buf, u, found_at) != 0) {
2414 /* CONNECT requests can only contain "hostname:port" */
2415 if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2419 if (u->field_set & (1 << UF_PORT)) {
/* NOTE(review): strtoul() keeps consuming digits until it meets a non-digit
 * byte; it is bounded neither by buflen nor by the recorded port length. If
 * the port is the last thing in buf and buf is not NUL-terminated this may
 * read past the buffer -- confirm callers always pass terminated strings. */
2420 /* Don't bother with endp; we've already validated the string */
2421 unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2423 /* Ports must fit in 16 bits, i.e. be below 2^16 */
2428 u->port = (uint16_t) v;
/* Pause or resume the parser by storing HPE_PAUSED (paused != 0) or HPE_OK
 * (paused == 0) as the parser errno via SET_ERRNO. The update is applied
 * only while the current errno is HPE_OK or HPE_PAUSED. The assert(0) near
 * the end flags the remaining case, a parser already in an error state, and
 * has no effect when assertions are compiled out. */
2435 http_parser_pause(http_parser *parser, int paused) {
2436 /* Users should only be pausing/unpausing a parser that is not in an error
2437 * state. In non-debug builds, there's not much that we can do about this
2438 * other than ignore it.
2440 if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2441 HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2442 SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2444 assert(0 && "Attempting to pause parser in error state");
/* Non-zero when parser->state is s_message_done, zero otherwise.
 * In this file the fixed-length body state switches to s_message_done as
 * soon as content_length reaches 0, before the body callback for those
 * bytes is invoked; presumably this lets a body callback detect the last
 * piece of a body -- confirm for other body framings. */
2449 http_body_is_final(const struct http_parser *parser) {
2450 return parser->state == s_message_done;
/* Pack the library version into a single integer:
 *   major * 0x10000 | minor * 0x100 | patch
 * i.e. patch in the lowest bits, minor from bit 8 and major from bit 16.
 * The fields only stay separate if minor and patch are below 256 --
 * TODO confirm against the header defining HTTP_PARSER_VERSION_*. */
2454 http_parser_version(void) {
2455 return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2456 HTTP_PARSER_VERSION_MINOR * 0x00100 |
2457 HTTP_PARSER_VERSION_PATCH * 0x00001;