2 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 // Official repository: https://github.com/boostorg/beast
10 #ifndef BOOST_BEAST_HTTP_BASIC_PARSER_HPP
11 #define BOOST_BEAST_HTTP_BASIC_PARSER_HPP
13 #include <boost/beast/core/detail/config.hpp>
14 #include <boost/beast/core/error.hpp>
15 #include <boost/beast/core/string.hpp>
16 #include <boost/beast/http/field.hpp>
17 #include <boost/beast/http/verb.hpp>
18 #include <boost/beast/http/detail/basic_parser.hpp>
19 #include <boost/asio/buffer.hpp>
20 #include <boost/optional.hpp>
21 #include <boost/assert.hpp>
24 #include <type_traits>
31 /** A parser for decoding HTTP/1 wire format messages.
33 This parser is designed to efficiently parse messages in the
34 HTTP/1 wire format. It allocates no memory when input is
35 presented as a single contiguous buffer, and uses minimal
36 state. It will handle chunked encoding and it understands
37 the semantics of the Connection, Content-Length, and Upgrade
39 The parser is optimized for the case where the input buffer
40 sequence consists of a single contiguous buffer. The
41 @ref beast::basic_flat_buffer class is provided, which guarantees
42 that the input sequence of the stream buffer will be represented
43 by exactly one contiguous buffer. To ensure the optimum performance
44 of the parser, use @ref beast::basic_flat_buffer with HTTP algorithms
45 such as @ref read, @ref read_some, @ref async_read, and @ref async_read_some.
46 Alternatively, the caller may use custom techniques to ensure that
47 the structured portion of the HTTP message (header or chunk header)
48 is contained in a linear buffer.
50 The interface to the parser uses virtual member functions.
51 To use this class, derive your type from @ref basic_parser. When
52 bytes are presented, the implementation will make a series of zero
53 or more calls to virtual functions, which the derived class must
56 Every virtual function must be provided by the derived class,
57 or else a compilation error will be generated. The implementation
58 will make sure that `ec` is clear before each virtual function
59 is invoked. If a virtual function sets an error, it is propagated
60 out of the parser to the caller.
62 @tparam isRequest A `bool` indicating whether the parser will be
63 presented with a request or response message.
65 @note If the parser encounters a field value with obs-fold
66 longer than 4 kilobytes in length, an error is generated.
68 template<bool isRequest>
70 : private detail::basic_parser_base
72 std::uint64_t body_limit_ =
73 default_body_limit(is_request{}); // max payload body
74 std::uint64_t len_ = 0; // size of chunk or body
75 std::uint64_t len0_ = 0; // content length if known
76 std::unique_ptr<char[]> buf_; // temp storage
77 std::size_t buf_len_ = 0; // size of buf_
78 std::size_t skip_ = 0; // resume search here
79 std::uint32_t header_limit_ = 8192; // max header size
80 unsigned short status_ = 0; // response status
81 state state_ = state::nothing_yet; // initial state
82 unsigned f_ = 0; // flags
84 // limit on the size of the stack flat buffer
85 static std::size_t constexpr max_stack_buffer = 8192;
// The constants below are single-bit masks (bits 0 through 13) that are
// combined in the `f_` member and tested with `f_ & flagXxx`.
87 // Message will be complete after reading header
// (this is the bit reported by the "skip parse option" accessor)
88 static unsigned constexpr flagSkipBody = 1<< 0;
90 // Consume input buffers across semantic boundaries
// (this is the bit reported by the "eager parse option" accessor)
91 static unsigned constexpr flagEager = 1<< 1;
93 // The parser has read at least one byte
94 static unsigned constexpr flagGotSome = 1<< 2;
96 // Message semantics indicate a body is expected.
97 // cleared if flagSkipBody set
99 static unsigned constexpr flagHasBody = 1<< 3;
// NOTE(review): presumably set when the parsed HTTP-version is 1.1;
// no use is visible in this header -- confirm in the implementation.
101 static unsigned constexpr flagHTTP11 = 1<< 4;
// Tested by the accessor documented as "Returns `true` if the message
// semantics require an end of file".
102 static unsigned constexpr flagNeedEOF = 1<< 5;
// NOTE(review): presumably marks that a CRLF must be consumed next while
// parsing chunked bodies -- confirm in the implementation.
103 static unsigned constexpr flagExpectCRLF = 1<< 6;
// NOTE(review): presumably records a "close" token seen in the
// Connection field -- confirm in the implementation.
104 static unsigned constexpr flagConnectionClose = 1<< 7;
// Tested by the accessor documented as "Returns `true` if the message
// is an upgrade message". Distinct from flagUpgrade below.
105 static unsigned constexpr flagConnectionUpgrade = 1<< 8;
// NOTE(review): presumably records a "keep-alive" token seen in the
// Connection field -- confirm in the implementation.
106 static unsigned constexpr flagConnectionKeepAlive = 1<< 9;
// NOTE(review): presumably set once a Content-Length field has been
// parsed -- confirm in the implementation.
107 static unsigned constexpr flagContentLength = 1<< 10;
// Tested by the accessor documented as "Returns `true` if the last
// value for Transfer-Encoding is "chunked"".
108 static unsigned constexpr flagChunked = 1<< 11;
// NOTE(review): presumably set when an Upgrade field is present;
// not read anywhere in this header -- confirm in the implementation.
109 static unsigned constexpr flagUpgrade = 1<< 12;
// NOTE(review): presumably set after the zero-length final chunk header
// has been seen -- confirm in the implementation.
110 static unsigned constexpr flagFinalChunk = 1<< 13;
114 default_body_limit(std::true_type)
116 // limit for requests
117 return 1 * 1024 * 1024; // 1MB
122 default_body_limit(std::false_type)
124 // limit for responses
125 return 8 * 1024 * 1024; // 8MB
128 template<bool OtherIsRequest>
129 friend class basic_parser;
131 friend class basic_parser_test;
134 /// Default constructor
135 basic_parser() = default;
141 After the move, the only valid operation on the
142 moved-from object is destruction.
144 basic_parser(basic_parser &&) = default;
147 basic_parser& operator=(basic_parser &&) = default;
150 /// `true` if this parser parses requests, `false` for responses.
152 std::integral_constant<bool, isRequest>;
155 virtual ~basic_parser() = default;
158 basic_parser(basic_parser const&) = delete;
161 basic_parser& operator=(basic_parser const&) = delete;
163 /// Returns `true` if the parser has received at least one byte of input.
167 return state_ != state::nothing_yet;
170 /** Returns `true` if the message is complete.
172 The message is complete after the full header is produced
173 and one of the following is true:
175 @li The skip body option was set.
177 @li The semantics of the message indicate there is no body.
179 @li The semantics of the message indicate a body is expected,
180 and the entire body was parsed.
185 return state_ == state::complete;
188 /** Returns `true` if the parser has produced the full header.
191 is_header_done() const
193 return state_ > state::fields;
196 /** Returns `true` if the message is an upgrade message.
198 @note The return value is undefined unless
199 @ref is_header_done would return `true`.
204 return (f_ & flagConnectionUpgrade) != 0;
207 /** Returns `true` if the last value for Transfer-Encoding is "chunked".
209 @note The return value is undefined unless
210 @ref is_header_done would return `true`.
215 return (f_ & flagChunked) != 0;
218 /** Returns `true` if the message has keep-alive connection semantics.
220 This function always returns `false` if @ref need_eof would return
223 @note The return value is undefined unless
224 @ref is_header_done would return `true`.
229 /** Returns the optional value of Content-Length if known.
231 @note The return value is undefined unless
232 @ref is_header_done would return `true`.
234 boost::optional<std::uint64_t>
235 content_length() const;
237 /** Returns the remaining content length if known
239 If the message header specifies a Content-Length,
240 the return value will be the number of bytes remaining
241 in the payload body that have not yet been parsed.
243 @note The return value is undefined unless
244 @ref is_header_done would return `true`.
246 boost::optional<std::uint64_t>
247 content_length_remaining() const;
249 /** Returns `true` if the message semantics require an end of file.
251 Depending on the contents of the header, the parser may
252 require an end of file notification to know where the end
253 of the body lies. If this function returns `true` it will be
254 necessary to call @ref put_eof when there will never be additional
260 return (f_ & flagNeedEOF) != 0;
263 /** Set the limit on the payload body.
265 This function sets the maximum allowed size of the payload body,
266 before any encodings except chunked have been removed. Depending
267 on the message semantics, one of these cases will apply:
269 @li The Content-Length is specified and exceeds the limit. In
270 this case the result @ref error::body_limit is returned
271 immediately after the header is parsed.
273 @li The Content-Length is unspecified and the chunked encoding
274 is not specified as the last encoding. In this case the end of
275 message is determined by the end of file indicator on the
276 associated stream or input source. If a sufficient number of
277 body payload octets are presented to the parser to exceed the
278 configured limit, the parse fails with the result
279 @ref error::body_limit.
281 @li The Transfer-Encoding specifies the chunked encoding as the
282 last encoding. In this case, when the number of payload body
283 octets produced by removing the chunked encoding exceeds
284 the configured limit, the parse fails with the result
285 @ref error::body_limit.
287 Setting the limit after any body octets have been parsed
288 results in undefined behavior.
290 The default limit is 1MB for requests and 8MB for responses.
292 @param v The payload body limit to set
295 body_limit(std::uint64_t v)
300 /** Set a limit on the total size of the header.
302 This function sets the maximum allowed size of the header
303 including all field name, value, and delimiter characters
304 and also including the CRLF sequences in the serialized
305 input. If the end of the header is not found within the
306 limit of the header size, the error @ref error::header_limit
307 is returned by @ref put.
309 Setting the limit after any header octets have been parsed
310 results in undefined behavior.
313 header_limit(std::uint32_t v)
318 /// Returns `true` if the eager parse option is set.
322 return (f_ & flagEager) != 0;
325 /** Set the eager parse option.
327 Normally the parser returns after successfully parsing a structured
328 element (header, chunk header, or chunk body) even if there are octets
329 remaining in the input. This is necessary when attempting to parse the
330 header first, or when the caller wants to inspect information which may
331 be invalidated by subsequent parsing, such as a chunk extension. The
332 `eager` option controls whether the parser keeps going after parsing
333 a structured element if there are octets remaining in the buffer and no
334 error occurs. This option is automatically set or cleared during certain
335 stream operations to improve performance with no change in functionality.
337 The default setting is `false`.
339 @param v `true` to set the eager parse option or `false` to disable it.
350 /// Returns `true` if the skip parse option is set.
354 return (f_ & flagSkipBody) != 0;
357 /** Set the skip parse option.
359 This option controls whether or not the parser expects to see an HTTP
360 body, regardless of the presence or absence of certain fields such as
361 Content-Length or a chunked Transfer-Encoding. Depending on the request,
362 some responses do not carry a body. For example, a 200 response to a
363 CONNECT request from a tunneling proxy, or a response to a HEAD request.
364 In these cases, callers may use this function to inform the parser that
365 no body is expected. The parser will consider the message complete
366 after the header has been received.
368 @param v `true` to set the skip body option or `false` to disable it.
370 @note This function must be called before any bytes are processed.
375 /** Write a buffer sequence to the parser.
377 This function attempts to incrementally parse the HTTP
378 message data stored in the caller provided buffers. Upon
379 success, a positive return value indicates that the parser
380 made forward progress, consuming that number of
383 In some cases there may be an insufficient number of octets
384 in the input buffer in order to make forward progress. This
385 is indicated by the code @ref error::need_more. When
386 this happens, the caller should place additional bytes into
387 the buffer sequence and call @ref put again.
389 The error code @ref error::need_more is special. When this
390 error is returned, a subsequent call to @ref put may succeed
391 if the buffers have been updated. Otherwise, upon error
392 the parser may not be restarted.
394 @param buffers An object meeting the requirements of
395 <em>ConstBufferSequence</em> that represents the next chunk of
396 message data. If the length of this buffer sequence is
397 one, the implementation will not allocate additional memory.
398 The class @ref beast::basic_flat_buffer is provided as one way to
399 meet this requirement.
401 @param ec Set to the error, if any occurred.
403 @return The number of octets consumed in the buffer
404 sequence. The caller should remove these octets even if the
407 template<class ConstBufferSequence>
409 put(ConstBufferSequence const& buffers, error_code& ec);
411 #if ! BOOST_BEAST_DOXYGEN
413 put(net::const_buffer buffer,
417 /** Inform the parser that the end of stream was reached.
419 In certain cases, HTTP needs to know where the end of
420 the stream is. For example, sometimes servers send
421 responses without Content-Length and expect the client
422 to consume input (for the body) until EOF. Callbacks
423 and errors will still be processed as usual.
425 This is typically called when a read from the
426 underlying stream object sets the error code to
429 @note Only valid after parsing a complete header.
431 @param ec Set to the error, if any occurred.
434 put_eof(error_code& ec);
437 /** Called after receiving the request-line.
439 This virtual function is invoked after receiving a request-line
440 when parsing HTTP requests.
441 It can only be called when `isRequest == true`.
443 @param method The verb enumeration. If the method string is not
444 one of the predefined strings, this value will be @ref verb::unknown.
446 @param method_str The unmodified string representing the verb.
448 @param target The request-target.
450 @param version The HTTP-version. This will be 10 for HTTP/1.0,
453 @param ec An output parameter which the function may set to indicate
454 an error. The error will be clear before this function is invoked.
460 string_view method_str,
465 /** Called after receiving the status-line.
467 This virtual function is invoked after receiving a status-line
468 when parsing HTTP responses.
469 It can only be called when `isRequest == false`.
471 @param code The numeric status code.
473 @param reason The reason-phrase. Note that this value is
474 now obsolete, and only provided for historical or diagnostic
477 @param version The HTTP-version. This will be 10 for HTTP/1.0,
480 @param ec An output parameter which the function may set to indicate
481 an error. The error will be clear before this function is invoked.
491 /** Called once for each complete field in the HTTP header.
493 This virtual function is invoked for each field that is received
494 while parsing an HTTP message.
496 @param name The known field enum value. If the name of the field
497 is not recognized, this value will be @ref field::unknown.
499 @param name_string The exact name of the field as received from
500 the input, represented as a string.
502 @param value A string holding the value of the field.
504 @param ec An output parameter which the function may set to indicate
505 an error. The error will be clear before this function is invoked.
511 string_view name_string,
515 /** Called once after the complete HTTP header is received.
517 This virtual function is invoked once, after the complete HTTP
518 header is received while parsing a message.
520 @param ec An output parameter which the function may set to indicate
521 an error. The error will be clear before this function is invoked.
525 on_header_impl(error_code& ec) = 0;
527 /** Called once before the body is processed.
529 This virtual function is invoked once, before the content body is
530 processed (but after the complete header is received).
532 @param content_length A value representing the content length in
533 bytes if the length is known (this can include a zero length).
534 Otherwise, the value will be `boost::none`.
536 @param ec An output parameter which the function may set to indicate
537 an error. The error will be clear before this function is invoked.
542 boost::optional<std::uint64_t> const& content_length,
545 /** Called each time additional data is received representing the content body.
547 This virtual function is invoked for each piece of the body which is
548 received while parsing a message. This function is only used when
549 no chunked transfer encoding is present.
551 @param body A string holding the additional body contents. This may
552 contain nulls or unprintable characters.
554 @param ec An output parameter which the function may set to indicate
555 an error. The error will be clear before this function is invoked.
557 @see on_chunk_body_impl
565 /** Called each time a new chunk header of a chunk encoded body is received.
567 This function is invoked each time a new chunk header is received.
568 The function is only used when the chunked transfer encoding is present.
570 @param size The size of this chunk, in bytes.
572 @param extensions A string containing the entire chunk extensions.
573 This may be empty, indicating no extensions are present.
575 @param ec An output parameter which the function may set to indicate
576 an error. The error will be clear before this function is invoked.
580 on_chunk_header_impl(
582 string_view extensions,
585 /** Called each time additional data is received representing part of a body chunk.
587 This virtual function is invoked for each piece of the body which is
588 received while parsing a message. This function is only used when
589 the chunked transfer encoding is present.
591 @param remain The number of bytes remaining in this chunk. This includes
592 the contents of passed `body`. If this value is zero, then this represents
595 @param body A string holding the additional body contents. This may
596 contain nulls or unprintable characters.
598 @param ec An output parameter which the function may set to indicate
599 an error. The error will be clear before this function is invoked.
601 @return This function should return the number of bytes actually consumed
602 from the `body` value. Any bytes that are not consumed on this call
603 will be presented in a subsequent call.
610 std::uint64_t remain,
614 /** Called once when the complete message is received.
616 This virtual function is invoked once, after successfully parsing
617 a complete HTTP message.
619 @param ec An output parameter which the function may set to indicate
620 an error. The error will be clear before this function is invoked.
624 on_finish_impl(error_code& ec) = 0;
627 template<class ConstBufferSequence>
631 ConstBufferSequence const& buffers,
636 char const* p, std::size_t n,
641 char const*& p, char const* last,
642 error_code& ec, std::true_type);
646 char const*& p, char const* last,
647 error_code& ec, std::false_type);
651 char const*& p, char const* last,
656 error_code& ec, std::true_type);
660 error_code& ec, std::false_type);
663 parse_body(char const*& p,
664 std::size_t n, error_code& ec);
667 parse_body_to_eof(char const*& p,
668 std::size_t n, error_code& ec);
671 parse_chunk_header(char const*& p,
672 std::size_t n, error_code& ec);
675 parse_chunk_body(char const*& p,
676 std::size_t n, error_code& ec);
680 string_view value, error_code& ec);
687 #include <boost/beast/http/impl/basic_parser.hpp>
688 #ifdef BOOST_BEAST_HEADER_ONLY
689 #include <boost/beast/http/impl/basic_parser.ipp>