2 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile Inc. http://www.torchmobile.com/
5 * Copyright (C) 2009 Google Inc. All rights reserved.
6 * Copyright (C) 2011 Apple Inc. All Rights Reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
18 * its contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
22 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
25 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include "platform/network/HTTPParsers.h"
36 #include "wtf/DateMath.h"
37 #include "wtf/text/CString.h"
38 #include "wtf/text/StringBuilder.h"
39 #include "wtf/text/WTFString.h"
40 #include "wtf/unicode/CharacterNames.h"
46 // true if there is more to parse, after incrementing pos past whitespace.
47 // Note: Might return pos == str.length()
48 static inline bool skipWhiteSpace(const String& str, unsigned& pos, bool fromHttpEquivMeta)
50 unsigned len = str.length();
52 if (fromHttpEquivMeta) {
53 while (pos < len && str[pos] <= ' ')
56 while (pos < len && (str[pos] == '\t' || str[pos] == ' '))
63 // Returns true if the function can match the whole token (case insensitive)
64 // incrementing pos on match, otherwise leaving pos unchanged.
65 // Note: Might return pos == str.length()
66 static inline bool skipToken(const String& str, unsigned& pos, const char* token)
68 unsigned len = str.length();
69 unsigned current = pos;
71 while (current < len && *token) {
72 if (toASCIILower(str[current]) != *token++)
84 // True if the expected equals sign is seen and there is more to follow.
85 static inline bool skipEquals(const String& str, unsigned &pos)
87 return skipWhiteSpace(str, pos, false) && str[pos++] == '=' && skipWhiteSpace(str, pos, false);
90 // True if a value present, incrementing pos to next space or semicolon, if any.
91 // Note: might return pos == str.length().
92 static inline bool skipValue(const String& str, unsigned& pos)
95 unsigned len = str.length();
97 if (str[pos] == ' ' || str[pos] == '\t' || str[pos] == ';')
104 bool isValidHTTPHeaderValue(const String& name)
106 // FIXME: This should really match name against
107 // field-value in section 4.2 of RFC 2616.
109 return name.containsOnlyLatin1() && !name.contains('\r') && !name.contains('\n');
112 // See RFC 2616, Section 2.2.
113 bool isValidHTTPToken(const String& characters)
115 if (characters.isEmpty())
117 for (unsigned i = 0; i < characters.length(); ++i) {
118 UChar c = characters[i];
119 if (c <= 0x20 || c >= 0x7F
120 || c == '(' || c == ')' || c == '<' || c == '>' || c == '@'
121 || c == ',' || c == ';' || c == ':' || c == '\\' || c == '"'
122 || c == '/' || c == '[' || c == ']' || c == '?' || c == '='
123 || c == '{' || c == '}')
129 static const size_t maxInputSampleSize = 128;
130 static String trimInputSample(const char* p, size_t length)
132 if (length > maxInputSampleSize)
133 return String(p, maxInputSampleSize) + horizontalEllipsis;
134 return String(p, length);
137 ContentDispositionType contentDispositionType(const String& contentDisposition)
139 if (contentDisposition.isEmpty())
140 return ContentDispositionNone;
142 Vector<String> parameters;
143 contentDisposition.split(';', parameters);
145 if (parameters.isEmpty())
146 return ContentDispositionNone;
148 String dispositionType = parameters[0];
149 dispositionType.stripWhiteSpace();
151 if (equalIgnoringCase(dispositionType, "inline"))
152 return ContentDispositionInline;
154 // Some broken sites just send bogus headers like
156 // Content-Disposition: ; filename="file"
157 // Content-Disposition: filename="file"
158 // Content-Disposition: name="file"
160 // without a disposition token... screen those out.
161 if (!isValidHTTPToken(dispositionType))
162 return ContentDispositionNone;
164 // We have a content-disposition of "attachment" or unknown.
165 // RFC 2183, section 2.8 says that an unknown disposition
166 // value should be treated as "attachment"
167 return ContentDispositionAttachment;
170 bool parseHTTPRefresh(const String& refresh, bool fromHttpEquivMeta, double& delay, String& url)
172 unsigned len = refresh.length();
175 if (!skipWhiteSpace(refresh, pos, fromHttpEquivMeta))
178 while (pos != len && refresh[pos] != ',' && refresh[pos] != ';')
181 if (pos == len) { // no URL
184 delay = refresh.stripWhiteSpace().toDouble(&ok);
188 delay = refresh.left(pos).stripWhiteSpace().toDouble(&ok);
193 skipWhiteSpace(refresh, pos, fromHttpEquivMeta);
194 unsigned urlStartPos = pos;
195 if (refresh.find("url", urlStartPos, false) == urlStartPos) {
197 skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
198 if (refresh[urlStartPos] == '=') {
200 skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
202 urlStartPos = pos; // e.g. "Refresh: 0; url.html"
206 unsigned urlEndPos = len;
208 if (refresh[urlStartPos] == '"' || refresh[urlStartPos] == '\'') {
209 UChar quotationMark = refresh[urlStartPos];
211 while (urlEndPos > urlStartPos) {
213 if (refresh[urlEndPos] == quotationMark)
217 // https://bugs.webkit.org/show_bug.cgi?id=27868
218 // Sometimes there is no closing quote for the end of the URL even though there was an opening quote.
219 // If we looped over the entire alleged URL string back to the opening quote, just go ahead and use everything
220 // after the opening quote instead.
221 if (urlEndPos == urlStartPos)
225 url = refresh.substring(urlStartPos, urlEndPos - urlStartPos).stripWhiteSpace();
230 double parseDate(const String& value)
232 return parseDateFromNullTerminatedCharacters(value.utf8().data());
235 // FIXME: This function doesn't comply with RFC 6266.
236 // For example, this function doesn't handle the interaction between " and ;
237 // that arises from quoted-string, nor does this function properly unquote
238 // attribute values. Further this function appears to process parameter names
239 // in a case-sensitive manner. (There are likely other bugs as well.)
240 String filenameFromHTTPContentDisposition(const String& value)
242 Vector<String> keyValuePairs;
243 value.split(';', keyValuePairs);
245 unsigned length = keyValuePairs.size();
246 for (unsigned i = 0; i < length; i++) {
247 size_t valueStartPos = keyValuePairs[i].find('=');
248 if (valueStartPos == kNotFound)
251 String key = keyValuePairs[i].left(valueStartPos).stripWhiteSpace();
253 if (key.isEmpty() || key != "filename")
256 String value = keyValuePairs[i].substring(valueStartPos + 1).stripWhiteSpace();
258 // Remove quotes if there are any
259 if (value[0] == '\"')
260 value = value.substring(1, value.length() - 2);
268 AtomicString extractMIMETypeFromMediaType(const AtomicString& mediaType)
270 StringBuilder mimeType;
271 unsigned length = mediaType.length();
272 mimeType.reserveCapacity(length);
273 for (unsigned i = 0; i < length; i++) {
274 UChar c = mediaType[i];
279 // While RFC 2616 does not allow it, other browsers allow multiple values in the HTTP media
280 // type header field, Content-Type. In such cases, the media type string passed here may contain
281 // the multiple values separated by commas. For now, this code ignores text after the first comma,
282 // which prevents it from simply failing to parse such types altogether. Later for better
283 // compatibility we could consider using the first or last valid MIME type instead.
284 // See https://bugs.webkit.org/show_bug.cgi?id=25352 for more discussion.
288 // FIXME: The following is not correct. RFC 2616 allows linear white space before and
289 // after the MIME type, but not within the MIME type itself. And linear white space
290 // includes only a few specific ASCII characters; a small subset of isSpaceOrNewline.
291 // See https://bugs.webkit.org/show_bug.cgi?id=8644 for a bug tracking part of this.
292 if (isSpaceOrNewline(c))
298 if (mimeType.length() == length)
300 return mimeType.toAtomicString();
303 String extractCharsetFromMediaType(const String& mediaType)
306 findCharsetInMediaType(mediaType, pos, len);
307 return mediaType.substring(pos, len);
310 void findCharsetInMediaType(const String& mediaType, unsigned& charsetPos, unsigned& charsetLen, unsigned start)
316 unsigned length = mediaType.length();
318 while (pos < length) {
319 pos = mediaType.find("charset", pos, false);
320 if (pos == kNotFound || !pos) {
325 // is what we found a beginning of a word?
326 if (mediaType[pos-1] > ' ' && mediaType[pos-1] != ';') {
334 while (pos != length && mediaType[pos] <= ' ')
337 if (mediaType[pos++] != '=') // this "charset" substring wasn't a parameter name, but there may be others
340 while (pos != length && (mediaType[pos] <= ' ' || mediaType[pos] == '"' || mediaType[pos] == '\''))
343 // we don't handle spaces within quoted parameter values, because charset names cannot have any
344 unsigned endpos = pos;
345 while (pos != length && mediaType[endpos] > ' ' && mediaType[endpos] != '"' && mediaType[endpos] != '\'' && mediaType[endpos] != ';')
349 charsetLen = endpos - pos;
354 ReflectedXSSDisposition parseXSSProtectionHeader(const String& header, String& failureReason, unsigned& failurePosition, String& reportURL)
356 DEFINE_STATIC_LOCAL(String, failureReasonInvalidToggle, ("expected 0 or 1"));
357 DEFINE_STATIC_LOCAL(String, failureReasonInvalidSeparator, ("expected semicolon"));
358 DEFINE_STATIC_LOCAL(String, failureReasonInvalidEquals, ("expected equals sign"));
359 DEFINE_STATIC_LOCAL(String, failureReasonInvalidMode, ("invalid mode directive"));
360 DEFINE_STATIC_LOCAL(String, failureReasonInvalidReport, ("invalid report directive"));
361 DEFINE_STATIC_LOCAL(String, failureReasonDuplicateMode, ("duplicate mode directive"));
362 DEFINE_STATIC_LOCAL(String, failureReasonDuplicateReport, ("duplicate report directive"));
363 DEFINE_STATIC_LOCAL(String, failureReasonInvalidDirective, ("unrecognized directive"));
367 if (!skipWhiteSpace(header, pos, false))
368 return ReflectedXSSUnset;
370 if (header[pos] == '0')
371 return AllowReflectedXSS;
373 if (header[pos++] != '1') {
374 failureReason = failureReasonInvalidToggle;
375 return ReflectedXSSInvalid;
378 ReflectedXSSDisposition result = FilterReflectedXSS;
379 bool modeDirectiveSeen = false;
380 bool reportDirectiveSeen = false;
383 // At end of previous directive: consume whitespace, semicolon, and whitespace.
384 if (!skipWhiteSpace(header, pos, false))
387 if (header[pos++] != ';') {
388 failureReason = failureReasonInvalidSeparator;
389 failurePosition = pos;
390 return ReflectedXSSInvalid;
393 if (!skipWhiteSpace(header, pos, false))
396 // At start of next directive.
397 if (skipToken(header, pos, "mode")) {
398 if (modeDirectiveSeen) {
399 failureReason = failureReasonDuplicateMode;
400 failurePosition = pos;
401 return ReflectedXSSInvalid;
403 modeDirectiveSeen = true;
404 if (!skipEquals(header, pos)) {
405 failureReason = failureReasonInvalidEquals;
406 failurePosition = pos;
407 return ReflectedXSSInvalid;
409 if (!skipToken(header, pos, "block")) {
410 failureReason = failureReasonInvalidMode;
411 failurePosition = pos;
412 return ReflectedXSSInvalid;
414 result = BlockReflectedXSS;
415 } else if (skipToken(header, pos, "report")) {
416 if (reportDirectiveSeen) {
417 failureReason = failureReasonDuplicateReport;
418 failurePosition = pos;
419 return ReflectedXSSInvalid;
421 reportDirectiveSeen = true;
422 if (!skipEquals(header, pos)) {
423 failureReason = failureReasonInvalidEquals;
424 failurePosition = pos;
425 return ReflectedXSSInvalid;
427 size_t startPos = pos;
428 if (!skipValue(header, pos)) {
429 failureReason = failureReasonInvalidReport;
430 failurePosition = pos;
431 return ReflectedXSSInvalid;
433 reportURL = header.substring(startPos, pos - startPos);
434 failurePosition = startPos; // If later semantic check deems unacceptable.
436 failureReason = failureReasonInvalidDirective;
437 failurePosition = pos;
438 return ReflectedXSSInvalid;
443 ContentTypeOptionsDisposition parseContentTypeOptionsHeader(const String& header)
445 if (header.stripWhiteSpace().lower() == "nosniff")
446 return ContentTypeOptionsNosniff;
447 return ContentTypeOptionsNone;
450 String extractReasonPhraseFromHTTPStatusLine(const String& statusLine)
452 size_t spacePos = statusLine.find(' ');
453 // Remove status code from the status line.
454 spacePos = statusLine.find(' ', spacePos + 1);
455 return statusLine.substring(spacePos + 1);
458 XFrameOptionsDisposition parseXFrameOptionsHeader(const String& header)
460 XFrameOptionsDisposition result = XFrameOptionsNone;
462 if (header.isEmpty())
465 Vector<String> headers;
466 header.split(',', headers);
468 for (size_t i = 0; i < headers.size(); i++) {
469 String currentHeader = headers[i].stripWhiteSpace();
470 XFrameOptionsDisposition currentValue = XFrameOptionsNone;
471 if (equalIgnoringCase(currentHeader, "deny"))
472 currentValue = XFrameOptionsDeny;
473 else if (equalIgnoringCase(currentHeader, "sameorigin"))
474 currentValue = XFrameOptionsSameOrigin;
475 else if (equalIgnoringCase(currentHeader, "allowall"))
476 currentValue = XFrameOptionsAllowAll;
478 currentValue = XFrameOptionsInvalid;
480 if (result == XFrameOptionsNone)
481 result = currentValue;
482 else if (result != currentValue)
483 return XFrameOptionsConflict;
488 bool parseRange(const String& range, long long& rangeOffset, long long& rangeEnd, long long& rangeSuffixLength)
490 // The format of "Range" header is defined in RFC 2616 Section 14.35.1.
491 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35.1
492 // We don't support multiple range requests.
494 rangeOffset = rangeEnd = rangeSuffixLength = -1;
496 // The "bytes" unit identifier should be present.
497 static const char bytesStart[] = "bytes=";
498 if (!range.startsWith(bytesStart, false))
500 String byteRange = range.substring(sizeof(bytesStart) - 1);
502 // The '-' character needs to be present.
503 int index = byteRange.find('-');
507 // If the '-' character is at the beginning, the suffix length, which specifies the last N bytes, is provided.
511 String suffixLengthString = byteRange.substring(index + 1).stripWhiteSpace();
513 long long value = suffixLengthString.toInt64Strict(&ok);
515 rangeSuffixLength = value;
519 // Otherwise, the first-byte-position and the last-byte-position are provied.
523 String firstBytePosStr = byteRange.left(index).stripWhiteSpace();
525 long long firstBytePos = firstBytePosStr.toInt64Strict(&ok);
529 String lastBytePosStr = byteRange.substring(index + 1).stripWhiteSpace();
530 long long lastBytePos = -1;
531 if (!lastBytePosStr.isEmpty()) {
532 lastBytePos = lastBytePosStr.toInt64Strict(&ok);
537 if (firstBytePos < 0 || !(lastBytePos == -1 || lastBytePos >= firstBytePos))
540 rangeOffset = firstBytePos;
541 rangeEnd = lastBytePos;
545 // HTTP/1.1 - RFC 2616
546 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1
547 // Request-Line = Method SP Request-URI SP HTTP-Version CRLF
548 size_t parseHTTPRequestLine(const char* data, size_t length, String& failureReason, String& method, String& url, HTTPVersion& httpVersion)
552 httpVersion = Unknown;
554 const char* space1 = 0;
555 const char* space2 = 0;
557 size_t consumedLength;
559 for (p = data, consumedLength = 0; consumedLength < length; p++, consumedLength++) {
565 } else if (*p == '\n') {
570 // Haven't finished header line.
571 if (consumedLength == length) {
572 failureReason = "Incomplete Request Line";
576 // RequestLine does not contain 3 parts.
577 if (!space1 || !space2) {
578 failureReason = "Request Line does not appear to contain: <Method> <Url> <HTTPVersion>.";
582 // The line must end with "\r\n".
583 const char* end = p + 1;
584 if (*(end - 2) != '\r') {
585 failureReason = "Request line does not end with CRLF";
590 method = String(data, space1 - data); // For length subtract 1 for space, but add 1 for data being the first character.
593 url = String(space1 + 1, space2 - space1 - 1); // For length subtract 1 for space.
596 String httpVersionString(space2 + 1, end - space2 - 3); // For length subtract 1 for space, and 2 for "\r\n".
597 if (httpVersionString.length() != 8 || !httpVersionString.startsWith("HTTP/1."))
598 httpVersion = Unknown;
599 else if (httpVersionString[7] == '0')
600 httpVersion = HTTP_1_0;
601 else if (httpVersionString[7] == '1')
602 httpVersion = HTTP_1_1;
604 httpVersion = Unknown;
609 size_t parseHTTPHeader(const char* start, size_t length, String& failureReason, AtomicString& nameStr, AtomicString& valueStr)
611 const char* p = start;
612 const char* end = start + length;
619 for (; p < end; p++) {
622 if (name.isEmpty()) {
623 if (p + 1 < end && *(p + 1) == '\n')
624 return (p + 2) - start;
625 failureReason = "CR doesn't follow LF at " + trimInputSample(p, end - p);
628 failureReason = "Unexpected CR in name at " + trimInputSample(name.data(), name.size());
631 failureReason = "Unexpected LF in name at " + trimInputSample(name.data(), name.size());
645 for (; p < end && *p == 0x20; p++) { }
647 for (; p < end; p++) {
652 failureReason = "Unexpected LF in value at " + trimInputSample(value.data(), value.size());
662 if (p >= end || *p != '\n') {
663 failureReason = "CR doesn't follow LF after value at " + trimInputSample(p, end - p);
666 nameStr = AtomicString::fromUTF8(name.data(), name.size());
667 valueStr = AtomicString::fromUTF8(value.data(), value.size());
668 if (nameStr.isNull()) {
669 failureReason = "Invalid UTF-8 sequence in header name";
672 if (valueStr.isNull()) {
673 failureReason = "Invalid UTF-8 sequence in header value";
679 size_t parseHTTPRequestBody(const char* data, size_t length, Vector<unsigned char>& body)
682 body.append(data, length);