src/net/http/http_chunked_decoder.cc

   1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 // Derived from:
   6 //   mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp
   7 // The license block is:
   8 /* ***** BEGIN LICENSE BLOCK *****
   9  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  10  *
  11  * The contents of this file are subject to the Mozilla Public License Version
  12  * 1.1 (the "License"); you may not use this file except in compliance with
  13  * the License. You may obtain a copy of the License at
  14  * http://www.mozilla.org/MPL/
  15  *
  16  * Software distributed under the License is distributed on an "AS IS" basis,
  17  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  18  * for the specific language governing rights and limitations under the
  19  * License.
  20  *
  21  * The Original Code is Mozilla.
  22  *
  23  * The Initial Developer of the Original Code is
  24  * Netscape Communications.
  25  * Portions created by the Initial Developer are Copyright (C) 2001
  26  * the Initial Developer. All Rights Reserved.
  27  *
  28  * Contributor(s):
  29  *   Darin Fisher <darin@netscape.com> (original author)
  30  *
  31  * Alternatively, the contents of this file may be used under the terms of
  32  * either the GNU General Public License Version 2 or later (the "GPL"), or
  33  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  34  * in which case the provisions of the GPL or the LGPL are applicable instead
  35  * of those above. If you wish to allow use of your version of this file only
  36  * under the terms of either the GPL or the LGPL, and not to allow others to
  37  * use your version of this file under the terms of the MPL, indicate your
  38  * decision by deleting the provisions above and replace them with the notice
  39  * and other provisions required by the GPL or the LGPL. If you do not delete
  40  * the provisions above, a recipient may use your version of this file under
  41  * the terms of any one of the MPL, the GPL or the LGPL.
  42  *
  43  * ***** END LICENSE BLOCK ***** */
  44
  45 #include "net/http/http_chunked_decoder.h"
  46
  47 #include <algorithm>
  48
  49 #include "base/logging.h"
  50 #include "base/strings/string_number_conversions.h"
  51 #include "base/strings/string_piece.h"
  52 #include "base/strings/string_util.h"
  53 #include "net/base/net_errors.h"
  54
  55 namespace net {
  56
  57 // Absurdly long size to avoid imposing a constraint on chunked encoding
  58 // extensions.
  59 const size_t HttpChunkedDecoder::kMaxLineBufLen = 16384;
  60
  61 HttpChunkedDecoder::HttpChunkedDecoder()
  62     : chunk_remaining_(0),
  63       chunk_terminator_remaining_(false),
  64       reached_last_chunk_(false),
  65       reached_eof_(false),
  66       bytes_after_eof_(0) {
  67 }
  68
  69 int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) {
  70   int result = 0;
  71
  72   while (buf_len) {
  73     if (chunk_remaining_) {
  74       int num = std::min(chunk_remaining_, buf_len);
  75
  76       buf_len -= num;
  77       chunk_remaining_ -= num;
  78
  79       result += num;
  80       buf += num;
  81
  82       // After each chunk's data there should be a CRLF
  83       if (!chunk_remaining_)
  84         chunk_terminator_remaining_ = true;
  85       continue;
  86     } else if (reached_eof_) {
  87       bytes_after_eof_ += buf_len;
  88       break;  // Done!
  89     }
  90
  91     int bytes_consumed = ScanForChunkRemaining(buf, buf_len);
  92     if (bytes_consumed < 0)
  93       return bytes_consumed; // Error
  94
  95     buf_len -= bytes_consumed;
  96     if (buf_len)
  97       memmove(buf, buf + bytes_consumed, buf_len);
  98   }
  99
 100   return result;
 101 }
 102
 103 int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) {
 104   DCHECK_EQ(0, chunk_remaining_);
 105   DCHECK_GT(buf_len, 0);
 106
 107   int bytes_consumed = 0;
 108
 109   size_t index_of_lf = base::StringPiece(buf, buf_len).find('\n');
 110   if (index_of_lf != base::StringPiece::npos) {
 111     buf_len = static_cast<int>(index_of_lf);
 112     if (buf_len && buf[buf_len - 1] == '\r')  // Eliminate a preceding CR.
 113       buf_len--;
 114     bytes_consumed = static_cast<int>(index_of_lf) + 1;
 115
 116     // Make buf point to the full line buffer to parse.
 117     if (!line_buf_.empty()) {
 118       line_buf_.append(buf, buf_len);
 119       buf = line_buf_.data();
 120       buf_len = static_cast<int>(line_buf_.size());
 121     }
 122
 123     if (reached_last_chunk_) {
 124       if (buf_len)
 125         DVLOG(1) << "ignoring http trailer";
 126       else
 127         reached_eof_ = true;
 128     } else if (chunk_terminator_remaining_) {
 129       if (buf_len) {
 130         DLOG(ERROR) << "chunk data not terminated properly";
 131         return ERR_INVALID_CHUNKED_ENCODING;
 132       }
 133       chunk_terminator_remaining_ = false;
 134     } else if (buf_len) {
 135       // Ignore any chunk-extensions.
 136       size_t index_of_semicolon = base::StringPiece(buf, buf_len).find(';');
 137       if (index_of_semicolon != base::StringPiece::npos)
 138         buf_len = static_cast<int>(index_of_semicolon);
 139
 140       if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) {
 141         DLOG(ERROR) << "Failed parsing HEX from: " <<
 142             std::string(buf, buf_len);
 143         return ERR_INVALID_CHUNKED_ENCODING;
 144       }
 145
 146       if (chunk_remaining_ == 0)
 147         reached_last_chunk_ = true;
 148     } else {
 149       DLOG(ERROR) << "missing chunk-size";
 150       return ERR_INVALID_CHUNKED_ENCODING;
 151     }
 152     line_buf_.clear();
 153   } else {
 154     // Save the partial line; wait for more data.
 155     bytes_consumed = buf_len;
 156
 157     // Ignore a trailing CR
 158     if (buf[buf_len - 1] == '\r')
 159       buf_len--;
 160
 161     if (line_buf_.length() + buf_len > kMaxLineBufLen) {
 162       DLOG(ERROR) << "Chunked line length too long";
 163       return ERR_INVALID_CHUNKED_ENCODING;
 164     }
 165
 166     line_buf_.append(buf, buf_len);
 167   }
 168   return bytes_consumed;
 169 }
 170
 171
 172 // While the HTTP 1.1 specification defines chunk-size as 1*HEX
 173 // some sites rely on more lenient parsing.
 174 // http://www.yahoo.com/, for example, pads chunk-size with trailing spaces
 175 // (0x20) to be 7 characters long, such as "819b   ".
 176 //
 177 // A comparison of browsers running on WindowsXP shows that
 178 // they will parse the following inputs (egrep syntax):
 179 //
 180 // Let \X be the character class for a hex digit: [0-9a-fA-F]
 181 //
 182 //   RFC 2616: ^\X+$
 183 //        IE7: ^\X+[^\X]*$
 184 // Safari 3.1: ^[\t\r ]*\X+[\t ]*$
 185 //  Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$
 186 // Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$
 187 //
 188 // Our strategy is to be as strict as possible, while not breaking
 189 // known sites.
 190 //
 191 //         Us: ^\X+[ ]*$
 192 bool HttpChunkedDecoder::ParseChunkSize(const char* start, int len, int* out) {
 193   DCHECK_GE(len, 0);
 194
 195   // Strip trailing spaces
 196   while (len && start[len - 1] == ' ')
 197     len--;
 198
 199   // Be more restrictive than HexStringToInt;
 200   // don't allow inputs with leading "-", "+", "0x", "0X"
 201   base::StringPiece chunk_size(start, len);
 202   if (chunk_size.find_first_not_of("0123456789abcdefABCDEF")
 203       != base::StringPiece::npos) {
 204     return false;
 205   }
 206
 207   int parsed_number;
 208   bool ok = base::HexStringToInt(chunk_size, &parsed_number);
 209   if (ok && parsed_number >= 0) {
 210     *out = parsed_number;
 211     return true;
 212   }
 213   return false;
 214 }
 215
 216 }  // namespace net