pdf/url_loader_wrapper_impl.cc

   1 // Copyright 2016 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "pdf/url_loader_wrapper_impl.h"
   6
   7 #include <memory>
   8
   9 #include "base/bind.h"
  10 #include "base/logging.h"
  11 #include "base/strings/string_util.h"
  12 #include "base/strings/stringprintf.h"
  13 #include "net/http/http_util.h"
  14 #include "ppapi/c/pp_errors.h"
  15 #include "ppapi/cpp/logging.h"
  16 #include "ppapi/cpp/url_request_info.h"
  17 #include "ppapi/cpp/url_response_info.h"
  18
  19 namespace chrome_pdf {
  20
  21 namespace {
  22
// Delay applied before each response-body read is issued. Reads are deferred
// so that they do not block the UI thread and to reduce CPU usage.
// Note: despite the "Ms" suffix this is a base::TimeDelta (2 milliseconds),
// not a raw millisecond count.
constexpr base::TimeDelta kReadDelayMs = base::TimeDelta::FromMilliseconds(2);
  25
  26 pp::URLRequestInfo MakeRangeRequest(pp::Instance* plugin_instance,
  27                                     const std::string& url,
  28                                     const std::string& referrer_url,
  29                                     uint32_t position,
  30                                     uint32_t size) {
  31   pp::URLRequestInfo request(plugin_instance);
  32   request.SetURL(url);
  33   request.SetMethod("GET");
  34   request.SetFollowRedirects(false);
  35   request.SetCustomReferrerURL(referrer_url);
  36
  37   // According to rfc2616, byte range specifies position of the first and last
  38   // bytes in the requested range inclusively. Therefore we should subtract 1
  39   // from the position + size, to get index of the last byte that needs to be
  40   // downloaded.
  41   std::string str_header =
  42       base::StringPrintf("Range: bytes=%d-%d", position, position + size - 1);
  43   pp::Var header(str_header.c_str());
  44   request.SetHeaders(header);
  45
  46   return request;
  47 }
  48
  49 bool GetByteRangeFromStr(const std::string& content_range_str,
  50                          int* start,
  51                          int* end) {
  52   std::string range = content_range_str;
  53   if (!base::StartsWith(range, "bytes", base::CompareCase::INSENSITIVE_ASCII))
  54     return false;
  55
  56   range = range.substr(strlen("bytes"));
  57   std::string::size_type pos = range.find('-');
  58   std::string range_end;
  59   if (pos != std::string::npos)
  60     range_end = range.substr(pos + 1);
  61   base::TrimWhitespaceASCII(range, base::TRIM_LEADING, &range);
  62   base::TrimWhitespaceASCII(range_end, base::TRIM_LEADING, &range_end);
  63   *start = atoi(range.c_str());
  64   *end = atoi(range_end.c_str());
  65   return true;
  66 }
  67
  68 // If the headers have a byte-range response, writes the start and end
  69 // positions and returns true if at least the start position was parsed.
  70 // The end position will be set to 0 if it was not found or parsed from the
  71 // response.
  72 // Returns false if not even a start position could be parsed.
  73 bool GetByteRangeFromHeaders(const std::string& headers, int* start, int* end) {
  74   net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\n");
  75   while (it.GetNext()) {
  76     if (base::LowerCaseEqualsASCII(it.name_piece(), "content-range")) {
  77       if (GetByteRangeFromStr(it.values().c_str(), start, end))
  78         return true;
  79     }
  80   }
  81   return false;
  82 }
  83
  84 bool IsDoubleEndLineAtEnd(const char* buffer, int size) {
  85   if (size < 2)
  86     return false;
  87
  88   if (buffer[size - 1] == '\n' && buffer[size - 2] == '\n')
  89     return true;
  90
  91   if (size < 4)
  92     return false;
  93
  94   return buffer[size - 1] == '\n' && buffer[size - 2] == '\r' &&
  95          buffer[size - 3] == '\n' && buffer[size - 4] == '\r';
  96 }
  97
  98 }  // namespace
  99
// Wraps |url_loader| for |plugin_instance| and immediately caches whatever
// response headers the loader currently reports (see SetHeadersFromLoader()).
URLLoaderWrapperImpl::URLLoaderWrapperImpl(pp::Instance* plugin_instance,
                                           const pp::URLLoader& url_loader)
    : plugin_instance_(plugin_instance),
      url_loader_(url_loader),
      callback_factory_(this) {
  SetHeadersFromLoader();
}
 107
 108 URLLoaderWrapperImpl::~URLLoaderWrapperImpl() {
 109   Close();
 110   // We should call callbacks to prevent memory leaks.
 111   // The callbacks don't do anything, because the objects that created the
 112   // callbacks have been destroyed.
 113   if (!did_open_callback_.IsOptional())
 114     did_open_callback_.RunAndClear(-1);
 115   if (!did_read_callback_.IsOptional())
 116     did_read_callback_.RunAndClear(-1);
 117 }
 118
// Returns the Content-Length value cached by ParseHeaders(), or -1 when the
// response headers did not contain a Content-Length header.
int URLLoaderWrapperImpl::GetContentLength() const {
  return content_length_;
}
 122
// Returns true when ParseHeaders() saw an Accept-Ranges header whose value is
// "bytes" (compared case-insensitively).
bool URLLoaderWrapperImpl::IsAcceptRangesBytes() const {
  return accept_ranges_bytes_;
}
 126
// Returns true when the response headers contained a Content-Encoding header,
// regardless of its value.
bool URLLoaderWrapperImpl::IsContentEncoded() const {
  return content_encoded_;
}
 130
// Returns the Content-Type value cached by ParseHeaders(): everything before
// the first ';', with surrounding whitespace trimmed. Empty when the header
// was absent.
std::string URLLoaderWrapperImpl::GetContentType() const {
  return content_type_;
}
// Returns the Content-Disposition header value cached by ParseHeaders(), or an
// empty string when the header was absent.
std::string URLLoaderWrapperImpl::GetContentDisposition() const {
  return content_disposition_;
}
 137
// Returns the status code from the loader's current response info. Unlike the
// header accessors, this is queried from |url_loader_| on every call rather
// than cached.
int URLLoaderWrapperImpl::GetStatusCode() const {
  return url_loader_.GetResponseInfo().GetStatusCode();
}
 141
// Returns whether the response is currently treated as multipart.
// ParseHeaders() sets the flag for a "multipart/..." Content-Type that carries
// a non-empty boundary; DidRead() clears it again once the first multipart
// chunk has been processed.
bool URLLoaderWrapperImpl::IsMultipart() const {
  return is_multipart_;
}
 145
 146 bool URLLoaderWrapperImpl::GetByteRangeStart(int* start) const {
 147   DCHECK(start);
 148   *start = byte_range_.start();
 149   return byte_range_.IsValid();
 150 }
 151
// Forwards to the underlying loader's GetDownloadProgress(), passing both
// output pointers through unchanged and returning the loader's result.
bool URLLoaderWrapperImpl::GetDownloadProgress(
    int64_t* bytes_received,
    int64_t* total_bytes_to_be_received) const {
  return url_loader_.GetDownloadProgress(bytes_received,
                                         total_bytes_to_be_received);
}
 158
// Closes the underlying loader and stops |read_starter_|, so that a read
// scheduled by ReadResponseBody() that has not started yet will not run.
void URLLoaderWrapperImpl::Close() {
  url_loader_.Close();
  read_starter_.Stop();
}
 163
 164 void URLLoaderWrapperImpl::OpenRange(const std::string& url,
 165                                      const std::string& referrer_url,
 166                                      uint32_t position,
 167                                      uint32_t size,
 168                                      const pp::CompletionCallback& cc) {
 169   did_open_callback_ = cc;
 170   pp::CompletionCallback callback =
 171       callback_factory_.NewCallback(&URLLoaderWrapperImpl::DidOpen);
 172   int rv = url_loader_.Open(
 173       MakeRangeRequest(plugin_instance_, url, referrer_url, position, size),
 174       callback);
 175   if (rv != PP_OK_COMPLETIONPENDING)
 176     callback.Run(rv);
 177 }
 178
// Schedules a read of up to |buffer_size| bytes of the response body into
// |buffer|. |cc| is saved and run from DidRead() with the read result.
//
// The read is not issued here: ReadResponseBodyImpl() is started through
// |read_starter_| after kReadDelayMs. |buffer| is stored as a pointer, not
// copied, and is written to later.
void URLLoaderWrapperImpl::ReadResponseBody(char* buffer,
                                            int buffer_size,
                                            const pp::CompletionCallback& cc) {
  did_read_callback_ = cc;
  buffer_ = buffer;
  buffer_size_ = buffer_size;
  // NOTE(review): base::Unretained(this) relies on |read_starter_| never firing
  // after this object is gone; the destructor calls Close(), which stops it.
  read_starter_.Start(
      FROM_HERE, kReadDelayMs,
      base::BindRepeating(&URLLoaderWrapperImpl::ReadResponseBodyImpl,
                          base::Unretained(this)));
}
 190
 191 void URLLoaderWrapperImpl::ReadResponseBodyImpl() {
 192   pp::CompletionCallback callback =
 193       callback_factory_.NewCallback(&URLLoaderWrapperImpl::DidRead);
 194   int rv = url_loader_.ReadResponseBody(buffer_, buffer_size_, callback);
 195   if (rv != PP_OK_COMPLETIONPENDING) {
 196     callback.Run(rv);
 197   }
 198 }
 199
// Replaces the stored raw response headers with |response_headers| and
// re-derives all cached header properties from them via ParseHeaders().
void URLLoaderWrapperImpl::SetResponseHeaders(
    const std::string& response_headers) {
  response_headers_ = response_headers;
  ParseHeaders();
}
 205
 206 void URLLoaderWrapperImpl::ParseHeaders() {
 207   content_length_ = -1;
 208   accept_ranges_bytes_ = false;
 209   content_encoded_ = false;
 210   content_type_.clear();
 211   content_disposition_.clear();
 212   multipart_boundary_.clear();
 213   byte_range_ = gfx::Range::InvalidRange();
 214   is_multipart_ = false;
 215
 216   if (response_headers_.empty())
 217     return;
 218
 219   net::HttpUtil::HeadersIterator it(response_headers_.begin(),
 220                                     response_headers_.end(), "\n");
 221   while (it.GetNext()) {
 222     base::StringPiece name = it.name_piece();
 223     if (base::LowerCaseEqualsASCII(name, "content-length")) {
 224       content_length_ = atoi(it.values().c_str());
 225     } else if (base::LowerCaseEqualsASCII(name, "accept-ranges")) {
 226       accept_ranges_bytes_ = base::LowerCaseEqualsASCII(it.values(), "bytes");
 227     } else if (base::LowerCaseEqualsASCII(name, "content-encoding")) {
 228       content_encoded_ = true;
 229     } else if (base::LowerCaseEqualsASCII(name, "content-type")) {
 230       content_type_ = it.values();
 231       size_t semi_colon_pos = content_type_.find(';');
 232       if (semi_colon_pos != std::string::npos) {
 233         content_type_ = content_type_.substr(0, semi_colon_pos);
 234       }
 235       base::TrimWhitespaceASCII(content_type_, base::TRIM_ALL, &content_type_);
 236       // multipart boundary.
 237       std::string type = base::ToLowerASCII(it.values_piece());
 238       if (base::StartsWith(type, "multipart/", base::CompareCase::SENSITIVE)) {
 239         const char* boundary = strstr(type.c_str(), "boundary=");
 240         DCHECK(boundary);
 241         if (boundary) {
 242           multipart_boundary_ = std::string(boundary + 9);
 243           is_multipart_ = !multipart_boundary_.empty();
 244         }
 245       }
 246     } else if (base::LowerCaseEqualsASCII(name, "content-disposition")) {
 247       content_disposition_ = it.values();
 248     } else if (base::LowerCaseEqualsASCII(name, "content-range")) {
 249       int start = 0;
 250       int end = 0;
 251       if (GetByteRangeFromStr(it.values().c_str(), &start, &end)) {
 252         byte_range_ = gfx::Range(start, end);
 253       }
 254     }
 255   }
 256 }
 257
// Completion handler for the Open() call issued by OpenRange(). Refreshes the
// cached headers from the loader (whatever |result| is) and then runs and
// clears the callback saved by OpenRange() with |result|.
void URLLoaderWrapperImpl::DidOpen(int32_t result) {
  SetHeadersFromLoader();
  did_open_callback_.RunAndClear(result);
}
 262
 263 void URLLoaderWrapperImpl::DidRead(int32_t result) {
 264   if (multi_part_processed_) {
 265     // Reset this flag so we look inside the buffer in calls of DidRead for this
 266     // response only once.  Note that this code DOES NOT handle multi part
 267     // responses with more than one part (we don't issue them at the moment, so
 268     // they shouldn't arrive).
 269     is_multipart_ = false;
 270   }
 271   if (result <= 0 || !is_multipart_) {
 272     did_read_callback_.RunAndClear(result);
 273     return;
 274   }
 275   if (result <= 2) {
 276     // TODO(art-snake): Accumulate data for parse headers.
 277     did_read_callback_.RunAndClear(result);
 278     return;
 279   }
 280
 281   char* start = buffer_;
 282   size_t length = result;
 283   multi_part_processed_ = true;
 284   for (int i = 2; i < result; ++i) {
 285     if (IsDoubleEndLineAtEnd(buffer_, i)) {
 286       int start_pos = 0;
 287       int end_pos = 0;
 288       if (GetByteRangeFromHeaders(std::string(buffer_, i), &start_pos,
 289                                   &end_pos)) {
 290         byte_range_ = gfx::Range(start_pos, end_pos);
 291         start += i;
 292         length -= i;
 293       }
 294       break;
 295     }
 296   }
 297   result = length;
 298   if (result == 0) {
 299     // Continue receiving.
 300     return ReadResponseBodyImpl();
 301   }
 302   DCHECK_GT(result, 0);
 303   memmove(buffer_, start, result);
 304
 305   did_read_callback_.RunAndClear(result);
 306 }
 307
 308 void URLLoaderWrapperImpl::SetHeadersFromLoader() {
 309   pp::URLResponseInfo response = url_loader_.GetResponseInfo();
 310   pp::Var headers_var = response.GetHeaders();
 311
 312   SetResponseHeaders(headers_var.is_string() ? headers_var.AsString() : "");
 313 }
 314
 315 }  // namespace chrome_pdf