src/tools/swarming_client/utils/net.py

   1 # Copyright 2013 The Swarming Authors. All rights reserved.
   2 # Use of this source code is governed under the Apache License, Version 2.0 that
   3 # can be found in the LICENSE file.
   4
   5 """Classes and functions for generic network communication over HTTP."""
   6
   7 import cookielib
   8 import cStringIO as StringIO
   9 import httplib
  10 import itertools
  11 import json
  12 import logging
  13 import math
  14 import os
  15 import random
  16 import re
  17 import socket
  18 import ssl
  19 import sys
  20 import threading
  21 import time
  22 import urllib
  23 import urllib2
  24 import urlparse
  25
  26 from third_party import requests
  27 from third_party.requests import adapters
  28 from third_party.requests import structures
  29 from third_party.rietveld import upload
  30
  31 from utils import oauth
  32 from utils import tools
  33 from utils import zip_package
  34
# Hack out upload logging.info(): point the 'logging' name used by the upload
# module at a dedicated 'upload' logger so that its verbosity can be lowered
# independently of the root logger.
upload.logging = logging.getLogger('upload')
# Pylint on Mac chokes on this line.
upload.logging.setLevel(logging.WARNING)  # pylint: disable=E1103
  39
  40
  41 # TODO(vadimsh): Remove this once we don't have to support python 2.6 anymore.
  42 def monkey_patch_httplib():
  43   """Patch httplib.HTTPConnection to have '_tunnel_host' attribute.
  44
  45   'requests' library (>= v2) accesses 'HTTPConnection._tunnel_host' attribute
  46   added only in python 2.6.3. This function patches HTTPConnection to have it
  47   on python 2.6.2 as well.
  48   """
  49   conn = httplib.HTTPConnection('example.com')
  50   if not hasattr(conn, '_tunnel_host'):
  51     httplib.HTTPConnection._tunnel_host = None
  52 monkey_patch_httplib()
  53
  54
# Name of the query parameter that carries the count of url attempts. It is
# added to retried requests only (see HttpService.prepare_request).
COUNT_KEY = 'UrlOpenAttempt'

# Default maximum number of attempts to open a url before aborting.
URL_OPEN_MAX_ATTEMPTS = 30

# Default total time, in seconds, to spend retrying a request.
URL_OPEN_TIMEOUT = 6*60.

# Default timeout, in seconds, when reading from an open HTTP connection.
URL_READ_TIMEOUT = 60

# Content type for url encoded POST body.
URL_ENCODED_FORM_CONTENT_TYPE = 'application/x-www-form-urlencoded'
# Content type for JSON body.
JSON_CONTENT_TYPE = 'application/json; charset=UTF-8'
# Default content type for POST body.
DEFAULT_CONTENT_TYPE = URL_ENCODED_FORM_CONTENT_TYPE

# Content type -> function that encodes a request body.
CONTENT_ENCODERS = {
  URL_ENCODED_FORM_CONTENT_TYPE:
    urllib.urlencode,
  JSON_CONTENT_TYPE:
    # Compact and key-sorted, so the same object always encodes identically.
    lambda x: json.dumps(x, sort_keys=True, separators=(',', ':')),
}

# File used to store all auth cookies.
COOKIE_FILE = os.path.join(os.path.expanduser('~'), '.isolated_cookies')

# Google Storage URL regular expression.
GS_STORAGE_HOST_URL_RE = re.compile(r'https://.*\.storage\.googleapis\.com')

# All possible authentication methods with the corresponding type of method
# config object (or None if the method is not configurable). See configure_auth.
# Order is important: it's visible in commands --help output.
AUTH_METHODS = [
  ('oauth', oauth.OAuthConfig),
  ('cookie', None),
  ('bot', None),
  ('none', None),
]


# Global (for now) map: server URL (http://example.com) -> HttpService instance.
# Used by get_http_service to cache HttpService instances.
_http_services = {}
_http_services_lock = threading.Lock()

# CookieJar reused by all services + lock that protects its instantiation.
_cookie_jar = None
_cookie_jar_lock = threading.Lock()

# Path to cacert.pem bundle file reused by all services + lock that protects
# its extraction.
_ca_certs = None
_ca_certs_lock = threading.Lock()

# This lock ensures that the user won't be confused by multiple concurrent
# login prompts. It also guards writes to the auth configuration below.
_auth_lock = threading.Lock()

# Set in 'configure_auth'. If configure_auth is not called before the first
# request, will be set to defaults generated by 'get_default_auth_config'.
_auth_method = None
_auth_method_config = None
 120
 121
 122 class NetError(IOError):
 123   """Generic network related error."""
 124
 125   def __init__(self, inner_exc=None):
 126     super(NetError, self).__init__(str(inner_exc or self.__doc__))
 127     self.inner_exc = inner_exc
 128
 129   def format(self, verbose=False):
 130     """Human readable description with detailed information about the error."""
 131     out = [str(self.inner_exc)]
 132     if verbose:
 133       headers = None
 134       body = None
 135       if isinstance(self.inner_exc, urllib2.HTTPError):
 136         headers = self.inner_exc.hdrs.items()
 137         body = self.inner_exc.read()
 138       elif isinstance(self.inner_exc, requests.HTTPError):
 139         headers = self.inner_exc.response.headers.items()
 140         body = self.inner_exc.response.content
 141       if headers or body:
 142         out.append('----------')
 143         if headers:
 144           for header, value in headers:
 145             if not header.startswith('x-'):
 146               out.append('%s: %s' % (header.capitalize(), value))
 147           out.append('')
 148         out.append(body or '<empty body>')
 149         out.append('----------')
 150     return '\n'.join(out)
 151
 152
# NOTE: the docstring below doubles as the default exception message (see
# NetError.__init__), so it is runtime text.
class TimeoutError(NetError):
  """Timeout while reading HTTP response."""
 155
 156
# NOTE: the docstring below doubles as the default exception message (see
# NetError.__init__), so it is runtime text.
class ConnectionError(NetError):
  """Failed to connect to the server."""
 159
 160
# NOTE: the docstring below doubles as the default exception message (see
# NetError.__init__), so it is runtime text.
class HttpError(NetError):
  """Server returned HTTP error code."""

  def __init__(self, code, inner_exc=None):
    # |code| is the HTTP status code of the response (RequestsLibEngine passes
    # response.status_code). |inner_exc| is the underlying exception, stored
    # by NetError as self.inner_exc.
    super(HttpError, self).__init__(inner_exc)
    self.code = code
 167
 168
 169 def url_open(url, **kwargs):
 170   """Attempts to open the given url multiple times.
 171
 172   |data| can be either:
 173     - None for a GET request
 174     - str for pre-encoded data
 175     - list for data to be encoded
 176     - dict for data to be encoded
 177
 178   See HttpService.request for a full list of arguments.
 179
 180   Returns HttpResponse object, where the response may be read from, or None
 181   if it was unable to connect.
 182   """
 183   urlhost, urlpath = split_server_request_url(url)
 184   service = get_http_service(urlhost)
 185   return service.request(urlpath, **kwargs)
 186
 187
 188 def url_read(url, **kwargs):
 189   """Attempts to open the given url multiple times and read all data from it.
 190
 191   Accepts same arguments as url_open function.
 192
 193   Returns all data read or None if it was unable to connect or read the data.
 194   """
 195   kwargs['stream'] = False
 196   response = url_open(url, **kwargs)
 197   if not response:
 198     return None
 199   try:
 200     return response.read()
 201   except TimeoutError:
 202     return None
 203
 204
 205 def url_retrieve(filepath, url, **kwargs):
 206   """Downloads an URL to a file. Returns True on success."""
 207   response = url_open(url, **kwargs)
 208   if not response:
 209     return False
 210   try:
 211     with open(filepath, 'wb') as f:
 212       while True:
 213         buf = response.read(65536)
 214         if not buf:
 215           return True
 216         f.write(buf)
 217   except (IOError, OSError, TimeoutError):
 218     try:
 219       os.remove(filepath)
 220     except IOError:
 221       pass
 222     return False
 223
 224
 225 def split_server_request_url(url):
 226   """Splits the url into scheme+netloc and path+params+query+fragment."""
 227   url_parts = list(urlparse.urlparse(url))
 228   urlhost = '%s://%s' % (url_parts[0], url_parts[1])
 229   urlpath = urlparse.urlunparse(['', ''] + url_parts[2:])
 230   return urlhost, urlpath
 231
 232
 233 def get_http_service(urlhost, allow_cached=False, use_count_key=None):
 234   """Returns existing or creates new instance of HttpService that can send
 235   requests to given base urlhost.
 236   """
 237   def new_service():
 238     return HttpService(
 239         urlhost,
 240         engine=RequestsLibEngine(get_cacerts_bundle()),
 241         authenticator=create_authenticator(urlhost),
 242         use_count_key=use_count_key)
 243
 244   # Ensure consistency in url naming.
 245   urlhost = str(urlhost).lower().rstrip('/')
 246
 247   # Do not use COUNT_KEY with Google Storage (since it breaks a signature).
 248   if use_count_key is None:
 249     use_count_key = not GS_STORAGE_HOST_URL_RE.match(urlhost)
 250
 251   if not allow_cached:
 252     return new_service()
 253   with _http_services_lock:
 254     service = _http_services.get(urlhost)
 255     if not service:
 256       service = new_service()
 257       _http_services[urlhost] = service
 258     return service
 259
 260
 261 def get_cookie_jar():
 262   """Returns global CoookieJar object that stores cookies in the file."""
 263   global _cookie_jar
 264   with _cookie_jar_lock:
 265     if _cookie_jar is not None:
 266       return _cookie_jar
 267     jar = ThreadSafeCookieJar(COOKIE_FILE)
 268     jar.load()
 269     _cookie_jar = jar
 270     return jar
 271
 272
 273 def get_cacerts_bundle():
 274   """Returns path to a file with CA root certificates bundle."""
 275   global _ca_certs
 276   with _ca_certs_lock:
 277     if _ca_certs is not None and os.path.exists(_ca_certs):
 278       return _ca_certs
 279     _ca_certs = zip_package.extract_resource(requests, 'cacert.pem')
 280     return _ca_certs
 281
 282
 283 def get_default_auth_config():
 284   """Returns auth configuration used by default if configure_auth is not called.
 285
 286   If running in a headless mode on bots, will use 'bot' auth, otherwise
 287   'oauth' with default oauth config.
 288
 289   Returns pair (auth method name, auth method config).
 290   """
 291   if tools.is_headless():
 292     return 'bot', None
 293   else:
 294     return 'oauth', oauth.make_oauth_config()
 295
 296
 297 def configure_auth(method, config=None):
 298   """Defines what authentication methods to use.
 299
 300   Possible authentication methods are:
 301     'bot' - use HMAC authentication based on a secret key.
 302     'cookie' - use cookie-based authentication.
 303     'none' - do not use authentication.
 304     'oauth' - use oauth-based authentication.
 305
 306   Arguments:
 307     method: what method to use.
 308     config: object that holds configuration for authentication method.
 309         Concrete type depends on a method used (see AUTH_METHODS for expected
 310         type). Passed to corresponding authenticator instance.
 311   """
 312   global _auth_method
 313   global _auth_method_config
 314   assert method in dict(AUTH_METHODS), method
 315   config_type = dict(AUTH_METHODS)[method]
 316   if config_type and not isinstance(config, config_type):
 317     raise TypeError(
 318         'Expecting \'%s\' auth config to be of type %s. Got %s instead.' %
 319         (method, config_type, type(config)))
 320   elif not config_type and config is not None:
 321     raise TypeError('Auth method \'%s\' is not configurable.' % method)
 322   with _auth_lock:
 323     _auth_method = method
 324     _auth_method_config = config
 325
 326
 327 def get_auth_method():
 328   """Returns authentication method used by default.
 329
 330   Set with 'configure_auth'. See 'configure_auth' doc string for existing
 331   auth method.
 332   """
 333   return _auth_method
 334
 335
 336 def create_authenticator(urlhost):
 337   """Makes Authenticator instance used by HttpService to access |urlhost|."""
 338   # We use signed URL for Google Storage, no need for special authentication.
 339   if GS_STORAGE_HOST_URL_RE.match(urlhost):
 340     return None
 341
 342   # Lazy initialize auth config with defaults.
 343   if not _auth_method:
 344     default_method, default_config = get_default_auth_config()
 345     configure_auth(default_method, default_config)
 346
 347   # Use configuration set with 'configure_auth'.
 348   with _auth_lock:
 349     if _auth_method == 'bot':
 350       # TODO(vadimsh): Implement it. Use IP whitelist (that doesn't require
 351       # any authenticator instance) for now.
 352       return None
 353     elif _auth_method == 'cookie':
 354       return CookieBasedAuthenticator(urlhost, get_cookie_jar())
 355     elif _auth_method == 'none':
 356       return None
 357     elif _auth_method == 'oauth':
 358       return OAuthAuthenticator(urlhost, _auth_method_config)
 359   raise AssertionError('Invalid auth method: %s' % _auth_method)
 360
 361
 362 def get_case_insensitive_dict(original):
 363   """Given a dict with string keys returns new CaseInsensitiveDict.
 364
 365   Raises ValueError if there are duplicate keys.
 366   """
 367   normalized = structures.CaseInsensitiveDict(original or {})
 368   if len(normalized) != len(original):
 369     raise ValueError('Duplicate keys in: %s' % repr(original))
 370   return normalized
 371
 372
 373 class HttpService(object):
 374   """Base class for a class that provides an API to HTTP based service:
 375     - Provides 'request' method.
 376     - Supports automatic request retries.
 377     - Thread safe.
 378   """
 379
 380   def __init__(self, urlhost, engine, authenticator=None, use_count_key=True):
 381     self.urlhost = urlhost
 382     self.engine = engine
 383     self.authenticator = authenticator
 384     self.use_count_key = use_count_key
 385
 386   @staticmethod
 387   def is_transient_http_error(code, retry_404, retry_50x):
 388     """Returns True if given HTTP response code is a transient error."""
 389     # Google Storage can return this and it should be retried.
 390     if code == 408:
 391       return True
 392     # Retry 404 only if allowed by the caller.
 393     if code == 404:
 394       return retry_404
 395     # All other 4** errors are fatal.
 396     if code < 500:
 397       return False
 398     # Retry >= 500 error only if allowed by the caller.
 399     return retry_50x
 400
 401   @staticmethod
 402   def encode_request_body(body, content_type):
 403     """Returns request body encoded according to its content type."""
 404     # No body or it is already encoded.
 405     if body is None or isinstance(body, str):
 406       return body
 407     # Any body should have content type set.
 408     assert content_type, 'Request has body, but no content type'
 409     encoder = CONTENT_ENCODERS.get(content_type)
 410     assert encoder, ('Unknown content type %s' % content_type)
 411     return encoder(body)
 412
 413   def login(self, allow_user_interaction):
 414     """Runs authentication flow to refresh short lived access token.
 415
 416     Authentication flow may need to interact with the user (read username from
 417     stdin, open local browser for OAuth2, etc.). If interaction is required and
 418     |allow_user_interaction| is False, the login will silently be considered
 419     failed (i.e. this function returns False).
 420
 421     'request' method always uses non-interactive login, so long-lived
 422     authentication tokens (cookie, OAuth2 refresh token, etc) have to be set up
 423     manually by developer (by calling 'auth.py login' perhaps) prior running
 424     any swarming or isolate scripts.
 425     """
 426     # Use global lock to ensure two authentication flows never run in parallel.
 427     with _auth_lock:
 428       if self.authenticator:
 429         return self.authenticator.login(allow_user_interaction)
 430       return False
 431
 432   def logout(self):
 433     """Purges access credentials from local cache."""
 434     if self.authenticator:
 435       self.authenticator.logout()
 436
 437   def request(
 438       self,
 439       urlpath,
 440       data=None,
 441       content_type=None,
 442       max_attempts=URL_OPEN_MAX_ATTEMPTS,
 443       retry_404=False,
 444       retry_50x=True,
 445       timeout=URL_OPEN_TIMEOUT,
 446       read_timeout=URL_READ_TIMEOUT,
 447       stream=True,
 448       method=None,
 449       headers=None):
 450     """Attempts to open the given url multiple times.
 451
 452     |urlpath| is relative to the server root, i.e. '/some/request?param=1'.
 453
 454     |data| can be either:
 455       - None for a GET request
 456       - str for pre-encoded data
 457       - list for data to be form-encoded
 458       - dict for data to be form-encoded
 459
 460     - Optionally retries HTTP 404 and 50x.
 461     - Retries up to |max_attempts| times. If None or 0, there's no limit in the
 462       number of retries.
 463     - Retries up to |timeout| duration in seconds. If None or 0, there's no
 464       limit in the time taken to do retries.
 465     - If both |max_attempts| and |timeout| are None or 0, this functions retries
 466       indefinitely.
 467
 468     If |method| is given it can be 'GET', 'POST' or 'PUT' and it will be used
 469     when performing the request. By default it's GET if |data| is None and POST
 470     if |data| is not None.
 471
 472     If |headers| is given, it should be a dict with HTTP headers to append
 473     to request. Caller is responsible for providing headers that make sense.
 474
 475     If |read_timeout| is not None will configure underlying socket to
 476     raise TimeoutError exception whenever there's no response from the server
 477     for more than |read_timeout| seconds. It can happen during any read
 478     operation so once you pass non-None |read_timeout| be prepared to handle
 479     these exceptions in subsequent reads from the stream.
 480
 481     Returns a file-like object, where the response may be read from, or None
 482     if it was unable to connect. If |stream| is False will read whole response
 483     into memory buffer before returning file-like object that reads from this
 484     memory buffer.
 485     """
 486     assert urlpath and urlpath[0] == '/', urlpath
 487
 488     if data is not None:
 489       assert method in (None, 'POST', 'PUT')
 490       method = method or 'POST'
 491       content_type = content_type or DEFAULT_CONTENT_TYPE
 492       body = self.encode_request_body(data, content_type)
 493     else:
 494       assert method in (None, 'GET')
 495       method = method or 'GET'
 496       body = None
 497       assert not content_type, 'Can\'t use content_type on GET'
 498
 499     # Prepare request info.
 500     parsed = urlparse.urlparse('/' + urlpath.lstrip('/'))
 501     resource_url = urlparse.urljoin(self.urlhost, parsed.path)
 502     query_params = urlparse.parse_qsl(parsed.query)
 503
 504     # Prepare headers.
 505     headers = get_case_insensitive_dict(headers or {})
 506     if body is not None:
 507       headers['Content-Length'] = len(body)
 508       if content_type:
 509         headers['Content-Type'] = content_type
 510
 511     last_error = None
 512     auth_attempted = False
 513
 514     for attempt in retry_loop(max_attempts, timeout):
 515       # Log non-first attempt.
 516       if attempt.attempt:
 517         logging.warning(
 518             'Retrying request %s, attempt %d/%d...',
 519             resource_url, attempt.attempt, max_attempts)
 520
 521       try:
 522         # Prepare and send a new request.
 523         request = HttpRequest(method, resource_url, query_params, body,
 524             headers, read_timeout, stream)
 525         self.prepare_request(request, attempt.attempt)
 526         if self.authenticator:
 527           self.authenticator.authorize(request)
 528         response = self.engine.perform_request(request)
 529         logging.debug('Request %s succeeded', request.get_full_url())
 530         return response
 531
 532       except (ConnectionError, TimeoutError) as e:
 533         last_error = e
 534         logging.warning(
 535             'Unable to open url %s on attempt %d.\n%s',
 536             request.get_full_url(), attempt.attempt, e.format())
 537         continue
 538
 539       except HttpError as e:
 540         last_error = e
 541
 542         # Access denied -> authenticate.
 543         if e.code in (401, 403):
 544           logging.warning(
 545               'Authentication is required for %s on attempt %d.\n%s',
 546               request.get_full_url(), attempt.attempt, e.format())
 547           # Try to authenticate only once. If it doesn't help, then server does
 548           # not support authentication or user doesn't have required access.
 549           if not auth_attempted:
 550             auth_attempted = True
 551             if self.login(allow_user_interaction=False):
 552               # Success! Run request again immediately.
 553               attempt.skip_sleep = True
 554               continue
 555           # Authentication attempt was unsuccessful.
 556           logging.error(
 557               'Unable to authenticate to %s (%s). Use auth.py to login: '
 558               'python auth.py login --service=%s',
 559               self.urlhost, e.format(), self.urlhost)
 560           return None
 561
 562         # Hit a error that can not be retried -> stop retry loop.
 563         if not self.is_transient_http_error(e.code, retry_404, retry_50x):
 564           # This HttpError means we reached the server and there was a problem
 565           # with the request, so don't retry.
 566           logging.error(
 567               'Able to connect to %s but an exception was thrown.\n%s',
 568               request.get_full_url(), e.format(verbose=True))
 569           return None
 570
 571         # Retry all other errors.
 572         logging.warning(
 573             'Server responded with error on %s on attempt %d.\n%s',
 574             request.get_full_url(), attempt.attempt, e.format())
 575         continue
 576
 577     logging.error(
 578         'Unable to open given url, %s, after %d attempts.\n%s',
 579         request.get_full_url(), max_attempts, last_error.format(verbose=True))
 580     return None
 581
 582   def json_request(
 583       self,
 584       method,
 585       urlpath,
 586       body=None,
 587       max_attempts=URL_OPEN_MAX_ATTEMPTS,
 588       timeout=URL_OPEN_TIMEOUT,
 589       headers=None):
 590     """Sends JSON request to the server and parses JSON response it get back.
 591
 592     Arguments:
 593       method: HTTP method to use ('GET', 'POST', ...).
 594       urlpath: relative request path (e.g. '/auth/v1/...').
 595       body: object to serialize to JSON and sent in the request.
 596       max_attempts: how many times to retry 50x errors.
 597       timeout: how long to wait for a response (including all retries).
 598       headers: dict with additional request headers.
 599
 600     Returns:
 601       Deserialized JSON response on success, None on error or timeout.
 602     """
 603     response = self.request(
 604         urlpath,
 605         content_type=JSON_CONTENT_TYPE if body is not None else None,
 606         data=body,
 607         headers=headers,
 608         max_attempts=max_attempts,
 609         method=method,
 610         retry_404=False,
 611         retry_50x=True,
 612         stream=False,
 613         timeout=timeout)
 614     if not response:
 615       return None
 616     try:
 617       text = response.read()
 618       if not text:
 619         return None
 620     except TimeoutError:
 621       return None
 622     try:
 623       return json.loads(text)
 624     except ValueError:
 625       logging.error('Not a JSON response when calling %s: %s', urlpath, text)
 626       return None
 627
 628   def prepare_request(self, request, attempt):  # pylint: disable=R0201
 629     """Modify HttpRequest before sending it by adding COUNT_KEY parameter."""
 630     # Add COUNT_KEY only on retries.
 631     if self.use_count_key and attempt:
 632       request.params += [(COUNT_KEY, attempt)]
 633
 634
 635 class HttpRequest(object):
 636   """Request to HttpService."""
 637
 638   def __init__(self, method, url, params, body, headers, timeout, stream):
 639     """Arguments:
 640       |method| - HTTP method to use
 641       |url| - relative URL to the resource, without query parameters
 642       |params| - list of (key, value) pairs to put into GET parameters
 643       |body| - encoded body of the request (None or str)
 644       |headers| - dict with request headers
 645       |timeout| - socket read timeout (None to disable)
 646       |stream| - True to stream response from socket
 647     """
 648     self.method = method
 649     self.url = url
 650     self.params = params[:]
 651     self.body = body
 652     self.headers = headers.copy()
 653     self.timeout = timeout
 654     self.stream = stream
 655     self._cookies = None
 656
 657   @property
 658   def cookies(self):
 659     """CookieJar object that will be used for cookies in this request."""
 660     if self._cookies is None:
 661       self._cookies = cookielib.CookieJar()
 662     return self._cookies
 663
 664   def get_full_url(self):
 665     """Resource URL with url-encoded GET parameters."""
 666     if not self.params:
 667       return self.url
 668     else:
 669       return '%s?%s' % (self.url, urllib.urlencode(self.params))
 670
 671   def make_fake_response(self, content='', headers=None):
 672     """Makes new fake HttpResponse to this request, useful in tests."""
 673     return HttpResponse.get_fake_response(content, self.get_full_url(), headers)
 674
 675
 676 class HttpResponse(object):
 677   """Response from HttpService."""
 678
 679   def __init__(self, stream, url, headers):
 680     self._stream = stream
 681     self._url = url
 682     self._headers = get_case_insensitive_dict(headers)
 683     self._read = 0
 684
 685   @property
 686   def content_length(self):
 687     """Total length to the response or None if not known in advance."""
 688     length = self.get_header('Content-Length')
 689     return int(length) if length is not None else None
 690
 691   def get_header(self, header):
 692     """Returns response header (as str) or None if no such header."""
 693     return self._headers.get(header)
 694
 695   def read(self, size=None):
 696     """Reads up to |size| bytes from the stream and returns them.
 697
 698     If |size| is None reads all available bytes.
 699
 700     Raises TimeoutError on read timeout.
 701     """
 702     try:
 703       # cStringIO has a bug: stream.read(None) is not the same as stream.read().
 704       data = self._stream.read() if size is None else self._stream.read(size)
 705       self._read += len(data)
 706       return data
 707     except (socket.timeout, ssl.SSLError, requests.Timeout) as e:
 708       logging.error('Timeout while reading from %s, read %d of %s: %s',
 709           self._url, self._read, self.content_length, e)
 710       raise TimeoutError(e)
 711
 712   @classmethod
 713   def get_fake_response(cls, content, url, headers=None):
 714     """Returns HttpResponse with predefined content, useful in tests."""
 715     headers = dict(headers or {})
 716     headers['Content-Length'] = len(content)
 717     return cls(StringIO.StringIO(content), url, headers)
 718
 719
 720 class Authenticator(object):
 721   """Base class for objects that know how to authenticate into http services."""
 722
 723   def authorize(self, request):
 724     """Add authentication information to the request."""
 725
 726   def login(self, allow_user_interaction):
 727     """Run interactive authentication flow."""
 728     raise NotImplementedError()
 729
 730   def logout(self):
 731     """Purges access credentials from local cache."""
 732
 733
 734 class RequestsLibEngine(object):
 735   """Class that knows how to execute HttpRequests via requests library."""
 736
 737   # Preferred number of connections in a connection pool.
 738   CONNECTION_POOL_SIZE = 64
 739   # If True will not open more than CONNECTION_POOL_SIZE connections.
 740   CONNECTION_POOL_BLOCK = False
 741   # Maximum number of internal connection retries in a connection pool.
 742   CONNECTION_RETRIES = 0
 743
 744   def __init__(self, ca_certs):
 745     super(RequestsLibEngine, self).__init__()
 746     self.session = requests.Session()
 747     # Configure session.
 748     self.session.trust_env = False
 749     self.session.verify = ca_certs
 750     # Configure connection pools.
 751     for protocol in ('https://', 'http://'):
 752       self.session.mount(protocol, adapters.HTTPAdapter(
 753           pool_connections=self.CONNECTION_POOL_SIZE,
 754           pool_maxsize=self.CONNECTION_POOL_SIZE,
 755           max_retries=self.CONNECTION_RETRIES,
 756           pool_block=self.CONNECTION_POOL_BLOCK))
 757
 758   def perform_request(self, request):
 759     """Sends a HttpRequest to the server and reads back the response.
 760
 761     Returns HttpResponse.
 762
 763     Raises:
 764       ConnectionError - failed to establish connection to the server.
 765       TimeoutError - timeout while connecting or reading response.
 766       HttpError - server responded with >= 400 error code.
 767     """
 768     try:
 769       response = self.session.request(
 770           method=request.method,
 771           url=request.url,
 772           params=request.params,
 773           data=request.body,
 774           headers=request.headers,
 775           cookies=request.cookies,
 776           timeout=request.timeout,
 777           stream=request.stream)
 778       response.raise_for_status()
 779       if request.stream:
 780         stream = response.raw
 781       else:
 782         stream = StringIO.StringIO(response.content)
 783       return HttpResponse(stream, request.get_full_url(), response.headers)
 784     except requests.Timeout as e:
 785       raise TimeoutError(e)
 786     except requests.HTTPError as e:
 787       raise HttpError(e.response.status_code, e)
 788     except (requests.ConnectionError, socket.timeout, ssl.SSLError) as e:
 789       raise ConnectionError(e)
 790
 791
 792 # TODO(vadimsh): Remove once everything is using OAuth or HMAC-based auth.
 793 class CookieBasedAuthenticator(Authenticator):
 794   """Uses cookies (that AppEngine recognizes) to authenticate to |urlhost|."""
 795
 796   def __init__(self, urlhost, cookie_jar):
 797     super(CookieBasedAuthenticator, self).__init__()
 798     self.urlhost = urlhost
 799     self.cookie_jar = cookie_jar
 800     self.email = None
 801     self.password = None
 802     self._keyring = None
 803     self._lock = threading.Lock()
 804
 805   def authorize(self, request):
 806     # Copy all cookies from authenticator cookie jar to request cookie jar.
 807     with self._lock:
 808       with self.cookie_jar:
 809         for cookie in self.cookie_jar:
 810           request.cookies.set_cookie(cookie)
 811
 812   def login(self, allow_user_interaction):
 813     # Cookie authentication is always interactive (it asks for user name).
 814     if not allow_user_interaction:
 815       print >> sys.stderr, 'Cookie authentication requires interactive login'
 816       return False
 817     # To be used from inside AuthServer.
 818     cookie_jar = self.cookie_jar
 819     # RPC server that uses AuthenticationSupport's cookie jar.
 820     class AuthServer(upload.AbstractRpcServer):
 821       def _GetOpener(self):
 822         # Authentication code needs to know about 302 response.
 823         # So make OpenerDirector without HTTPRedirectHandler.
 824         opener = urllib2.OpenerDirector()
 825         opener.add_handler(urllib2.ProxyHandler())
 826         opener.add_handler(urllib2.UnknownHandler())
 827         opener.add_handler(urllib2.HTTPHandler())
 828         opener.add_handler(urllib2.HTTPDefaultErrorHandler())
 829         opener.add_handler(urllib2.HTTPSHandler())
 830         opener.add_handler(urllib2.HTTPErrorProcessor())
 831         opener.add_handler(urllib2.HTTPCookieProcessor(cookie_jar))
 832         return opener
 833       def PerformAuthentication(self):
 834         self._Authenticate()
 835         return self.authenticated
 836     with self._lock:
 837       with cookie_jar:
 838         rpc_server = AuthServer(self.urlhost, self.get_credentials)
 839         return rpc_server.PerformAuthentication()
 840
 841   def logout(self):
 842     domain = urlparse.urlparse(self.urlhost).netloc
 843     try:
 844       with self.cookie_jar:
 845         self.cookie_jar.clear(domain)
 846     except KeyError:
 847       pass
 848
 849   def get_credentials(self):
 850     """Called during authentication process to get the credentials.
 851
 852     May be called multiple times if authentication fails.
 853
 854     Returns tuple (email, password).
 855     """
 856     if self.email and self.password:
 857       return (self.email, self.password)
 858     self._keyring = self._keyring or upload.KeyringCreds(self.urlhost,
 859         self.urlhost, self.email)
 860     return self._keyring.GetUserCredentials()
 861
 862
 863 # TODO(vadimsh): Remove once everything is using OAuth or HMAC-based auth.
 864 class ThreadSafeCookieJar(cookielib.MozillaCookieJar):
 865   """MozillaCookieJar with thread safe load and save."""
 866
 867   def __enter__(self):
 868     """Context manager interface."""
 869     return self
 870
 871   def __exit__(self, *_args):
 872     """Saves cookie jar when exiting the block."""
 873     self.save()
 874     return False
 875
 876   def load(self, filename=None, ignore_discard=False, ignore_expires=False):
 877     """Loads cookies from the file if it exists."""
 878     filename = os.path.expanduser(filename or self.filename)
 879     with self._cookies_lock:
 880       if os.path.exists(filename):
 881         try:
 882           cookielib.MozillaCookieJar.load(
 883               self, filename, ignore_discard, ignore_expires)
 884           logging.debug('Loaded cookies from %s', filename)
 885         except (cookielib.LoadError, IOError):
 886           pass
 887       else:
 888         try:
 889           fd = os.open(filename, os.O_CREAT, 0600)
 890           os.close(fd)
 891         except OSError:
 892           logging.debug('Failed to create %s', filename)
 893       try:
 894         os.chmod(filename, 0600)
 895       except OSError:
 896         logging.debug('Failed to fix mode for %s', filename)
 897
 898   def save(self, filename=None, ignore_discard=False, ignore_expires=False):
 899     """Saves cookies to the file, completely overwriting it."""
 900     logging.debug('Saving cookies to %s', filename or self.filename)
 901     with self._cookies_lock:
 902       try:
 903         cookielib.MozillaCookieJar.save(
 904             self, filename, ignore_discard, ignore_expires)
 905       except OSError:
 906         logging.error('Failed to save %s', filename)
 907
 908
 909 class OAuthAuthenticator(Authenticator):
 910   """Uses OAuth Authorization header to authenticate requests."""
 911
 912   def __init__(self, urlhost, config):
 913     super(OAuthAuthenticator, self).__init__()
 914     assert isinstance(config, oauth.OAuthConfig)
 915     self.urlhost = urlhost
 916     self.config = config
 917     self._lock = threading.Lock()
 918     self._access_token_known = False
 919     self._access_token = None
 920
 921   def authorize(self, request):
 922     with self._lock:
 923       if not self._access_token_known:
 924         self._access_token = oauth.load_access_token(self.urlhost, self.config)
 925         self._access_token_known = True
 926       if self._access_token:
 927         request.headers['Authorization'] = 'Bearer %s' % self._access_token
 928
 929   def login(self, allow_user_interaction):
 930     with self._lock:
 931       self._access_token = oauth.create_access_token(
 932           self.urlhost, self.config, allow_user_interaction)
 933       self._access_token_known = True
 934       return self._access_token is not None
 935
 936   def logout(self):
 937     with self._lock:
 938       self._access_token = None
 939       self._access_token_known = True
 940       oauth.purge_access_token(self.urlhost, self.config)
 941
 942
 943 class RetryAttempt(object):
 944   """Contains information about current retry attempt.
 945
 946   Yielded from retry_loop.
 947   """
 948
 949   def __init__(self, attempt, remaining):
 950     """Information about current attempt in retry loop:
 951       |attempt| - zero based index of attempt.
 952       |remaining| - how much time is left before retry loop finishes retries.
 953     """
 954     self.attempt = attempt
 955     self.remaining = remaining
 956     self.skip_sleep = False
 957
 958
 959 def calculate_sleep_before_retry(attempt, max_duration):
 960   """How long to sleep before retrying an attempt in retry_loop."""
 961   # Maximum sleeping time. We're hammering a cloud-distributed service, it'll
 962   # survive.
 963   MAX_SLEEP = 10.
 964   # random.random() returns [0.0, 1.0). Starts with relatively short waiting
 965   # time by starting with 1.5/2+1.5^-1 median offset.
 966   duration = (random.random() * 1.5) + math.pow(1.5, (attempt - 1))
 967   assert duration > 0.1
 968   duration = min(MAX_SLEEP, duration)
 969   if max_duration:
 970     duration = min(max_duration, duration)
 971   return duration
 972
 973
 974 def sleep_before_retry(attempt, max_duration):
 975   """Sleeps for some amount of time when retrying the attempt in retry_loop.
 976
 977   To be mocked in tests.
 978   """
 979   time.sleep(calculate_sleep_before_retry(attempt, max_duration))
 980
 981
 982 def current_time():
 983   """Used by retry loop to get current time.
 984
 985   To be mocked in tests.
 986   """
 987   return time.time()
 988
 989
 990 def retry_loop(max_attempts=None, timeout=None):
 991   """Yields whenever new attempt to perform some action is needed.
 992
 993   Yields instances of RetryAttempt class that contains information about current
 994   attempt. Setting |skip_sleep| attribute of RetryAttempt to True will cause
 995   retry loop to run next attempt immediately.
 996   """
 997   start = current_time()
 998   for attempt in itertools.count():
 999     # Too many attempts?
1000     if max_attempts and attempt == max_attempts:
1001       break
1002     # Retried for too long?
1003     remaining = (timeout - (current_time() - start)) if timeout else None
1004     if remaining is not None and remaining < 0:
1005       break
1006     # Kick next iteration.
1007     attemp_obj = RetryAttempt(attempt, remaining)
1008     yield attemp_obj
1009     if attemp_obj.skip_sleep:
1010       continue
1011     # Only sleep if we are going to try again.
1012     if max_attempts and attempt != max_attempts - 1:
1013       remaining = (timeout - (current_time() - start)) if timeout else None
1014       if remaining is not None and remaining < 0:
1015         break
1016       sleep_before_retry(attempt, remaining)