src/tools/swarming_client/utils/net.py

   1 # Copyright 2013 The Swarming Authors. All rights reserved.
   2 # Use of this source code is governed under the Apache License, Version 2.0 that
   3 # can be found in the LICENSE file.
   4
   5 """Classes and functions for generic network communication over HTTP."""
   6
   7 import cookielib
   8 import cStringIO as StringIO
   9 import httplib
  10 import itertools
  11 import json
  12 import logging
  13 import math
  14 import os
  15 import random
  16 import re
  17 import socket
  18 import ssl
  19 import sys
  20 import threading
  21 import time
  22 import urllib
  23 import urllib2
  24 import urlparse
  25
  26 from third_party import requests
  27 from third_party.requests import adapters
  28 from third_party.requests import structures
  29 from third_party.rietveld import upload
  30
  31 from utils import oauth
  32 from utils import tools
  33 from utils import zip_package
  34
# Hack out upload logging.info(): rebind the 'logging' name inside the upload
# module to a dedicated 'upload' logger, so that its level can be raised below.
upload.logging = logging.getLogger('upload')
# Only WARNING and above are let through. Mac pylint chokes on this line,
# hence the E1103 suppression.
upload.logging.setLevel(logging.WARNING)  # pylint: disable=E1103
  39
  40
  41 # TODO(vadimsh): Remove this once we don't have to support python 2.6 anymore.
  42 def monkey_patch_httplib():
  43   """Patch httplib.HTTPConnection to have '_tunnel_host' attribute.
  44
  45   'requests' library (>= v2) accesses 'HTTPConnection._tunnel_host' attribute
  46   added only in python 2.6.3. This function patches HTTPConnection to have it
  47   on python 2.6.2 as well.
  48   """
  49   conn = httplib.HTTPConnection('example.com')
  50   if not hasattr(conn, '_tunnel_host'):
  51     httplib.HTTPConnection._tunnel_host = None
  52 monkey_patch_httplib()
  53
  54
# The name of the key to store the count of url attempts.
COUNT_KEY = 'UrlOpenAttempt'

# Default maximum number of attempts to try opening a url before aborting.
URL_OPEN_MAX_ATTEMPTS = 30

# Default total time, in seconds, to keep retrying a request.
URL_OPEN_TIMEOUT = 6*60.

# Default timeout, in seconds, when reading from open HTTP connection.
URL_READ_TIMEOUT = 60

# Content type for url encoded POST body.
URL_ENCODED_FORM_CONTENT_TYPE = 'application/x-www-form-urlencoded'
# Content type for JSON body.
JSON_CONTENT_TYPE = 'application/json; charset=UTF-8'
# Default content type for POST body.
DEFAULT_CONTENT_TYPE = URL_ENCODED_FORM_CONTENT_TYPE

# Content type -> function that encodes a request body.
CONTENT_ENCODERS = {
  URL_ENCODED_FORM_CONTENT_TYPE:
    urllib.urlencode,
  JSON_CONTENT_TYPE:
    lambda x: json.dumps(x, sort_keys=True, separators=(',', ':')),
}

# File to use to store all auth cookies.
COOKIE_FILE = os.path.join(os.path.expanduser('~'), '.isolated_cookies')

# Google Storage URL regular expression.
GS_STORAGE_HOST_URL_RE = re.compile(r'https://.*\.storage\.googleapis\.com')

# All possible authentication methods with corresponding types of method config
# object (or None if method is not configurable). See configure_auth.
# Order is important: it's visible in commands --help output.
AUTH_METHODS = [
  ('oauth', oauth.OAuthConfig),
  ('cookie', None),
  ('bot', None),
  ('none', None),
]


# Global (for now) map: server URL (http://example.com) -> HttpService instance.
# Used by get_http_service to cache HttpService instances.
_http_services = {}
_http_services_lock = threading.Lock()

# CookieJar reused by all services + lock that protects its instantiation.
_cookie_jar = None
_cookie_jar_lock = threading.Lock()

# Path to cacert.pem bundle file reused by all services.
_ca_certs = None
_ca_certs_lock = threading.Lock()

# This lock ensures that user won't be confused with multiple concurrent
# login prompts. It is also held while _auth_method and _auth_method_config
# are written by configure_auth and read by create_authenticator.
_auth_lock = threading.Lock()

# Set in 'configure_auth'. If configure_auth is not called before first request,
# will be set to defaults generated by 'get_default_auth_config'.
_auth_method = None
_auth_method_config = None
 120
 121
 122 class NetError(IOError):
 123   """Generic network related error."""
 124
 125   def __init__(self, inner_exc=None):
 126     super(NetError, self).__init__(str(inner_exc or self.__doc__))
 127     self.inner_exc = inner_exc
 128
 129   def format(self, verbose=False):
 130     """Human readable description with detailed information about the error."""
 131     out = [str(self.inner_exc)]
 132     if verbose:
 133       headers = None
 134       body = None
 135       if isinstance(self.inner_exc, urllib2.HTTPError):
 136         headers = self.inner_exc.hdrs.items()
 137         body = self.inner_exc.read()
 138       elif isinstance(self.inner_exc, requests.HTTPError):
 139         headers = self.inner_exc.response.headers.items()
 140         body = self.inner_exc.response.content
 141       if headers or body:
 142         out.append('----------')
 143         if headers:
 144           for header, value in headers:
 145             if not header.startswith('x-'):
 146               out.append('%s: %s' % (header.capitalize(), value))
 147           out.append('')
 148         out.append(body or '<empty body>')
 149         out.append('----------')
 150     return '\n'.join(out)
 151
 152
class TimeoutError(NetError):
  """Timeout while reading HTTP response."""
  # The docstring above is also the default error message: NetError.__init__
  # falls back to self.__doc__ when no inner exception is given.
 155
 156
class ConnectionError(NetError):
  """Failed to connect to the server."""
  # The docstring above is also the default error message: NetError.__init__
  # falls back to self.__doc__ when no inner exception is given.
 159
 160
 161 class HttpError(NetError):
 162   """Server returned HTTP error code."""
 163
 164   def __init__(self, code, inner_exc=None):
 165     super(HttpError, self).__init__(inner_exc)
 166     self.code = code
 167
 168
 169 def url_open(url, **kwargs):
 170   """Attempts to open the given url multiple times.
 171
 172   |data| can be either:
 173     - None for a GET request
 174     - str for pre-encoded data
 175     - list for data to be encoded
 176     - dict for data to be encoded
 177
 178   See HttpService.request for a full list of arguments.
 179
 180   Returns HttpResponse object, where the response may be read from, or None
 181   if it was unable to connect.
 182   """
 183   urlhost, urlpath = split_server_request_url(url)
 184   service = get_http_service(urlhost)
 185   return service.request(urlpath, **kwargs)
 186
 187
 188 def url_read(url, **kwargs):
 189   """Attempts to open the given url multiple times and read all data from it.
 190
 191   Accepts same arguments as url_open function.
 192
 193   Returns all data read or None if it was unable to connect or read the data.
 194   """
 195   kwargs['stream'] = False
 196   response = url_open(url, **kwargs)
 197   if not response:
 198     return None
 199   try:
 200     return response.read()
 201   except TimeoutError:
 202     return None
 203
 204
 205 def url_retrieve(filepath, url, **kwargs):
 206   """Downloads an URL to a file. Returns True on success."""
 207   response = url_open(url, **kwargs)
 208   if not response:
 209     return False
 210   try:
 211     with open(filepath, 'wb') as f:
 212       while True:
 213         buf = response.read(65536)
 214         if not buf:
 215           return True
 216         f.write(buf)
 217   except (IOError, OSError, TimeoutError):
 218     try:
 219       os.remove(filepath)
 220     except IOError:
 221       pass
 222     return False
 223
 224
 225 def split_server_request_url(url):
 226   """Splits the url into scheme+netloc and path+params+query+fragment."""
 227   url_parts = list(urlparse.urlparse(url))
 228   urlhost = '%s://%s' % (url_parts[0], url_parts[1])
 229   urlpath = urlparse.urlunparse(['', ''] + url_parts[2:])
 230   return urlhost, urlpath
 231
 232
 233 def get_http_service(urlhost):
 234   """Returns existing or creates new instance of HttpService that can send
 235   requests to given base urlhost.
 236   """
 237   # Ensure consistency in url naming.
 238   urlhost = str(urlhost).lower().rstrip('/')
 239   # Do not use COUNT_KEY with Google Storage (since it breaks a signature).
 240   use_count_key = not GS_STORAGE_HOST_URL_RE.match(urlhost)
 241   with _http_services_lock:
 242     service = _http_services.get(urlhost)
 243     if not service:
 244       service = HttpService(
 245           urlhost,
 246           engine=RequestsLibEngine(get_cacerts_bundle()),
 247           authenticator=create_authenticator(urlhost),
 248           use_count_key=use_count_key)
 249       _http_services[urlhost] = service
 250     return service
 251
 252
 253 def get_cookie_jar():
 254   """Returns global CoookieJar object that stores cookies in the file."""
 255   global _cookie_jar
 256   with _cookie_jar_lock:
 257     if _cookie_jar is not None:
 258       return _cookie_jar
 259     jar = ThreadSafeCookieJar(COOKIE_FILE)
 260     jar.load()
 261     _cookie_jar = jar
 262     return jar
 263
 264
 265 def get_cacerts_bundle():
 266   """Returns path to a file with CA root certificates bundle."""
 267   global _ca_certs
 268   with _ca_certs_lock:
 269     if _ca_certs is not None and os.path.exists(_ca_certs):
 270       return _ca_certs
 271     _ca_certs = zip_package.extract_resource(requests, 'cacert.pem')
 272     return _ca_certs
 273
 274
 275 def get_default_auth_config():
 276   """Returns auth configuration used by default if configure_auth is not called.
 277
 278   If running in a headless mode on bots, will use 'bot' auth, otherwise
 279   'oauth' with default oauth config.
 280
 281   Returns pair (auth method name, auth method config).
 282   """
 283   if tools.is_headless():
 284     return 'bot', None
 285   else:
 286     return 'oauth', oauth.make_oauth_config()
 287
 288
 289 def configure_auth(method, config=None):
 290   """Defines what authentication methods to use.
 291
 292   Possible authentication methods are:
 293     'bot' - use HMAC authentication based on a secret key.
 294     'cookie' - use cookie-based authentication.
 295     'none' - do not use authentication.
 296     'oauth' - use oauth-based authentication.
 297
 298   Arguments:
 299     method: what method to use.
 300     config: object that holds configuration for authentication method.
 301         Concrete type depends on a method used (see AUTH_METHODS for expected
 302         type). Passed to corresponding authenticator instance.
 303   """
 304   global _auth_method
 305   global _auth_method_config
 306   assert method in dict(AUTH_METHODS), method
 307   config_type = dict(AUTH_METHODS)[method]
 308   if config_type and not isinstance(config, config_type):
 309     raise TypeError(
 310         'Expecting \'%s\' auth config to be of type %s. Got %s instead.' %
 311         (method, config_type, type(config)))
 312   elif not config_type and config is not None:
 313     raise TypeError('Auth method \'%s\' is not configurable.' % method)
 314   with _auth_lock:
 315     _auth_method = method
 316     _auth_method_config = config
 317
 318
 319 def create_authenticator(urlhost):
 320   """Makes Authenticator instance used by HttpService to access |urlhost|."""
 321   # We use signed URL for Google Storage, no need for special authentication.
 322   if GS_STORAGE_HOST_URL_RE.match(urlhost):
 323     return None
 324
 325   # Lazy initialize auth config with defaults.
 326   if not _auth_method:
 327     default_method, default_config = get_default_auth_config()
 328     configure_auth(default_method, default_config)
 329
 330   # Use configuration set with 'configure_auth'.
 331   with _auth_lock:
 332     if _auth_method == 'bot':
 333       # TODO(vadimsh): Implement it. Use IP whitelist (that doesn't require
 334       # any authenticator instance) for now.
 335       return None
 336     elif _auth_method == 'cookie':
 337       return CookieBasedAuthenticator(urlhost, get_cookie_jar())
 338     elif _auth_method == 'none':
 339       return None
 340     elif _auth_method == 'oauth':
 341       return OAuthAuthenticator(urlhost, _auth_method_config)
 342   raise AssertionError('Invalid auth method: %s' % _auth_method)
 343
 344
 345 def get_case_insensitive_dict(original):
 346   """Given a dict with string keys returns new CaseInsensitiveDict.
 347
 348   Raises ValueError if there are duplicate keys.
 349   """
 350   normalized = structures.CaseInsensitiveDict(original or {})
 351   if len(normalized) != len(original):
 352     raise ValueError('Duplicate keys in: %s' % repr(original))
 353   return normalized
 354
 355
class HttpService(object):
  """Base class for a class that provides an API to HTTP based service:
    - Provides 'request' method.
    - Supports automatic request retries.
    - Thread safe.
  """

  def __init__(self, urlhost, engine, authenticator=None, use_count_key=True):
    """Arguments:
      urlhost: base URL of the service, request paths are joined onto it.
      engine: object with 'perform_request' method that executes HttpRequest.
      authenticator: optional object used to authorize requests and to run
          login and logout flows. None to send requests as is.
      use_count_key: if True, COUNT_KEY query parameter is added on retries.
    """
    self.urlhost = urlhost
    self.engine = engine
    self.authenticator = authenticator
    self.use_count_key = use_count_key

  @staticmethod
  def is_transient_http_error(code, retry_404, retry_50x):
    """Returns True if given HTTP response code is a transient error."""
    # Google Storage can return this and it should be retried.
    if code == 408:
      return True
    # Retry 404 only if allowed by the caller.
    if code == 404:
      return retry_404
    # All other 4** errors are fatal.
    if code < 500:
      return False
    # Retry >= 500 error only if allowed by the caller.
    return retry_50x

  @staticmethod
  def encode_request_body(body, content_type):
    """Returns request body encoded according to its content type."""
    # No body or it is already encoded.
    if body is None or isinstance(body, str):
      return body
    # Any body should have content type set.
    assert content_type, 'Request has body, but no content type'
    encoder = CONTENT_ENCODERS.get(content_type)
    assert encoder, ('Unknown content type %s' % content_type)
    return encoder(body)

  def login(self, allow_user_interaction):
    """Runs authentication flow to refresh short lived access token.

    Authentication flow may need to interact with the user (read username from
    stdin, open local browser for OAuth2, etc.). If interaction is required and
    |allow_user_interaction| is False, the login will silently be considered
    failed (i.e. this function returns False).

    'request' method always uses non-interactive login, so long-lived
    authentication tokens (cookie, OAuth2 refresh token, etc) have to be set up
    manually by developer (by calling 'auth.py login' perhaps) prior running
    any swarming or isolate scripts.

    Returns False as well if the service has no authenticator.
    """
    # Use global lock to ensure two authentication flows never run in parallel.
    with _auth_lock:
      if self.authenticator:
        return self.authenticator.login(allow_user_interaction)
      return False

  def logout(self):
    """Purges access credentials from local cache."""
    if self.authenticator:
      self.authenticator.logout()

  def request(
      self,
      urlpath,
      data=None,
      content_type=None,
      max_attempts=URL_OPEN_MAX_ATTEMPTS,
      retry_404=False,
      retry_50x=True,
      timeout=URL_OPEN_TIMEOUT,
      read_timeout=URL_READ_TIMEOUT,
      stream=True,
      method=None,
      headers=None):
    """Attempts to open the given url multiple times.

    |urlpath| is relative to the server root, i.e. '/some/request?param=1'.

    |data| can be either:
      - None for a GET request
      - str for pre-encoded data
      - list for data to be form-encoded
      - dict for data to be form-encoded

    - Optionally retries HTTP 404 and 50x.
    - Retries up to |max_attempts| times. If None or 0, there's no limit in the
      number of retries.
    - Retries up to |timeout| duration in seconds. If None or 0, there's no
      limit in the time taken to do retries.
    - If both |max_attempts| and |timeout| are None or 0, this function retries
      indefinitely.

    If |method| is given it can be 'GET', 'POST' or 'PUT' and it will be used
    when performing the request. By default it's GET if |data| is None and POST
    if |data| is not None.

    If |headers| is given, it should be a dict with HTTP headers to append
    to request. Caller is responsible for providing headers that make sense.

    If |read_timeout| is not None will configure underlying socket to
    raise TimeoutError exception whenever there's no response from the server
    for more than |read_timeout| seconds. It can happen during any read
    operation so once you pass non-None |read_timeout| be prepared to handle
    these exceptions in subsequent reads from the stream.

    Returns a file-like object, where the response may be read from, or None
    if it was unable to connect. If |stream| is False will read whole response
    into memory buffer before returning file-like object that reads from this
    memory buffer.
    """
    assert urlpath and urlpath[0] == '/', urlpath

    # Requests with a body default to POST, requests without one are GETs.
    if data is not None:
      assert method in (None, 'POST', 'PUT')
      method = method or 'POST'
      content_type = content_type or DEFAULT_CONTENT_TYPE
      body = self.encode_request_body(data, content_type)
    else:
      assert method in (None, 'GET')
      method = method or 'GET'
      body = None
      assert not content_type, 'Can\'t use content_type on GET'

    # Prepare request info. Any run of leading slashes in |urlpath| is
    # collapsed into a single one before parsing.
    parsed = urlparse.urlparse('/' + urlpath.lstrip('/'))
    resource_url = urlparse.urljoin(self.urlhost, parsed.path)
    query_params = urlparse.parse_qsl(parsed.query)

    # Prepare headers.
    headers = get_case_insensitive_dict(headers or {})
    if body is not None:
      headers['Content-Length'] = len(body)
      if content_type:
        headers['Content-Type'] = content_type

    last_error = None
    # Login is attempted at most once per call to 'request'.
    auth_attempted = False

    for attempt in retry_loop(max_attempts, timeout):
      # Log non-first attempt.
      if attempt.attempt:
        logging.warning(
            'Retrying request %s, attempt %d/%d...',
            resource_url, attempt.attempt, max_attempts)

      try:
        # Prepare and send a new request.
        request = HttpRequest(method, resource_url, query_params, body,
            headers, read_timeout, stream)
        self.prepare_request(request, attempt.attempt)
        if self.authenticator:
          self.authenticator.authorize(request)
        response = self.engine.perform_request(request)
        logging.debug('Request %s succeeded', request.get_full_url())
        return response

      except (ConnectionError, TimeoutError) as e:
        # Connection level failures are always retried.
        last_error = e
        logging.warning(
            'Unable to open url %s on attempt %d.\n%s',
            request.get_full_url(), attempt.attempt, e.format())
        continue

      except HttpError as e:
        last_error = e

        # Access denied -> authenticate.
        if e.code in (401, 403):
          logging.warning(
              'Authentication is required for %s on attempt %d.\n%s',
              request.get_full_url(), attempt.attempt, e.format())
          # Try to authenticate only once. If it doesn't help, then server does
          # not support authentication or user doesn't have required access.
          if not auth_attempted:
            auth_attempted = True
            if self.login(allow_user_interaction=False):
              # Success! Run request again immediately.
              attempt.skip_sleep = True
              continue
          # Authentication attempt was unsuccessful.
          logging.error(
              'Unable to authenticate to %s (%s). Use auth.py to login: '
              'python auth.py login --service=%s',
              self.urlhost, e.format(), self.urlhost)
          return None

        # Hit a error that can not be retried -> stop retry loop.
        if not self.is_transient_http_error(e.code, retry_404, retry_50x):
          # This HttpError means we reached the server and there was a problem
          # with the request, so don't retry.
          logging.error(
              'Able to connect to %s but an exception was thrown.\n%s',
              request.get_full_url(), e.format(verbose=True))
          return None

        # Retry all other errors.
        logging.warning(
            'Server responded with error on %s on attempt %d.\n%s',
            request.get_full_url(), attempt.attempt, e.format())
        continue

    # NOTE(review): |request| and |last_error| are bound only if the loop body
    # ran (and failed) at least once. Presumably retry_loop always yields at
    # least one attempt -- confirm.
    logging.error(
        'Unable to open given url, %s, after %d attempts.\n%s',
        request.get_full_url(), max_attempts, last_error.format(verbose=True))
    return None

  def json_request(
      self,
      method,
      urlpath,
      body=None,
      max_attempts=URL_OPEN_MAX_ATTEMPTS,
      timeout=URL_OPEN_TIMEOUT,
      headers=None):
    """Sends JSON request to the server and parses JSON response it gets back.

    Arguments:
      method: HTTP method to use ('GET', 'POST', ...).
      urlpath: relative request path (e.g. '/auth/v1/...').
      body: object to serialize to JSON and sent in the request.
      max_attempts: how many times to retry 50x errors.
      timeout: how long to wait for a response (including all retries).
      headers: dict with additional request headers.

    Returns:
      Deserialized JSON response on success, None on error or timeout. An empty
      response body and a body that is not valid JSON also result in None.
    """
    response = self.request(
        urlpath,
        content_type=JSON_CONTENT_TYPE if body is not None else None,
        data=body,
        headers=headers,
        max_attempts=max_attempts,
        method=method,
        retry_404=False,
        retry_50x=True,
        stream=False,
        timeout=timeout)
    if not response:
      return None
    try:
      text = response.read()
      if not text:
        return None
    except TimeoutError:
      return None
    try:
      return json.loads(text)
    except ValueError:
      logging.error('Not a JSON response when calling %s: %s', urlpath, text)
      return None

  def prepare_request(self, request, attempt):  # pylint: disable=R0201
    """Modify HttpRequest before sending it by adding COUNT_KEY parameter."""
    # Add COUNT_KEY only on retries.
    if self.use_count_key and attempt:
      request.params += [(COUNT_KEY, attempt)]
 616
 617
 618 class HttpRequest(object):
 619   """Request to HttpService."""
 620
 621   def __init__(self, method, url, params, body, headers, timeout, stream):
 622     """Arguments:
 623       |method| - HTTP method to use
 624       |url| - relative URL to the resource, without query parameters
 625       |params| - list of (key, value) pairs to put into GET parameters
 626       |body| - encoded body of the request (None or str)
 627       |headers| - dict with request headers
 628       |timeout| - socket read timeout (None to disable)
 629       |stream| - True to stream response from socket
 630     """
 631     self.method = method
 632     self.url = url
 633     self.params = params[:]
 634     self.body = body
 635     self.headers = headers.copy()
 636     self.timeout = timeout
 637     self.stream = stream
 638     self._cookies = None
 639
 640   @property
 641   def cookies(self):
 642     """CookieJar object that will be used for cookies in this request."""
 643     if self._cookies is None:
 644       self._cookies = cookielib.CookieJar()
 645     return self._cookies
 646
 647   def get_full_url(self):
 648     """Resource URL with url-encoded GET parameters."""
 649     if not self.params:
 650       return self.url
 651     else:
 652       return '%s?%s' % (self.url, urllib.urlencode(self.params))
 653
 654   def make_fake_response(self, content='', headers=None):
 655     """Makes new fake HttpResponse to this request, useful in tests."""
 656     return HttpResponse.get_fake_response(content, self.get_full_url(), headers)
 657
 658
 659 class HttpResponse(object):
 660   """Response from HttpService."""
 661
 662   def __init__(self, stream, url, headers):
 663     self._stream = stream
 664     self._url = url
 665     self._headers = get_case_insensitive_dict(headers)
 666     self._read = 0
 667
 668   @property
 669   def content_length(self):
 670     """Total length to the response or None if not known in advance."""
 671     length = self.get_header('Content-Length')
 672     return int(length) if length is not None else None
 673
 674   def get_header(self, header):
 675     """Returns response header (as str) or None if no such header."""
 676     return self._headers.get(header)
 677
 678   def read(self, size=None):
 679     """Reads up to |size| bytes from the stream and returns them.
 680
 681     If |size| is None reads all available bytes.
 682
 683     Raises TimeoutError on read timeout.
 684     """
 685     try:
 686       # cStringIO has a bug: stream.read(None) is not the same as stream.read().
 687       data = self._stream.read() if size is None else self._stream.read(size)
 688       self._read += len(data)
 689       return data
 690     except (socket.timeout, ssl.SSLError, requests.Timeout) as e:
 691       logging.error('Timeout while reading from %s, read %d of %s: %s',
 692           self._url, self._read, self.content_length, e)
 693       raise TimeoutError(e)
 694
 695   @classmethod
 696   def get_fake_response(cls, content, url, headers=None):
 697     """Returns HttpResponse with predefined content, useful in tests."""
 698     headers = dict(headers or {})
 699     headers['Content-Length'] = len(content)
 700     return cls(StringIO.StringIO(content), url, headers)
 701
 702
 703 class Authenticator(object):
 704   """Base class for objects that know how to authenticate into http services."""
 705
 706   def authorize(self, request):
 707     """Add authentication information to the request."""
 708
 709   def login(self, allow_user_interaction):
 710     """Run interactive authentication flow."""
 711     raise NotImplementedError()
 712
 713   def logout(self):
 714     """Purges access credentials from local cache."""
 715
 716
 717 class RequestsLibEngine(object):
 718   """Class that knows how to execute HttpRequests via requests library."""
 719
 720   # Preferred number of connections in a connection pool.
 721   CONNECTION_POOL_SIZE = 64
 722   # If True will not open more than CONNECTION_POOL_SIZE connections.
 723   CONNECTION_POOL_BLOCK = False
 724   # Maximum number of internal connection retries in a connection pool.
 725   CONNECTION_RETRIES = 0
 726
 727   def __init__(self, ca_certs):
 728     super(RequestsLibEngine, self).__init__()
 729     self.session = requests.Session()
 730     # Configure session.
 731     self.session.trust_env = False
 732     self.session.verify = ca_certs
 733     # Configure connection pools.
 734     for protocol in ('https://', 'http://'):
 735       self.session.mount(protocol, adapters.HTTPAdapter(
 736           pool_connections=self.CONNECTION_POOL_SIZE,
 737           pool_maxsize=self.CONNECTION_POOL_SIZE,
 738           max_retries=self.CONNECTION_RETRIES,
 739           pool_block=self.CONNECTION_POOL_BLOCK))
 740
 741   def perform_request(self, request):
 742     """Sends a HttpRequest to the server and reads back the response.
 743
 744     Returns HttpResponse.
 745
 746     Raises:
 747       ConnectionError - failed to establish connection to the server.
 748       TimeoutError - timeout while connecting or reading response.
 749       HttpError - server responded with >= 400 error code.
 750     """
 751     try:
 752       response = self.session.request(
 753           method=request.method,
 754           url=request.url,
 755           params=request.params,
 756           data=request.body,
 757           headers=request.headers,
 758           cookies=request.cookies,
 759           timeout=request.timeout,
 760           stream=request.stream)
 761       response.raise_for_status()
 762       if request.stream:
 763         stream = response.raw
 764       else:
 765         stream = StringIO.StringIO(response.content)
 766       return HttpResponse(stream, request.get_full_url(), response.headers)
 767     except requests.Timeout as e:
 768       raise TimeoutError(e)
 769     except requests.HTTPError as e:
 770       raise HttpError(e.response.status_code, e)
 771     except (requests.ConnectionError, socket.timeout, ssl.SSLError) as e:
 772       raise ConnectionError(e)
 773
 774
 775 # TODO(vadimsh): Remove once everything is using OAuth or HMAC-based auth.
 776 class CookieBasedAuthenticator(Authenticator):
 777   """Uses cookies (that AppEngine recognizes) to authenticate to |urlhost|."""
 778
 779   def __init__(self, urlhost, cookie_jar):
 780     super(CookieBasedAuthenticator, self).__init__()
 781     self.urlhost = urlhost
 782     self.cookie_jar = cookie_jar
 783     self.email = None
 784     self.password = None
 785     self._keyring = None
 786     self._lock = threading.Lock()
 787
 788   def authorize(self, request):
 789     # Copy all cookies from authenticator cookie jar to request cookie jar.
 790     with self._lock:
 791       with self.cookie_jar:
 792         for cookie in self.cookie_jar:
 793           request.cookies.set_cookie(cookie)
 794
 795   def login(self, allow_user_interaction):
 796     # Cookie authentication is always interactive (it asks for user name).
 797     if not allow_user_interaction:
 798       print >> sys.stderr, 'Cookie authentication requires interactive login'
 799       return False
 800     # To be used from inside AuthServer.
 801     cookie_jar = self.cookie_jar
 802     # RPC server that uses AuthenticationSupport's cookie jar.
 803     class AuthServer(upload.AbstractRpcServer):
 804       def _GetOpener(self):
 805         # Authentication code needs to know about 302 response.
 806         # So make OpenerDirector without HTTPRedirectHandler.
 807         opener = urllib2.OpenerDirector()
 808         opener.add_handler(urllib2.ProxyHandler())
 809         opener.add_handler(urllib2.UnknownHandler())
 810         opener.add_handler(urllib2.HTTPHandler())
 811         opener.add_handler(urllib2.HTTPDefaultErrorHandler())
 812         opener.add_handler(urllib2.HTTPSHandler())
 813         opener.add_handler(urllib2.HTTPErrorProcessor())
 814         opener.add_handler(urllib2.HTTPCookieProcessor(cookie_jar))
 815         return opener
 816       def PerformAuthentication(self):
 817         self._Authenticate()
 818         return self.authenticated
 819     with self._lock:
 820       with cookie_jar:
 821         rpc_server = AuthServer(self.urlhost, self.get_credentials)
 822         return rpc_server.PerformAuthentication()
 823
 824   def logout(self):
 825     domain = urlparse.urlparse(self.urlhost).netloc
 826     try:
 827       with self.cookie_jar:
 828         self.cookie_jar.clear(domain)
 829     except KeyError:
 830       pass
 831
 832   def get_credentials(self):
 833     """Called during authentication process to get the credentials.
 834
 835     May be called multiple times if authentication fails.
 836
 837     Returns tuple (email, password).
 838     """
 839     if self.email and self.password:
 840       return (self.email, self.password)
 841     self._keyring = self._keyring or upload.KeyringCreds(self.urlhost,
 842         self.urlhost, self.email)
 843     return self._keyring.GetUserCredentials()
 844
 845
 846 # TODO(vadimsh): Remove once everything is using OAuth or HMAC-based auth.
 847 class ThreadSafeCookieJar(cookielib.MozillaCookieJar):
 848   """MozillaCookieJar with thread safe load and save."""
 849
 850   def __enter__(self):
 851     """Context manager interface."""
 852     return self
 853
 854   def __exit__(self, *_args):
 855     """Saves cookie jar when exiting the block."""
 856     self.save()
 857     return False
 858
 859   def load(self, filename=None, ignore_discard=False, ignore_expires=False):
 860     """Loads cookies from the file if it exists."""
 861     filename = os.path.expanduser(filename or self.filename)
 862     with self._cookies_lock:
 863       if os.path.exists(filename):
 864         try:
 865           cookielib.MozillaCookieJar.load(
 866               self, filename, ignore_discard, ignore_expires)
 867           logging.debug('Loaded cookies from %s', filename)
 868         except (cookielib.LoadError, IOError):
 869           pass
 870       else:
 871         try:
 872           fd = os.open(filename, os.O_CREAT, 0600)
 873           os.close(fd)
 874         except OSError:
 875           logging.debug('Failed to create %s', filename)
 876       try:
 877         os.chmod(filename, 0600)
 878       except OSError:
 879         logging.debug('Failed to fix mode for %s', filename)
 880
 881   def save(self, filename=None, ignore_discard=False, ignore_expires=False):
 882     """Saves cookies to the file, completely overwriting it."""
 883     logging.debug('Saving cookies to %s', filename or self.filename)
 884     with self._cookies_lock:
 885       try:
 886         cookielib.MozillaCookieJar.save(
 887             self, filename, ignore_discard, ignore_expires)
 888       except OSError:
 889         logging.error('Failed to save %s', filename)
 890
 891
 892 class OAuthAuthenticator(Authenticator):
 893   """Uses OAuth Authorization header to authenticate requests."""
 894
 895   def __init__(self, urlhost, config):
 896     super(OAuthAuthenticator, self).__init__()
 897     assert isinstance(config, oauth.OAuthConfig)
 898     self.urlhost = urlhost
 899     self.config = config
 900     self._lock = threading.Lock()
 901     self._access_token = oauth.load_access_token(self.urlhost, self.config)
 902
 903   def authorize(self, request):
 904     with self._lock:
 905       if self._access_token:
 906         request.headers['Authorization'] = 'Bearer %s' % self._access_token
 907
 908   def login(self, allow_user_interaction):
 909     with self._lock:
 910       self._access_token = oauth.create_access_token(
 911           self.urlhost, self.config, allow_user_interaction)
 912       return self._access_token is not None
 913
 914   def logout(self):
 915     with self._lock:
 916       self._access_token = None
 917       oauth.purge_access_token(self.urlhost, self.config)
 918
 919
 920 class RetryAttempt(object):
 921   """Contains information about current retry attempt.
 922
 923   Yielded from retry_loop.
 924   """
 925
 926   def __init__(self, attempt, remaining):
 927     """Information about current attempt in retry loop:
 928       |attempt| - zero based index of attempt.
 929       |remaining| - how much time is left before retry loop finishes retries.
 930     """
 931     self.attempt = attempt
 932     self.remaining = remaining
 933     self.skip_sleep = False
 934
 935
 936 def calculate_sleep_before_retry(attempt, max_duration):
 937   """How long to sleep before retrying an attempt in retry_loop."""
 938   # Maximum sleeping time. We're hammering a cloud-distributed service, it'll
 939   # survive.
 940   MAX_SLEEP = 10.
 941   # random.random() returns [0.0, 1.0). Starts with relatively short waiting
 942   # time by starting with 1.5/2+1.5^-1 median offset.
 943   duration = (random.random() * 1.5) + math.pow(1.5, (attempt - 1))
 944   assert duration > 0.1
 945   duration = min(MAX_SLEEP, duration)
 946   if max_duration:
 947     duration = min(max_duration, duration)
 948   return duration
 949
 950
 951 def sleep_before_retry(attempt, max_duration):
 952   """Sleeps for some amount of time when retrying the attempt in retry_loop.
 953
 954   To be mocked in tests.
 955   """
 956   time.sleep(calculate_sleep_before_retry(attempt, max_duration))
 957
 958
 959 def current_time():
 960   """Used by retry loop to get current time.
 961
 962   To be mocked in tests.
 963   """
 964   return time.time()
 965
 966
 967 def retry_loop(max_attempts=None, timeout=None):
 968   """Yields whenever new attempt to perform some action is needed.
 969
 970   Yields instances of RetryAttempt class that contains information about current
 971   attempt. Setting |skip_sleep| attribute of RetryAttempt to True will cause
 972   retry loop to run next attempt immediately.
 973   """
 974   start = current_time()
 975   for attempt in itertools.count():
 976     # Too many attempts?
 977     if max_attempts and attempt == max_attempts:
 978       break
 979     # Retried for too long?
 980     remaining = (timeout - (current_time() - start)) if timeout else None
 981     if remaining is not None and remaining < 0:
 982       break
 983     # Kick next iteration.
 984     attemp_obj = RetryAttempt(attempt, remaining)
 985     yield attemp_obj
 986     if attemp_obj.skip_sleep:
 987       continue
 988     # Only sleep if we are going to try again.
 989     if max_attempts and attempt != max_attempts - 1:
 990       remaining = (timeout - (current_time() - start)) if timeout else None
 991       if remaining is not None and remaining < 0:
 992         break
 993       sleep_before_retry(attempt, remaining)