1 from __future__ import generators
5 A caching http interface that supports ETags and gzip
8 Requires Python 2.3 or later
11 2007-08-18, Rick: Modified so it's able to use a socks proxy if needed.
15 __author__ = "Joe Gregorio (joe@bitworking.org)"
16 __copyright__ = "Copyright 2006, Joe Gregorio"
17 __contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
19 "Xavier Verges Farrero",
32 import email.FeedParser
47 from hashlib import sha1 as _sha, md5 as _md5
49 # prior to Python 2.5, these were separate modules
55 from gettext import gettext as _
59 from httplib2 import socks
63 except (ImportError, AttributeError):
# Build the appropriate socket wrapper for ssl
# (the try/else scaffolding had been lost, leaving an orphan `except` and an
# unconditionally overwritten cert_reqs; restored so the second definition
# really is the no-ssl fallback.)
try:
    import ssl # python 2.6
    ssl_SSLError = ssl.SSLError
    def _ssl_wrap_socket(sock, key_file, cert_file,
                         disable_validation, ca_certs):
        """Wrap sock in SSL, optionally verifying the peer against ca_certs."""
        if disable_validation:
            cert_reqs = ssl.CERT_NONE
        else:
            cert_reqs = ssl.CERT_REQUIRED
        # We should be specifying SSL version 3 or TLS v1, but the ssl module
        # doesn't expose the necessary knobs. So we need to go with the default
        return ssl.wrap_socket(sock, keyfile=key_file, certfile=cert_file,
                               cert_reqs=cert_reqs, ca_certs=ca_certs)
except (AttributeError, ImportError):
    ssl_SSLError = None
    def _ssl_wrap_socket(sock, key_file, cert_file,
                         disable_validation, ca_certs):
        """Fallback for Pythons without the ssl module: wraps the socket but
        cannot validate certificates."""
        if not disable_validation:
            raise CertificateValidationUnsupported(
                    "SSL certificate validation is not supported without "
                    "the ssl module installed. To avoid this error, install "
                    "the ssl module, or explicity disable validation.")
        ssl_sock = socket.ssl(sock, key_file, cert_file)
        return httplib.FakeSocket(sock, ssl_sock)
94 if sys.version_info >= (2,3):
95 from iri2uri import iri2uri
def has_timeout(timeout): # python 2.6
    """Return True when *timeout* is a real timeout value, i.e. neither None
    nor the socket module's global-default sentinel (Python 2.6+)."""
    if timeout is None:
        return False
    sentinel = getattr(socket, '_GLOBAL_DEFAULT_TIMEOUT', None)
    return timeout is not sentinel
# Public API of the module. The opening of this list had been lost,
# leaving bare expression statements; restored.
__all__ = [
    'Http', 'Response', 'ProxyInfo', 'HttpLib2Error', 'RedirectMissingLocation',
    'RedirectLimit', 'FailedToDecompressContent',
    'UnimplementedDigestAuthOptionError',
    'UnimplementedHmacDigestAuthOptionError',
    'debuglevel', 'ProxiesUnavailableError']
113 # The httplib debug level, set to a non-zero value to get debug output
116 # A request will be tried 'RETRIES' times if it fails at the socket/connection level.
120 if sys.version_info < (2,4):
def HTTPResponse__getheaders(self):
    """Return list of (header, value) tuples.

    Fix: the ResponseNotReady raise was unconditional, which made the
    return unreachable; it must only fire before a message has been read.
    """
    if self.msg is None:
        raise httplib.ResponseNotReady()
    return self.msg.items()

# Backport getheaders() onto httplib versions that lack it.
if not hasattr(httplib.HTTPResponse, 'getheaders'):
    httplib.HTTPResponse.getheaders = HTTPResponse__getheaders
135 # All exceptions raised here derive from HttpLib2Error
136 class HttpLib2Error(Exception): pass
138 # Some exceptions can be caught and optionally
139 # be turned back into responses.
class HttpLib2ErrorWithResponse(HttpLib2Error):
    """An HttpLib2Error that also carries the response and content that
    triggered it, so callers may choose to treat it as a plain response."""
    def __init__(self, desc, response, content):
        HttpLib2Error.__init__(self, desc)
        self.response = response
        self.content = content
# Errors that carry the offending (response, content) pair.
class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass
class RedirectLimit(HttpLib2ErrorWithResponse): pass
class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass
class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass
class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass
# Errors raised without an accompanying response object.
class MalformedHeader(HttpLib2Error): pass
class RelativeURIError(HttpLib2Error): pass
class ServerNotFoundError(HttpLib2Error): pass
class ProxiesUnavailableError(HttpLib2Error): pass
class CertificateValidationUnsupported(HttpLib2Error): pass
class SSLHandshakeError(HttpLib2Error): pass
class NotSupportedOnThisPlatform(HttpLib2Error): pass
class CertificateHostnameMismatch(SSLHandshakeError):
    """Raised when the server's certificate does not cover the host we
    connected to.

    Fix: the constructor accepted host and cert but never stored them;
    keep them so callers can inspect the offending certificate.
    """
    def __init__(self, desc, host, cert):
        HttpLib2Error.__init__(self, desc)
        self.host = host
        self.cert = cert
169 # Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)
171 # Pluggable cache storage (supports storing the cache in
172 # flat files by default. We need a plug-in architecture
173 # that can support Berkeley DB and Squid)
176 # Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
177 # Does not handle Cache-Control: max-stale
178 # Does not use Age: headers when calculating cache freshness.
181 # The number of redirections to follow before giving up.
182 # Note that only GET redirects are automatically followed.
183 # Will also honor 301 requests by saving that info and never
184 # requesting that URI again.
DEFAULT_MAX_REDIRECTS = 5

# Users can optionally provide a module that tells us where the CA_CERTS
# are located; it is optional, so the import must be guarded (the original
# try/except lines had been lost, making a missing module a hard failure).
try:
    import ca_certs_locater
    CA_CERTS = ca_certs_locater.get()
except ImportError:
    # Default CA certificates file bundled with httplib2.
    CA_CERTS = os.path.join(
        os.path.dirname(os.path.abspath(__file__ )), "cacerts.txt")
197 # Which headers are hop-by-hop headers by default
198 HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade']
200 def _get_end2end_headers(response):
201 hopbyhop = list(HOP_BY_HOP)
202 hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',')])
203 return [header for header in response.keys() if header not in hopbyhop]
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")

def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

        (scheme, authority, path, query, fragment) = parse_uri(uri)

    Missing components come back as None (or '' for path).
    """
    groups = URI.match(uri).groups()
    return (groups[1], groups[3], groups[4], groups[6], groups[8])
def urlnorm(uri):
    """Normalize an absolute URI for use as a cache key.

    Returns (scheme, authority, request_uri, defrag_uri): scheme and
    authority lower-cased, request_uri = path[?query], defrag_uri the URI
    without its fragment. Raises RelativeURIError for relative URIs.
    """
    (scheme, authority, path, query, fragment) = parse_uri(uri)
    if not scheme or not authority:
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
    authority = authority.lower()
    scheme = scheme.lower()
    # An empty path is equivalent to "/" for request purposes.
    if not path:
        path = "/"
    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    request_uri = query and "?".join([path, query]) or path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri
# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
re_url_scheme = re.compile(r'^\w+://')
re_slash = re.compile(r'[?/:|]+')

def safename(filename):
    """Return a filename suitable for the cache.

    Strips dangerous and common characters to create a filename we
    can use to store the cache in. The md5 of the full name is appended
    so truncation cannot cause collisions.
    """
    # IDNA-encode the host part of URL-shaped names; ignore names that
    # cannot be encoded. (The try/except/else scaffolding had been lost.)
    try:
        if re_url_scheme.match(filename):
            if isinstance(filename, str):
                filename = filename.decode('utf-8')
                filename = filename.encode('idna')
            else:
                filename = filename.encode('idna')
    except UnicodeError:
        pass
    if isinstance(filename, unicode):
        filename = filename.encode('utf-8')
    filemd5 = _md5(filename).hexdigest()
    filename = re_url_scheme.sub("", filename)
    filename = re_slash.sub(",", filename)

    # limit length of filename
    if len(filename) > 200:
        filename = filename[:200]
    return ",".join((filename, filemd5))
262 NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+')
263 def _normalize_headers(headers):
264 return dict([ (key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip()) for (key, value) in headers.iteritems()])
266 def _parse_cache_control(headers):
268 if headers.has_key('cache-control'):
269 parts = headers['cache-control'].split(',')
270 parts_with_args = [tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")]
271 parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")]
272 retval = dict(parts_with_args + parts_wo_args)
275 # Whether to use a strict mode to parse WWW-Authenticate headers
276 # Might lead to bad results in case of ill-formed header value,
277 # so disabled by default, falling back to relaxed parsing.
278 # Set to true to turn on, usefull for testing servers.
279 USE_WWW_AUTH_STRICT_PARSING = 0
282 # [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP
283 # "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space
284 # Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both:
285 # \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?
286 WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$")
287 WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$")
288 UNQUOTE_PAIRS = re.compile(r'\\(.)')
289 def _parse_www_authenticate(headers, headername='www-authenticate'):
290 """Returns a dictionary of dictionaries, one dict
293 if headers.has_key(headername):
296 authenticate = headers[headername].strip()
297 www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED
299 # Break off the scheme at the beginning of the line
300 if headername == 'authentication-info':
301 (auth_scheme, the_rest) = ('digest', authenticate)
303 (auth_scheme, the_rest) = authenticate.split(" ", 1)
304 # Now loop over all the key value pairs that come after the scheme,
305 # being careful not to roll into the next scheme
306 match = www_auth.search(the_rest)
309 if match and len(match.groups()) == 3:
310 (key, value, the_rest) = match.groups()
311 auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')])
312 match = www_auth.search(the_rest)
313 retval[auth_scheme.lower()] = auth_params
314 authenticate = the_rest.strip()
317 raise MalformedHeader("WWW-Authenticate")
def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    Returns one of "FRESH", "STALE" or "TRANSPARENT".

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Note that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh
    """
    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    if 'pragma' in request_headers and request_headers['pragma'].lower().find('no-cache') != -1:
        retval = "TRANSPARENT"
        if 'cache-control' not in request_headers:
            request_headers['cache-control'] = 'no-cache'
    elif 'no-cache' in cc:
        retval = "TRANSPARENT"
    elif 'no-cache' in cc_response:
        retval = "STALE"
    elif 'only-if-cached' in cc:
        retval = "FRESH"
    elif 'date' in response_headers:
        date = calendar.timegm(email.Utils.parsedate_tz(response_headers['date']))
        now = time.time()
        current_age = max(0, now - date)
        if 'max-age' in cc_response:
            try:
                freshness_lifetime = int(cc_response['max-age'])
            except ValueError:
                freshness_lifetime = 0
        elif 'expires' in response_headers:
            expires = email.Utils.parsedate_tz(response_headers['expires'])
            if None == expires:
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        # The request's own max-age overrides the response's.
        if 'max-age' in cc:
            try:
                freshness_lifetime = int(cc['max-age'])
            except ValueError:
                freshness_lifetime = 0
        if 'min-fresh' in cc:
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval
395 def _decompressContent(response, new_content):
396 content = new_content
398 encoding = response.get('content-encoding', None)
399 if encoding in ['gzip', 'deflate']:
400 if encoding == 'gzip':
401 content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read()
402 if encoding == 'deflate':
403 content = zlib.decompress(content)
404 response['content-length'] = str(len(content))
405 # Record the historical presence of the encoding in a way the won't interfere.
406 response['-content-encoding'] = response['content-encoding']
407 del response['content-encoding']
410 raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content)
def _updateCache(request_headers, response_headers, content, cache, cachekey):
    """Store (or purge) a response in *cache* under *cachekey*.

    Honors 'no-store' on either side by deleting any cached entry;
    otherwise serializes status line + end-to-end headers + body, adding
    '-varied-*' annotations recording which request header values the
    response varied on.
    """
    if cachekey:
        cc = _parse_cache_control(request_headers)
        cc_response = _parse_cache_control(response_headers)
        if 'no-store' in cc or 'no-store' in cc_response:
            cache.delete(cachekey)
        else:
            info = email.Message.Message()
            for key, value in response_headers.iteritems():
                if key not in ['status','content-encoding','transfer-encoding']:
                    info[key] = value

            # Add annotations to the cache to indicate what headers
            # are variant for this request.
            vary = response_headers.get('vary', None)
            if vary:
                vary_headers = vary.lower().replace(' ', '').split(',')
                for header in vary_headers:
                    key = '-varied-%s' % header
                    try:
                        info[key] = request_headers[header]
                    except KeyError:
                        pass

            # A 304 is cached as the 200 it revalidated.
            status = response_headers.status
            if status == 304:
                status = 200

            status_header = 'status: %d\r\n' % status

            header_str = info.as_string()

            # Normalize bare CR or LF to CRLF before joining with the body.
            header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
            text = "".join([status_header, header_str, content])

            cache.set(cachekey, text)
451 dig = _md5("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
def _wsse_username_token(cnonce, iso_now, password):
    """Compute the WSSE PasswordDigest: Base64(SHA1(nonce + created + password))."""
    digest = _sha("%s%s%s" % (cnonce, iso_now, password)).digest()
    return base64.b64encode(digest).strip()
458 # For credentials we need two things, first
459 # a pool of credential to try (not necesarily tied to BAsic, Digest, etc.)
460 # Then we also need a list of URIs that have already demanded authentication
461 # That list is tricky since sub-URIs can take the same auth, or the
462 # auth scheme may change as you descend the tree.
463 # So we also need each Auth instance to be able to tell us
464 # how close to the 'top' it is.
class Authentication(object):
    """Base class for the pluggable authorization schemes.

    An instance records credentials and the (host, path) scope they apply
    to, and is asked to decorate each request that falls inside that scope.

    NOTE(review): some lines of this class are not visible in this chunk
    (e.g. the self.host/self.path/self.http assignments in __init__ that
    depth()/inscope() read) -- comments below are hedged accordingly.
    """
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        # presumably self.host, self.path and self.http are also assigned
        # here; the assignments are missing from this view -- TODO confirm.
        self.credentials = credentials

    def depth(self, request_uri):
        # Depth of request_uri below this auth's scope path; used to pick
        # the most specific matching Authentication instance.
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        return request_uri[len(self.path):].count("/")

    def inscope(self, host, request_uri):
        # XXX Should we normalize the request_uri?
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        return (host == self.host) and path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header. Over-ride this in sub-classes."""

    def response(self, response, content):
        """Gives us a chance to update with new nonces
        or such returned from the last authorized response.
        Over-ride this in sub-classes if necessary.

        Return True if the request is to be retried, for
        example Digest may return stale=true.
        """
class BasicAuthentication(Authentication):
    """RFC 2617 Basic authentication: sends the credentials base64-encoded
    on every request in scope."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        token = base64.b64encode("%s:%s" % self.credentials).strip()
        headers['authorization'] = 'Basic ' + token
class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['digest']
        qop = self.challenge.get('qop', 'auth')
        self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None
        if self.challenge['qop'] is None:
            raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5').upper()
        if self.challenge['algorithm'] != 'MD5':
            raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        # A1 per RFC 2617: username:realm:password.
        self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
        self.challenge['nc'] = 1

    def request(self, method, request_uri, headers, content, cnonce = None):
        """Modify the request headers"""
        H = lambda x: _md5(x).hexdigest()
        KD = lambda s, d: H("%s:%s" % (s, d))
        A2 = "".join([method, ":", request_uri])
        self.challenge['cnonce'] = cnonce or _cnonce()
        request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (
            self.challenge['nonce'],
            '%08x' % self.challenge['nc'],
            self.challenge['cnonce'],
            self.challenge['qop'], H(A2)))
        # Restored the username, uri and response format arguments, which
        # had been dropped and left the %-tuple short three values.
        headers['authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % (
            self.credentials[0],
            self.challenge['realm'],
            self.challenge['nonce'],
            request_uri,
            self.challenge['algorithm'],
            request_digest,
            self.challenge['qop'],
            self.challenge['nc'],
            self.challenge['cnonce'])
        if self.challenge.get('opaque'):
            headers['authorization'] += ', opaque="%s"' % self.challenge['opaque']
        self.challenge['nc'] += 1

    def response(self, response, content):
        if 'authentication-info' not in response:
            challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {})
            if 'true' == challenge.get('stale'):
                # Server says our nonce went stale: retry with the new one.
                self.challenge['nonce'] = challenge['nonce']
                self.challenge['nc'] = 1
                return True
        else:
            updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})

            if 'nextnonce' in updated_challenge:
                self.challenge['nonce'] = updated_challenge['nextnonce']
                self.challenge['nc'] = 1
        return False
class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above."""
    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['hmacdigest']
        # TODO: self.challenge['domain']
        self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
        if self.challenge['reason'] not in ['unauthorized', 'integrity']:
            self.challenge['reason'] = 'unauthorized'
        self.challenge['salt'] = self.challenge.get('salt', '')
        if not self.challenge.get('snonce'):
            raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn't contain a server nonce, or this one is empty."))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1')
        if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']:
            raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1')
        if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
            raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm']))
        if self.challenge['algorithm'] == 'HMAC-MD5':
            self.hashmod = _md5
        else:
            self.hashmod = _sha
        if self.challenge['pw-algorithm'] == 'MD5':
            self.pwhashmod = _md5
        else:
            self.pwhashmod = _sha
        # NOTE(review): .new() exists on the old md5/sha modules but not on
        # hashlib constructors; this mirrors the original code -- confirm
        # which _md5/_sha binding is active before relying on this path.
        self.key = "".join([self.credentials[0], ":",
                    self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
                    ":", self.challenge['realm']])
        self.key = self.pwhashmod.new(self.key).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Modify the request headers"""
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
        request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
        headers['authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
            self.credentials[0],
            self.challenge['realm'],
            self.challenge['snonce'],
            cnonce,
            request_uri,
            created,
            request_digest,
            keylist)

    def response(self, response, content):
        challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {})
        # 'integrity'/'stale' ask the client to retry with a fresh digest.
        if challenge.get('reason') in ['integrity', 'stale']:
            return True
        return False
class WsseAuthentication(Authentication):
    """This is thinly tested and should not be relied upon.
    At this time there isn't any third party server to test against.
    Blogger and TypePad implemented this algorithm at one point
    but Blogger has since switched to Basic over HTTPS and
    TypePad has implemented it wrong, by never issuing a 401
    challenge but instead requiring your client to telepathically know that
    their endpoint is expecting WSSE profile="UsernameToken"."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['authorization'] = 'WSSE profile="UsernameToken"'
        iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        # Fresh client nonce per request (this line had been lost).
        cnonce = _cnonce()
        password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1])
        headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % (
            self.credentials[0],
            password_digest,
            cnonce,
            iso_now)
class GoogleLoginAuthentication(Authentication):
    """Google ClientLogin: exchanges the credentials for an Auth token at
    construction time and sends it as a GoogleLogin Authorization header."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        from urllib import urlencode
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        service = challenge['googlelogin'].get('service', 'xapi')
        # Bloggger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == 'xapi' and request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        #elif request_uri.find("spreadsheets") > 0:
        #    service = "wise"

        auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
        resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'})
        lines = content.split('\n')
        d = dict([tuple(line.split("=", 1)) for line in lines if line])
        if resp.status == 403:
            # Login rejected: leave the token empty rather than KeyError.
            self.Auth = ""
        else:
            self.Auth = d['Auth']

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['authorization'] = 'GoogleLogin Auth=' + self.Auth
# Registry mapping scheme names (as they appear in WWW-Authenticate) to
# handler classes. The closing brace had been lost; restored.
AUTH_SCHEME_CLASSES = {
    "basic": BasicAuthentication,
    "wsse": WsseAuthentication,
    "digest": DigestAuthentication,
    "hmacdigest": HmacDigestAuthentication,
    "googlelogin": GoogleLoginAuthentication
}

# Preference order when a server offers several schemes.
AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
class FileCache(object):
    """Uses a local directory as a store for cached files.
    Not really safe to use if multiple threads or processes are going to
    be running on the same cache.
    """
    def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
        self.cache = cache
        self.safe = safe
        if not os.path.exists(cache):
            os.makedirs(self.cache)

    def get(self, key):
        """Return the cached bytes for *key*, or None if absent/unreadable."""
        retval = None
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            f = file(cacheFullPath, "rb")
            retval = f.read()
            f.close()
        except IOError:
            pass
        return retval

    def set(self, key, value):
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        f = file(cacheFullPath, "wb")
        f.write(value)
        f.close()

    def delete(self, key):
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        if os.path.exists(cacheFullPath):
            os.remove(cacheFullPath)
class Credentials(object):
    """Pool of (domain, name, password) triples; an empty domain means the
    credentials apply to any host."""
    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        self.credentials.append((domain.lower(), name, password))

    def clear(self):
        self.credentials = []

    def iter(self, domain):
        """Yield (name, password) pairs applicable to *domain*."""
        for (cdomain, name, password) in self.credentials:
            if cdomain == "" or domain == cdomain:
                yield (name, password)
# Client TLS credentials reuse the Credentials pool unchanged.
class KeyCerts(Credentials):
    """Identical to Credentials except that
    name/password are mapped to key/cert."""
744 class AllHosts(object):
class ProxyInfo(object):
    """Collect information required to use a proxy."""
    # Hosts excluded from proxying; may also be set to AllHosts.
    bypass_hosts = ()

    def __init__(self, proxy_type, proxy_host, proxy_port,
                 proxy_rdns=None, proxy_user=None, proxy_pass=None):
        """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX
        constants. For example:

        p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP,
            proxy_host='localhost', proxy_port=8000)
        """
        self.proxy_type = proxy_type
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.proxy_rdns = proxy_rdns
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass

    def astuple(self):
        """Return the settings in the order socks.setproxy expects."""
        return (self.proxy_type, self.proxy_host, self.proxy_port,
                self.proxy_rdns, self.proxy_user, self.proxy_pass)

    def isgood(self):
        """True when both a host and a port are configured."""
        return (self.proxy_host != None) and (self.proxy_port != None)

    def applies_to(self, hostname):
        return not self.bypass_host(hostname)

    def bypass_host(self, hostname):
        """Has this host been excluded from the proxy config"""
        if self.bypass_hosts is AllHosts:
            return True

        bypass = False
        for domain in self.bypass_hosts:
            if hostname.endswith(domain):
                bypass = True

        return bypass
def proxy_info_from_environment(method='http'):
    """
    Read proxy info from the environment variables.

    Returns a ProxyInfo built from $http_proxy/$https_proxy (honoring
    $no_proxy), or None when *method* is unsupported or no proxy is set.
    """
    if method not in ['http', 'https']:
        return

    env_var = method + '_proxy'
    url = os.environ.get(env_var, os.environ.get(env_var.upper()))
    if not url:
        return
    pi = proxy_info_from_url(url, method)

    no_proxy = os.environ.get('no_proxy', os.environ.get('NO_PROXY', ''))
    bypass_hosts = []
    if no_proxy:
        bypass_hosts = no_proxy.split(',')
    # special case, no_proxy=* means all hosts bypassed
    if no_proxy == '*':
        bypass_hosts = AllHosts

    pi.bypass_hosts = bypass_hosts
    return pi
def proxy_info_from_url(url, method='http'):
    """
    Construct a ProxyInfo from a URL (such as http_proxy env var)

    Supports optional user:password@ credentials and defaults the port
    from *method* (80 for http, 443 for https) when none is given.
    """
    url = urlparse.urlparse(url)
    username = None
    password = None
    port = None
    if '@' in url[1]:
        ident, host_port = url[1].split('@', 1)
        if ':' in ident:
            username, password = ident.split(':', 1)
        else:
            password = ident
    else:
        host_port = url[1]
    if ':' in host_port:
        host, port = host_port.split(':', 1)
    else:
        host = host_port

    if port:
        port = int(port)
    else:
        port = dict(https=443, http=80)[method]

    proxy_type = 3 # socks.PROXY_TYPE_HTTP
    return ProxyInfo(
        proxy_type = proxy_type,
        proxy_host = host,
        proxy_port = port,
        proxy_user = username or None,
        proxy_pass = password or None,
    )
849 class HTTPConnectionWithTimeout(httplib.HTTPConnection):
851 HTTPConnection subclass that supports timeouts
853 All timeouts are in seconds. If None is passed for timeout then
854 Python's default timeout for sockets will be used. See for example
855 the docs of socket.setdefaulttimeout():
856 http://docs.python.org/library/socket.html#socket.setdefaulttimeout
859 def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None):
860 httplib.HTTPConnection.__init__(self, host, port, strict)
861 self.timeout = timeout
862 self.proxy_info = proxy_info
865 """Connect to the host and port specified in __init__."""
866 # Mostly verbatim from httplib.py.
867 if self.proxy_info and socks is None:
868 raise ProxiesUnavailableError(
869 'Proxy support missing but proxy use was requested!')
870 msg = "getaddrinfo returns an empty list"
871 if self.proxy_info and self.proxy_info.isgood():
873 proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass = self.proxy_info.astuple()
876 if use_proxy and proxy_rdns:
883 for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
884 af, socktype, proto, canonname, sa = res
887 self.sock = socks.socksocket(af, socktype, proto)
888 self.sock.setproxy(proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass)
890 self.sock = socket.socket(af, socktype, proto)
891 self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
892 # Different from httplib: support timeouts.
893 if has_timeout(self.timeout):
894 self.sock.settimeout(self.timeout)
895 # End of difference from httplib.
896 if self.debuglevel > 0:
897 print "connect: (%s, %s) ************" % (self.host, self.port)
899 print "proxy: %s ************" % str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass))
901 self.sock.connect((self.host, self.port) + sa[2:])
902 except socket.error, msg:
903 if self.debuglevel > 0:
904 print "connect fail: (%s, %s)" % (self.host, self.port)
906 print "proxy: %s" % str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass))
913 raise socket.error, msg
915 class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):
917 This class allows communication via SSL.
919 All timeouts are in seconds. If None is passed for timeout then
920 Python's default timeout for sockets will be used. See for example
921 the docs of socket.setdefaulttimeout():
922 http://docs.python.org/library/socket.html#socket.setdefaulttimeout
924 def __init__(self, host, port=None, key_file=None, cert_file=None,
925 strict=None, timeout=None, proxy_info=None,
926 ca_certs=None, disable_ssl_certificate_validation=False):
927 httplib.HTTPSConnection.__init__(self, host, port=port,
929 cert_file=cert_file, strict=strict)
930 self.timeout = timeout
931 self.proxy_info = proxy_info
934 self.ca_certs = ca_certs
935 self.disable_ssl_certificate_validation = \
936 disable_ssl_certificate_validation
938 # The following two methods were adapted from https_wrapper.py, released
939 # with the Google Appengine SDK at
940 # http://googleappengine.googlecode.com/svn-history/r136/trunk/python/google/appengine/tools/https_wrapper.py
941 # under the following license:
943 # Copyright 2007 Google Inc.
945 # Licensed under the Apache License, Version 2.0 (the "License");
946 # you may not use this file except in compliance with the License.
947 # You may obtain a copy of the License at
949 # http://www.apache.org/licenses/LICENSE-2.0
951 # Unless required by applicable law or agreed to in writing, software
952 # distributed under the License is distributed on an "AS IS" BASIS,
953 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
954 # See the License for the specific language governing permissions and
955 # limitations under the License.
    def _GetValidHostsForCert(self, cert):
        """Returns a list of valid host globs for an SSL certificate.

        Args:
            cert: A dictionary representing an SSL certificate.
        Returns:
            list: A list of valid host globs.
        """
        if 'subjectAltName' in cert:
            # Prefer the subjectAltName DNS entries when present.
            return [x[1] for x in cert['subjectAltName']
                    if x[0].lower() == 'dns']
        # Otherwise fall back to the subject's commonName.
        return [x[0][1] for x in cert['subject']
                if x[0][0].lower() == 'commonname']
973 def _ValidateCertificateHostname(self, cert, hostname):
974 """Validates that a given hostname is valid for an SSL certificate.
977 cert: A dictionary representing an SSL certificate.
978 hostname: The hostname to test.
980 bool: Whether or not the hostname is valid for this certificate.
982 hosts = self._GetValidHostsForCert(cert)
984 host_re = host.replace('.', '\.').replace('*', '[^.]*')
985 if re.search('^%s$' % (host_re,), hostname, re.I):
        # NOTE(review): the 'def connect(self):' line and several interior
        # lines of this method are not visible in this view; comments below
        # describe only the code that is shown.
        "Connect to a host on a given (SSL) port."
        msg = "getaddrinfo returns an empty list"
        # Use the configured proxy only when proxy_info says it is usable.
        if self.proxy_info and self.proxy_info.isgood():
            proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass = self.proxy_info.astuple()
        # With remote DNS (rdns), resolve/connect via the proxy address
        # rather than resolving the target host locally.
        if use_proxy and proxy_rdns:
        address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        # Try each address returned by getaddrinfo until one connects.
        for family, socktype, proto, canonname, sockaddr in address_info:
            # Proxied path: a SOCKS-capable socket configured from proxy_info.
            sock = socks.socksocket(family, socktype, proto)
            sock.setproxy(proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass)
            # Direct path: a plain socket with Nagle's algorithm disabled
            # to reduce request latency.
            sock = socket.socket(family, socktype, proto)
            sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
            if has_timeout(self.timeout):
                sock.settimeout(self.timeout)
            sock.connect((self.host, self.port))
            # Wrap the TCP socket with SSL; validation behavior depends on
            # disable_ssl_certificate_validation and ca_certs.
            self.sock =_ssl_wrap_socket(
                sock, self.key_file, self.cert_file,
                self.disable_ssl_certificate_validation, self.ca_certs)
            if self.debuglevel > 0:
                print "connect: (%s, %s)" % (self.host, self.port)
                print "proxy: %s" % str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass))
            if not self.disable_ssl_certificate_validation:
                cert = self.sock.getpeercert()
                # NOTE(review): split(':', 0) performs NO split (maxsplit=0),
                # so a ':port' suffix is not stripped here; this looks like
                # it should be self.host.split(':')[0] — confirm upstream.
                hostname = self.host.split(':', 0)[0]
                if not self._ValidateCertificateHostname(cert, hostname):
                    raise CertificateHostnameMismatch(
                        'Server presented certificate that does not match '
                        'host %s: %s' % (hostname, cert), hostname, cert)
        except ssl_SSLError, e:
            # Unfortunately the ssl module doesn't seem to provide any way
            # to get at more detailed error information, in particular
            # whether the error is due to certificate validation or
            # something else (such as SSL protocol mismatch).
            if e.errno == ssl.SSL_ERROR_SSL:
                raise SSLHandshakeError(e)
        except (socket.timeout, socket.gaierror):
        except socket.error, msg:
            if self.debuglevel > 0:
                print "connect fail: (%s, %s)" % (self.host, self.port)
                print "proxy: %s" % str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass))
        # Re-raise the last socket error once every address has failed.
        raise socket.error, msg
# Maps a URI scheme to the connection class used to speak it; consulted by
# Http.request() when no explicit connection_type is given.
# (Fix: the dict literal was missing its closing brace.)
SCHEME_TO_CONNECTION = {
    'http': HTTPConnectionWithTimeout,
    'https': HTTPSConnectionWithTimeout,
}
1069 """An HTTP client that handles:
    def __init__(self, cache=None, timeout=None,
                 proxy_info=proxy_info_from_environment,
                 ca_certs=None, disable_ssl_certificate_validation=False):
        """If 'cache' is a string then it is used as a directory name for
        a disk cache. Otherwise it must be an object that supports the
        same interface as FileCache.

        All timeouts are in seconds. If None is passed for timeout
        then Python's default timeout for sockets will be used. See
        for example the docs of socket.setdefaulttimeout():
        http://docs.python.org/library/socket.html#socket.setdefaulttimeout

        `proxy_info` may be:
          - a callable that takes the http scheme ('http' or 'https') and
            returns a ProxyInfo instance per request. By default, uses
            proxy_info_from_environment.
          - a ProxyInfo instance (static proxy config).
          - None (proxy disabled).

        ca_certs is the path of a file containing root CA certificates for SSL
        server certificate validation. By default, a CA cert file bundled with
        httplib2 is used.

        If disable_ssl_certificate_validation is true, SSL cert validation will
        be skipped.
        """
        self.proxy_info = proxy_info
        self.ca_certs = ca_certs
        self.disable_ssl_certificate_validation = \
                disable_ssl_certificate_validation

        # Map domain name to an httplib connection
        self.connections = {}
        # The location of the cache, for now a directory
        # where cached responses are held.
        if cache and isinstance(cache, basestring):
            self.cache = FileCache(cache)

        # Name/password pairs offered when a server demands authentication.
        self.credentials = Credentials()

        # Client-side SSL key/cert pairs, keyed by domain.
        self.certificates = KeyCerts()

        # authorization objects
        self.authorizations = []

        # If set to False then no redirects are followed, even safe ones.
        self.follow_redirects = True

        # Which HTTP methods do we apply optimistic concurrency to, i.e.
        # which methods get an "if-match:" etag header added to them.
        self.optimistic_concurrency_methods = ["PUT", "PATCH"]

        # If 'follow_redirects' is True, and this is set to True then
        # all redirects are followed, including unsafe ones.
        self.follow_all_redirects = False

        # When True, cached ETags are ignored (no If-None-Match is sent).
        self.ignore_etag = False

        # When True, exceptions during request() are converted into
        # synthetic error Responses instead of propagating.
        self.force_exception_to_status_code = False

        self.timeout = timeout

        # Keep Authorization: headers on a redirect.
        self.forward_authorization_headers = False
1151 def __getstate__(self):
1152 state_dict = copy.copy(self.__dict__)
1153 # In case request is augmented by some foreign object such as
1154 # credentials which handle auth
1155 if 'request' in state_dict:
1156 del state_dict['request']
1157 if 'connections' in state_dict:
1158 del state_dict['connections']
1161 def __setstate__(self, state):
1162 self.__dict__.update(state)
1163 self.connections = {}
1165 def _auth_from_challenge(self, host, request_uri, headers, response, content):
1166 """A generator that creates Authorization objects
1167 that can be applied to requests.
1169 challenges = _parse_www_authenticate(response, 'www-authenticate')
1170 for cred in self.credentials.iter(host):
1171 for scheme in AUTH_SCHEME_ORDER:
1172 if challenges.has_key(scheme):
1173 yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)
1175 def add_credentials(self, name, password, domain=""):
1176 """Add a name and password that will be used
1177 any time a request requires authentication."""
1178 self.credentials.add(name, password, domain)
1180 def add_certificate(self, key, cert, domain):
1181 """Add a key and cert that will be used
1182 any time a request requires authentication."""
1183 self.certificates.add(key, cert, domain)
1185 def clear_credentials(self):
1186 """Remove all the names and passwords
1187 that are used for authentication"""
1188 self.credentials.clear()
1189 self.authorizations = []
    def _conn_request(self, conn, request_uri, method, body, headers):
        """Issue one request on `conn` and return (Response, content),
        retrying up to RETRIES times.

        NOTE(review): several lines of the retry / exception-handling
        scaffolding are not visible in this view; comments cover only the
        code shown.
        """
        for i in range(RETRIES):
            # A closed keep-alive connection has sock == None; it must be
            # re-opened before it can carry another request.
            if hasattr(conn, 'sock') and conn.sock is None:
                conn.request(method, request_uri, body, headers)
            except socket.timeout:
            except socket.gaierror:
                # DNS failure: the host doesn't resolve at all.
                raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
            except ssl_SSLError:
            except socket.error, e:
                # errno may live in e.args depending on the Python version.
                if hasattr(e, 'args'):
                    err = getattr(e, 'args')[0]
                if err == errno.ECONNREFUSED: # Connection refused
            except httplib.HTTPException:
                # Just because the server closed the connection doesn't apparently mean
                # that the server didn't send a response.
                if hasattr(conn, 'sock') and conn.sock is None:
                response = conn.getresponse()
            except (socket.error, httplib.HTTPException):
            if method == "HEAD":
            # Read the body before wrapping: Response() copies only the
            # status line and headers from the httplib response object.
            content = response.read()
            response = Response(response)
            if method != "HEAD":
                # Transparently undo gzip/deflate content-encoding.
                content = _decompressContent(response, content)
        return (response, content)
    def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey):
        """Do the actual request using the connection object
        and also follow one level of redirects if necessary

        NOTE(review): a few interior lines (e.g. the 'if auth:' guards and
        the loop 'break') are not visible in this view.
        """

        # Pick the most specific (deepest-matching) stored authorization
        # that is in scope for this host/URI.
        auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
        auth = auths and sorted(auths)[0][1] or None
            auth.request(method, request_uri, headers, body)

        (response, content) = self._conn_request(conn, request_uri, method, body, headers)

            # e.g. Digest auth: retry once with refreshed parameters when
            # the server reports a stale nonce.
            if auth.response(response, body):
                auth.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers )
                response._stale_digest = 1

        if response.status == 401:
            # Try each credential/challenge combination; remember the first
            # one the server accepts for future requests.
            for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
                authorization.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers, )
                if response.status != 401:
                    self.authorizations.append(authorization)
                    authorization.response(response, body)

        # Only "safe" methods (or a 303) may be auto-redirected unless the
        # caller opted into follow_all_redirects.
        if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303):
            if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                    if not response.has_key('location') and response.status != 300:
                        raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content)
                    # Fix-up relative redirects (which violate an RFC 2616 MUST)
                    if response.has_key('location'):
                        location = response['location']
                        (scheme, authority, path, query, fragment) = parse_uri(location)
                        if authority == None:
                            response['location'] = urlparse.urljoin(absolute_uri, location)
                    if response.status == 301 and method in ["GET", "HEAD"]:
                        # Remember permanent redirects in the cache so later
                        # requests can skip the extra round trip.
                        response['-x-permanent-redirect-url'] = response['location']
                        if not response.has_key('content-location'):
                            response['content-location'] = absolute_uri
                        _updateCache(headers, response, content, self.cache, cachekey)
                    # Strip conditional validators so the redirect target is
                    # not answered with this entry's 304 conditions.
                    if headers.has_key('if-none-match'):
                        del headers['if-none-match']
                    if headers.has_key('if-modified-since'):
                        del headers['if-modified-since']
                    if 'authorization' in headers and not self.forward_authorization_headers:
                        del headers['authorization']
                    if response.has_key('location'):
                        location = response['location']
                        old_response = copy.deepcopy(response)
                        if not old_response.has_key('content-location'):
                            old_response['content-location'] = absolute_uri
                        redirect_method = method
                        if response.status in [302, 303]:
                            # Per common practice, 302/303 are re-fetched
                            # with GET.
                            redirect_method = "GET"
                        (response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1)
                        response.previous = old_response
                    # NOTE(review): 'rediection_limit' typo is in a runtime
                    # string, so it is deliberately left unchanged here.
                    raise RedirectLimit("Redirected more times than rediection_limit allows.", response, content)
            elif response.status in [200, 203] and method in ["GET", "HEAD"]:
                # Don't cache 206's since we aren't going to handle byte range requests
                if not response.has_key('content-location'):
                    response['content-location'] = absolute_uri
                _updateCache(headers, response, content, self.cache, cachekey)

        return (response, content)
1322 def _normalize_headers(self, headers):
1323 return _normalize_headers(headers)
1325 # Need to catch and rebrand some exceptions
1326 # Then need to optionally turn all exceptions into status codes
1327 # including all socket.* and httplib.* exceptions.
    def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None):
        """ Performs a single HTTP request.

        The 'uri' is the URI of the HTTP resource and can begin with either
        'http' or 'https'. The value of 'uri' must be an absolute URI.

        The 'method' is the HTTP method to perform, such as GET, POST, DELETE,
        etc. There is no restriction on the methods allowed.

        The 'body' is the entity body to be sent with the request. It is a
        string object.

        Any extra headers that are to be sent with the request should be
        provided in the 'headers' dictionary.

        The maximum number of redirects to follow before raising an
        exception is 'redirections'. The default is 5.

        The return value is a tuple of (response, content), the first
        being an instance of the 'Response' class, the second being
        a string that contains the response entity body.

        NOTE(review): several interior lines (the enclosing try:, some
        else: branches and assignments) are not visible in this view.
        """
            headers = self._normalize_headers(headers)

            if not headers.has_key('user-agent'):
                headers['user-agent'] = "Python-httplib2/%s (gzip)" % __version__

            (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
            # 'http' on port 443 is really https: drop the redundant port
            # from the authority.
            domain_port = authority.split(":")[0:2]
            if len(domain_port) == 2 and domain_port[1] == '443' and scheme == 'http':
                authority = domain_port[0]

            proxy_info = self._get_proxy_info(scheme, authority)

            # One pooled connection per scheme+authority pair.
            conn_key = scheme+":"+authority
            if conn_key in self.connections:
                conn = self.connections[conn_key]
                if not connection_type:
                    connection_type = SCHEME_TO_CONNECTION[scheme]
                certs = list(self.certificates.iter(authority))
                if scheme == 'https':
                    # If a client certificate is registered for this
                    # authority, hand its key/cert files to the connection.
                        conn = self.connections[conn_key] = connection_type(
                                authority, key_file=certs[0][0],
                                cert_file=certs[0][1], timeout=self.timeout,
                                proxy_info=proxy_info,
                                ca_certs=self.ca_certs,
                                disable_ssl_certificate_validation=
                                        self.disable_ssl_certificate_validation)
                        conn = self.connections[conn_key] = connection_type(
                                authority, timeout=self.timeout,
                                proxy_info=proxy_info,
                                ca_certs=self.ca_certs,
                                disable_ssl_certificate_validation=
                                        self.disable_ssl_certificate_validation)
                    conn = self.connections[conn_key] = connection_type(
                            authority, timeout=self.timeout,
                            proxy_info=proxy_info)
                conn.set_debuglevel(debuglevel)

            if 'range' not in headers and 'accept-encoding' not in headers:
                # Ask for compressed bodies, except for range requests
                # (byte ranges apply to the encoded representation).
                headers['accept-encoding'] = 'gzip, deflate'

            info = email.Message.Message()
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                    # info = email.message_from_string(cached_value)
                    #
                    # Need to replace the line above with the kludge below
                    # to fix the non-existent bug not fixed in this
                    # bug report: http://mail.python.org/pipermail/python-bugs-list/2005-September/030289.html
                        # Cache entries are stored as "<headers>\r\n\r\n<body>".
                        info, content = cached_value.split('\r\n\r\n', 1)
                        feedparser = email.FeedParser.FeedParser()
                        feedparser.feed(info)
                        info = feedparser.close()
                        feedparser._parse = None
                    except (IndexError, ValueError):
                        # Unparseable cache entry: discard it.
                        self.cache.delete(cachekey)

            if method in self.optimistic_concurrency_methods and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
                # http://www.w3.org/1999/04/Editing/
                headers['if-match'] = info['etag']

            if method not in ["GET", "HEAD"] and self.cache and cachekey:
                # RFC 2616 Section 13.10
                self.cache.delete(cachekey)

            # Check the vary header in the cache to see if this request
            # matches what varies in the cache.
            if method in ['GET', 'HEAD'] and 'vary' in info:
                vary_headers = vary.lower().replace(' ', '').split(',')
                for header in vary_headers:
                    key = '-varied-%s' % header
                    if headers.get(header, None) != value:

            if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
                if info.has_key('-x-permanent-redirect-url'):
                    # Should cached permanent redirects be counted in our redirection count? For now, yes.
                    if redirections <= 0:
                        raise RedirectLimit("Redirected more times than rediection_limit allows.", {}, "")
                    (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
                    response.previous = Response(info)
                    response.previous.fromcache = True

                    # Determine our course of action:
                    #   Is the cached entry fresh or stale?
                    #   Has the client requested a non-cached response?
                    #
                    # There seems to be three possible answers:
                    # 1. [FRESH] Return the cache entry w/o doing a GET
                    # 2. [STALE] Do the GET (but add in cache validators if available)
                    # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                    entry_disposition = _entry_disposition(info, headers)

                    if entry_disposition == "FRESH":
                        if not cached_value:
                            # Fresh-by-headers but no body: synthesize a 504.
                            info['status'] = '504'
                        response = Response(info)
                            response.fromcache = True
                        return (response, content)

                    if entry_disposition == "STALE":
                        # Revalidate with whatever validators the entry has.
                        if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
                            headers['if-none-match'] = info['etag']
                        if info.has_key('last-modified') and not 'last-modified' in headers:
                            headers['if-modified-since'] = info['last-modified']
                    elif entry_disposition == "TRANSPARENT":

                    (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)

                if response.status == 304 and method == "GET":
                    # Rewrite the cache entry with the new end-to-end headers
                    # Take all headers that are in response
                    # and overwrite their values in info.
                    # unless they are hop-by-hop, or are listed in the connection header.

                    for key in _get_end2end_headers(response):
                        info[key] = response[key]
                    merged_response = Response(info)
                    if hasattr(response, "_stale_digest"):
                        merged_response._stale_digest = response._stale_digest
                    _updateCache(headers, merged_response, content, self.cache, cachekey)
                    # Present the revalidated entry as a normal cached 200.
                    response = merged_response
                    response.status = 200
                    response.fromcache = True

                elif response.status == 200:
                    content = new_content
                    # Any other status invalidates the stale entry; use the
                    # network response as-is.
                    self.cache.delete(cachekey)
                    content = new_content
                cc = _parse_cache_control(headers)
                if cc.has_key('only-if-cached'):
                    # only-if-cached with no usable entry => 504 (RFC 2616).
                    info['status'] = '504'
                    response = Response(info)
                    (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
        except Exception, e:
            # Optionally convert any failure into a synthetic Response
            # instead of raising, for callers that prefer status codes.
            if self.force_exception_to_status_code:
                if isinstance(e, HttpLib2ErrorWithResponse):
                    response = e.response
                    response.status = 500
                    response.reason = str(e)
                elif isinstance(e, socket.timeout):
                    content = "Request Timeout"
                    response = Response({
                            "content-type": "text/plain",
                            "content-length": len(content)
                    response.reason = "Request Timeout"
                    response = Response({
                            "content-type": "text/plain",
                            "content-length": len(content)
                    response.reason = "Bad Request"

        return (response, content)
    def _get_proxy_info(self, scheme, authority):
        """Return a ProxyInfo instance (or None) based on the scheme
        and authority.

        NOTE(review): the tail of this method (including its return) is
        not visible in this view.
        """
        hostname, port = urllib.splitport(authority)
        proxy_info = self.proxy_info
        if callable(proxy_info):
            # A callable config produces a per-scheme ProxyInfo on demand.
            proxy_info = proxy_info(scheme)

        # Respect per-host proxy bypass when the ProxyInfo supports it.
        if (hasattr(proxy_info, 'applies_to')
            and not proxy_info.applies_to(hostname)):
class Response(dict):
    """An object more like email.Message than httplib.HTTPResponse."""

    # NOTE(review): the attribute assignments that these bare-string
    # "docstrings" describe (fromcache/version/status/reason defaults) are
    # not visible in this view.
    """Is this response from our local cache"""

    """HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. """

    "Status code returned by server. "

    """Reason phrase returned by server."""

    def __init__(self, info):
        # info is either an email.Message or
        # an httplib.HTTPResponse object.
        if isinstance(info, httplib.HTTPResponse):
            # Copy headers with lower-cased names, then mirror the basic
            # status-line attributes onto the object.
            for key, value in info.getheaders():
                self[key.lower()] = value
            self.status = info.status
            self['status'] = str(self.status)
            self.reason = info.reason
            self.version = info.version
        elif isinstance(info, email.Message.Message):
            for key, value in info.items():
                self[key.lower()] = value
            # Cached entries carry the status as a 'status' header.
            self.status = int(self['status'])
            # Fallback branch for any other mapping with iteritems()
            # (e.g. another Response). NOTE(review): the 'else:' line
            # introducing this branch is not visible in this view.
            for key, value in info.iteritems():
                self[key.lower()] = value
            self.status = int(self.get('status', self.status))
            self.reason = self.get('reason', self.reason)

    def __getattr__(self, name):
        # NOTE(review): the lines between 'def' and 'raise' are not visible
        # here; unknown names end in AttributeError so hasattr() works.
        raise AttributeError, name