1 # -*- test-case-name: twisted.web.test.test_webclient -*-
2 # Copyright (c) Twisted Matrix Laboratories.
3 # See LICENSE for details.
10 from urlparse import urlunparse
11 from urllib import splithost, splittype
14 from zope.interface import implements
16 from twisted.python import log
17 from twisted.python.failure import Failure
18 from twisted.web import http
19 from twisted.internet import defer, protocol, task, reactor
20 from twisted.internet.interfaces import IProtocol
21 from twisted.internet.endpoints import TCP4ClientEndpoint, SSL4ClientEndpoint
22 from twisted.python import failure
23 from twisted.python.util import InsensitiveDict
24 from twisted.python.components import proxyForInterface
25 from twisted.web import error
26 from twisted.web.iweb import UNKNOWN_LENGTH, IBodyProducer, IResponse
27 from twisted.web.http_headers import Headers
28 from twisted.python.compat import set
class PartialDownloadError(error.Error):
    """
    Page was only partially downloaded, we got disconnected in middle.

    @ivar response: All of the response body which was downloaded.
    """
class HTTPPageGetter(http.HTTPClient):
    """
    Gets a resource via HTTP, then quits.

    Typically used with L{HTTPClientFactory}.  Note that this class does not,
    by itself, do anything with the response.  If you want to download a
    resource into a file, use L{HTTPPageDownloader} instead.

    @ivar _completelyDone: A boolean indicating whether any further requests
        are necessary after this one completes in order to provide a result to
        C{self.factory.deferred}.  If it is C{False}, then a redirect is going
        to be followed.  Otherwise, this protocol's connection is the last one
        before firing the result Deferred.  This is used to make sure the
        result Deferred is only fired after the connection is cleaned up.
    """
    quietLoss = 0
    followRedirect = True
    failed = 0

    _completelyDone = True

    # Headers we compute ourselves in connectionMade and therefore must not
    # be blindly copied from self.factory.headers.
    _specialHeaders = set(('host', 'user-agent', 'cookie', 'content-length'))

    def connectionMade(self):
        method = getattr(self.factory, 'method', 'GET')
        self.sendCommand(method, self.factory.path)
        if self.factory.scheme == 'http' and self.factory.port != 80:
            host = '%s:%s' % (self.factory.host, self.factory.port)
        elif self.factory.scheme == 'https' and self.factory.port != 443:
            host = '%s:%s' % (self.factory.host, self.factory.port)
        else:
            host = self.factory.host
        self.sendHeader('Host', self.factory.headers.get("host", host))
        self.sendHeader('User-Agent', self.factory.agent)
        data = getattr(self.factory, 'postdata', None)
        if data is not None:
            self.sendHeader("Content-Length", str(len(data)))

        cookieData = []
        for (key, value) in self.factory.headers.items():
            if key.lower() not in self._specialHeaders:
                # we calculated it on our own
                self.sendHeader(key, value)
            if key.lower() == 'cookie':
                cookieData.append(value)
        for cookie, cookval in self.factory.cookies.items():
            cookieData.append('%s=%s' % (cookie, cookval))
        if cookieData:
            self.sendHeader('Cookie', '; '.join(cookieData))
        self.endHeaders()
        self.headers = {}

        if data is not None:
            self.transport.write(data)

    def handleHeader(self, key, value):
        """
        Called every time a header is received.  Stores the header information
        as key-value pairs in the C{headers} attribute.

        @type key: C{str}
        @param key: An HTTP header field name.

        @type value: C{str}
        @param value: An HTTP header field value.
        """
        key = key.lower()
        l = self.headers.setdefault(key, [])
        l.append(value)

    def handleStatus(self, version, status, message):
        self.version, self.status, self.message = version, status, message
        self.factory.gotStatus(version, status, message)

    def handleEndHeaders(self):
        self.factory.gotHeaders(self.headers)
        m = getattr(self, 'handleStatus_' + self.status,
                    self.handleStatusDefault)
        m()

    def handleStatus_200(self):
        # A successful response requires no special handling.
        pass

    handleStatus_201 = lambda self: self.handleStatus_200()
    handleStatus_202 = lambda self: self.handleStatus_200()

    def handleStatusDefault(self):
        self.failed = 1

    def handleStatus_301(self):
        l = self.headers.get('location')
        if not l:
            self.handleStatusDefault()
            return
        url = l[0]
        if self.followRedirect:
            scheme, host, port, path = \
                _parse(url, defaultPort=self.transport.getPeer().port)

            self.factory._redirectCount += 1
            if self.factory._redirectCount >= self.factory.redirectLimit:
                err = error.InfiniteRedirection(
                    self.status,
                    'Infinite redirection detected',
                    location=url)
                self.factory.noPage(failure.Failure(err))
                self.quietLoss = True
                self.transport.loseConnection()
                return

            self._completelyDone = False
            self.factory.setURL(url)

            if self.factory.scheme == 'https':
                from twisted.internet import ssl
                contextFactory = ssl.ClientContextFactory()
                reactor.connectSSL(self.factory.host, self.factory.port,
                                   self.factory, contextFactory)
            else:
                reactor.connectTCP(self.factory.host, self.factory.port,
                                   self.factory)
        else:
            self.handleStatusDefault()
            self.factory.noPage(
                failure.Failure(
                    error.PageRedirect(
                        self.status, self.message, location=url)))
        self.quietLoss = True
        self.transport.loseConnection()

    def handleStatus_302(self):
        if self.afterFoundGet:
            self.handleStatus_303()
        else:
            self.handleStatus_301()

    def handleStatus_303(self):
        self.factory.method = 'GET'
        self.handleStatus_301()

    def connectionLost(self, reason):
        """
        When the connection used to issue the HTTP request is closed, notify
        the factory if we have not already, so it can produce a result.
        """
        if not self.quietLoss:
            http.HTTPClient.connectionLost(self, reason)
            self.factory.noPage(reason)
        if self._completelyDone:
            # Only if we think we're completely done do we tell the factory
            # that we're "disconnected".  This way when we're following
            # redirects, only the last protocol used will fire the
            # _disconnectedDeferred.
            self.factory._disconnectedDeferred.callback(None)

    def handleResponse(self, response):
        if self.quietLoss:
            return
        if self.failed:
            self.factory.noPage(
                failure.Failure(
                    error.Error(
                        self.status, self.message, response)))
        if self.factory.method == 'HEAD':
            # Callback with empty string, since there is never a response
            # body for HEAD requests.
            self.factory.page('')
        elif self.length != None and self.length != 0:
            self.factory.noPage(failure.Failure(
                PartialDownloadError(self.status, self.message, response)))
        else:
            self.factory.page(response)
        # server might be stupid and not close connection. admittedly
        # the fact we do only one request per connection is also
        # stupid...
        self.transport.loseConnection()

    def timeout(self):
        self.quietLoss = True
        self.transport.loseConnection()
        self.factory.noPage(defer.TimeoutError(
            "Getting %s took longer than %s seconds." % (
                self.factory.url, self.factory.timeout)))
class HTTPPageDownloader(HTTPPageGetter):
    """
    A L{HTTPPageGetter} variant which hands the response body to its factory
    incrementally (for download-to-file) instead of buffering it.
    """
    transmittingPage = 0

    def handleStatus_200(self, partialContent=0):
        HTTPPageGetter.handleStatus_200(self)
        self.transmittingPage = 1
        self.factory.pageStart(partialContent)

    def handleStatus_206(self):
        self.handleStatus_200(partialContent=1)

    def handleResponsePart(self, data):
        if self.transmittingPage:
            self.factory.pagePart(data)

    def handleResponseEnd(self):
        if self.length:
            # We were disconnected with bytes still outstanding.
            self.transmittingPage = 0
            self.factory.noPage(
                failure.Failure(
                    PartialDownloadError(self.status)))
        if self.transmittingPage:
            self.factory.pageEnd()
            self.transmittingPage = 0
        if self.failed:
            self.factory.noPage(
                failure.Failure(
                    error.Error(
                        self.status, self.message, None)))
            self.transport.loseConnection()
class HTTPClientFactory(protocol.ClientFactory):
    """Download a given URL.

    @type deferred: Deferred
    @ivar deferred: A Deferred that will fire when the content has
          been retrieved. Once this is fired, the ivars `status', `version',
          and `message' will be set.

    @type status: str
    @ivar status: The status of the response.

    @type version: str
    @ivar version: The version of the response.

    @type message: str
    @ivar message: The text message returned with the status.

    @type response_headers: dict
    @ivar response_headers: The headers that were specified in the
          response from the server.

    @type method: str
    @ivar method: The HTTP method to use in the request.  This should be one
          of OPTIONS, GET, HEAD, POST, PUT, DELETE, TRACE, or CONNECT (case
          matters).  Other values may be specified if the server being
          contacted supports them.

    @type redirectLimit: int
    @ivar redirectLimit: The maximum number of HTTP redirects that can occur
          before it is assumed that the redirection is endless.

    @type afterFoundGet: C{bool}
    @ivar afterFoundGet: Deviate from the HTTP 1.1 RFC by handling redirects
        the same way as most web browsers; if the request method is POST and a
        302 status is encountered, the redirect is followed with a GET method.

    @type _redirectCount: int
    @ivar _redirectCount: The current number of HTTP redirects encountered.

    @ivar _disconnectedDeferred: A L{Deferred} which only fires after the last
        connection associated with the request (redirects may cause multiple
        connections to be required) has closed.  The result Deferred will only
        fire after this Deferred, so that callers can be assured that there
        are no more event sources in the reactor once they get the result.
    """

    protocol = HTTPPageGetter

    url = None
    scheme = None
    host = ''
    port = None
    path = None

    def __init__(self, url, method='GET', postdata=None, headers=None,
                 agent="Twisted PageGetter", timeout=0, cookies=None,
                 followRedirect=True, redirectLimit=20,
                 afterFoundGet=False):
        self.followRedirect = followRedirect
        self.redirectLimit = redirectLimit
        self._redirectCount = 0
        self.timeout = timeout
        self.agent = agent
        self.afterFoundGet = afterFoundGet
        if cookies is None:
            cookies = {}
        self.cookies = cookies
        if headers is not None:
            self.headers = InsensitiveDict(headers)
        else:
            self.headers = InsensitiveDict()
        if postdata is not None:
            self.headers.setdefault('Content-Length', len(postdata))
            # just in case a broken http/1.1 decides to keep connection alive
            self.headers.setdefault("connection", "close")
        self.postdata = postdata
        self.method = method

        self.setURL(url)

        self.waiting = 1
        self._disconnectedDeferred = defer.Deferred()
        self.deferred = defer.Deferred()
        # Make sure the first callback on the result Deferred pauses the
        # callback chain until the request connection is closed.
        self.deferred.addBoth(self._waitForDisconnect)
        self.response_headers = None

    def _waitForDisconnect(self, passthrough):
        """
        Chain onto the _disconnectedDeferred, preserving C{passthrough}, so
        that the result is only available after the associated connection has
        been closed.
        """
        self._disconnectedDeferred.addCallback(lambda ignored: passthrough)
        return self._disconnectedDeferred

    def __repr__(self):
        return "<%s: %s>" % (self.__class__.__name__, self.url)

    def setURL(self, url):
        self.url = url
        scheme, host, port, path = _parse(url)
        if scheme and host:
            self.scheme = scheme
            self.host = host
            self.port = port
        self.path = path

    def buildProtocol(self, addr):
        p = protocol.ClientFactory.buildProtocol(self, addr)
        p.followRedirect = self.followRedirect
        p.afterFoundGet = self.afterFoundGet
        if self.timeout:
            timeoutCall = reactor.callLater(self.timeout, p.timeout)
            self.deferred.addBoth(self._cancelTimeout, timeoutCall)
        return p

    def _cancelTimeout(self, result, timeoutCall):
        if timeoutCall.active():
            timeoutCall.cancel()
        return result

    def gotHeaders(self, headers):
        self.response_headers = headers
        if headers.has_key('set-cookie'):
            for cookie in headers['set-cookie']:
                cookparts = cookie.split(';')
                # Only the first part is the name=value pair; the rest are
                # attributes such as path or expiry.
                cook = cookparts[0].lstrip()
                k, v = cook.split('=', 1)
                self.cookies[k.lstrip()] = v.lstrip()

    def gotStatus(self, version, status, message):
        self.version, self.status, self.message = version, status, message

    def page(self, page):
        if self.waiting:
            self.waiting = 0
            self.deferred.callback(page)

    def noPage(self, reason):
        if self.waiting:
            self.waiting = 0
            self.deferred.errback(reason)

    def clientConnectionFailed(self, _, reason):
        """
        When a connection attempt fails, the request cannot be issued.  If no
        result has yet been provided to the result Deferred, provide the
        connection failure reason as an error result.
        """
        if self.waiting:
            self.waiting = 0
            # If the connection attempt failed, there is nothing more to
            # disconnect, so just fire that Deferred now.
            self._disconnectedDeferred.callback(None)
            self.deferred.errback(reason)
class HTTPDownloader(HTTPClientFactory):
    """Download to a file."""

    protocol = HTTPPageDownloader
    value = None

    def __init__(self, url, fileOrName,
                 method='GET', postdata=None, headers=None,
                 agent="Twisted client", supportPartial=0,
                 timeout=0, cookies=None, followRedirect=1,
                 redirectLimit=20, afterFoundGet=False):
        self.requestedPartial = 0
        if isinstance(fileOrName, types.StringTypes):
            self.fileName = fileOrName
            self.file = None
            if supportPartial and os.path.exists(self.fileName):
                fileLength = os.path.getsize(self.fileName)
                if fileLength:
                    self.requestedPartial = fileLength
                    if headers == None:
                        headers = {}
                    headers["range"] = "bytes=%d-" % fileLength
        else:
            self.file = fileOrName
        HTTPClientFactory.__init__(
            self, url, method=method, postdata=postdata, headers=headers,
            agent=agent, timeout=timeout, cookies=cookies,
            followRedirect=followRedirect, redirectLimit=redirectLimit,
            afterFoundGet=afterFoundGet)

    def gotHeaders(self, headers):
        HTTPClientFactory.gotHeaders(self, headers)
        if self.requestedPartial:
            contentRange = headers.get("content-range", None)
            if not contentRange:
                # server doesn't support partial requests, oh well
                self.requestedPartial = 0
                return
            start, end, realLength = http.parseContentRange(contentRange[0])
            if start != self.requestedPartial:
                # server is acting weirdly
                self.requestedPartial = 0

    def openFile(self, partialContent):
        if partialContent:
            file = open(self.fileName, 'rb+')
            # Append to the existing partial download.
            file.seek(0, 2)
        else:
            file = open(self.fileName, 'wb')
        return file

    def pageStart(self, partialContent):
        """Called on page download start.

        @param partialContent: tells us if the download is partial download we requested.
        """
        if partialContent and not self.requestedPartial:
            raise ValueError(
                "we shouldn't get partial content response if we didn't want it!")
        if self.waiting:
            try:
                if not self.file:
                    self.file = self.openFile(partialContent)
            except IOError:
                #raise
                self.deferred.errback(failure.Failure())

    def pagePart(self, data):
        if not self.file:
            return
        try:
            self.file.write(data)
        except IOError:
            #raise
            self.file = None
            self.deferred.errback(failure.Failure())

    def noPage(self, reason):
        """
        Close the storage file and errback the waiting L{Deferred} with the
        given reason.
        """
        if self.waiting:
            self.waiting = 0
            if self.file:
                try:
                    self.file.close()
                except:
                    log.err(None, "Error closing HTTPDownloader file")
            self.deferred.errback(reason)

    def pageEnd(self):
        self.waiting = 0
        if not self.file:
            return
        try:
            self.file.close()
        except IOError:
            self.deferred.errback(failure.Failure())
            return
        self.deferred.callback(self.value)
531 At some point this should be replaced with a better URL implementation.
533 def __new__(self, scheme, host, port, path):
534 return tuple.__new__(_URL, (scheme, host, port, path))
537 def __init__(self, scheme, host, port, path):
def _parse(url, defaultPort=None):
    """
    Split the given URL into the scheme, host, port, and path.

    @type url: C{str}
    @param url: An URL to parse.

    @type defaultPort: C{int} or C{None}
    @param defaultPort: An alternate value to use as the port if the URL does
        not include one.

    @return: A four-tuple of the scheme, host, port, and path of the URL.  All
        of these are C{str} instances except for port, which is an C{int}.
    """
    url = url.strip()
    parsed = http.urlparse(url)
    scheme = parsed[0]
    path = urlunparse(('', '') + parsed[2:])

    # Ensure that path is a string.
    path = str(path)

    if defaultPort is None:
        if scheme == 'https':
            defaultPort = 443
        else:
            defaultPort = 80

    host, port = parsed[1], defaultPort
    if ':' in host:
        host, port = host.split(':')
        try:
            port = int(port)
        except ValueError:
            port = defaultPort

    if path == '':
        path = '/'

    return _URL(scheme, host, port, path)
def _makeGetterFactory(url, factoryFactory, contextFactory=None,
                       *args, **kwargs):
    """
    Create and connect an HTTP page getting factory.

    Any additional positional or keyword arguments are used when calling
    C{factoryFactory}.

    @param factoryFactory: Factory factory that is called with C{url}, C{args}
        and C{kwargs} to produce the getter

    @param contextFactory: Context factory to use when creating a secure
        connection, defaulting to C{None}

    @return: The factory created by C{factoryFactory}
    """
    scheme, host, port, path = _parse(url)
    factory = factoryFactory(url, *args, **kwargs)
    if scheme == 'https':
        from twisted.internet import ssl
        if contextFactory is None:
            contextFactory = ssl.ClientContextFactory()
        reactor.connectSSL(host, port, factory, contextFactory)
    else:
        reactor.connectTCP(host, port, factory)
    return factory
def getPage(url, contextFactory=None, *args, **kwargs):
    """
    Download a web page as a string.

    Download a page.  Return a deferred, which will callback with a
    page (as a string) or errback with a description of the error.

    See L{HTTPClientFactory} to see what extra arguments can be passed.
    """
    return _makeGetterFactory(
        url,
        HTTPClientFactory,
        contextFactory=contextFactory,
        *args, **kwargs).deferred
def downloadPage(url, file, contextFactory=None, *args, **kwargs):
    """
    Download a web page to a file.

    @param file: path to file on filesystem, or file-like object.

    See HTTPDownloader to see what extra args can be passed.
    """
    factoryFactory = lambda url, *a, **kw: HTTPDownloader(url, file, *a, **kw)
    return _makeGetterFactory(
        url,
        factoryFactory,
        contextFactory=contextFactory,
        *args, **kwargs).deferred
643 # The code which follows is based on the new HTTP client implementation. It
644 # should be significantly better than anything above, though it is not yet
645 # feature equivalent.
647 from twisted.web.error import SchemeNotSupported
648 from twisted.web._newclient import Request, Response, HTTP11ClientProtocol
649 from twisted.web._newclient import ResponseDone, ResponseFailed
650 from twisted.web._newclient import RequestNotSent, RequestTransmissionFailed
651 from twisted.web._newclient import ResponseNeverReceived
try:
    from twisted.internet.ssl import ClientContextFactory
except ImportError:
    class WebClientContextFactory(object):
        """
        A web context factory which doesn't work because the necessary SSL
        support is missing.
        """
        def getContext(self, hostname, port):
            raise NotImplementedError("SSL support unavailable")
else:
    class WebClientContextFactory(ClientContextFactory):
        """
        A web context factory which ignores the hostname and port and does no
        certificate verification.
        """
        def getContext(self, hostname, port):
            return ClientContextFactory.getContext(self)
674 class _WebToNormalContextFactory(object):
676 Adapt a web context factory to a normal context factory.
678 @ivar _webContext: A web context factory which accepts a hostname and port
679 number to its C{getContext} method.
681 @ivar _hostname: The hostname which will be passed to
682 C{_webContext.getContext}.
684 @ivar _port: The port number which will be passed to
685 C{_webContext.getContext}.
687 def __init__(self, webContext, hostname, port):
688 self._webContext = webContext
689 self._hostname = hostname
693 def getContext(self):
695 Called the wrapped web context factory's C{getContext} method with a
696 hostname and port number and return the resulting context object.
698 return self._webContext.getContext(self._hostname, self._port)
class FileBodyProducer(object):
    """
    L{FileBodyProducer} produces bytes from an input file object incrementally
    and writes them to a consumer.

    Since file-like objects cannot be read from in an event-driven manner,
    L{FileBodyProducer} uses a L{Cooperator} instance to schedule reads from
    the file.  This process is also paused and resumed based on notifications
    from the L{IConsumer} provider being written to.

    The file is closed after it has been read, or if the producer is stopped
    early.

    @ivar _inputFile: Any file-like object, bytes read from which will be
        written to a consumer.

    @ivar _cooperate: A method like L{Cooperator.cooperate} which is used to
        schedule all reads.

    @ivar _readSize: The number of bytes to read from C{_inputFile} at a time.
    """
    implements(IBodyProducer)

    # Python 2.4 doesn't have these symbolic constants
    _SEEK_SET = getattr(os, 'SEEK_SET', 0)
    _SEEK_END = getattr(os, 'SEEK_END', 2)

    def __init__(self, inputFile, cooperator=task, readSize=2 ** 16):
        self._inputFile = inputFile
        self._cooperate = cooperator.cooperate
        self._readSize = readSize
        self.length = self._determineLength(inputFile)

    def _determineLength(self, fObj):
        """
        Determine how many bytes can be read out of C{fObj} (assuming it is
        not modified from this point on).  If the determination cannot be
        made, return C{UNKNOWN_LENGTH}.
        """
        try:
            seek = fObj.seek
            tell = fObj.tell
        except AttributeError:
            return UNKNOWN_LENGTH
        originalPosition = tell()
        seek(0, self._SEEK_END)
        end = tell()
        seek(originalPosition, self._SEEK_SET)
        return end - originalPosition

    def stopProducing(self):
        """
        Permanently stop writing bytes from the file to the consumer by
        stopping the underlying L{CooperativeTask}.
        """
        self._inputFile.close()
        self._task.stop()

    def startProducing(self, consumer):
        """
        Start a cooperative task which will read bytes from the input file and
        write them to C{consumer}.  Return a L{Deferred} which fires after all
        bytes have been written.

        @param consumer: Any L{IConsumer} provider
        """
        self._task = self._cooperate(self._writeloop(consumer))
        d = self._task.whenDone()
        def maybeStopped(reason):
            # IBodyProducer.startProducing's Deferred isn't supposed to fire
            # if stopProducing is called.
            reason.trap(task.TaskStopped)
            return defer.Deferred()
        d.addCallbacks(lambda ignored: None, maybeStopped)
        return d

    def _writeloop(self, consumer):
        """
        Return an iterator which reads one chunk of bytes from the input file
        and writes them to the consumer for each time it is iterated.
        """
        while True:
            bytes = self._inputFile.read(self._readSize)
            if not bytes:
                self._inputFile.close()
                break
            consumer.write(bytes)
            yield None

    def pauseProducing(self):
        """
        Temporarily suspend copying bytes from the input file to the consumer
        by pausing the L{CooperativeTask} which drives that activity.
        """
        self._task.pause()

    def resumeProducing(self):
        """
        Undo the effects of a previous C{pauseProducing} and resume copying
        bytes to the consumer by resuming the L{CooperativeTask} which drives
        that activity.
        """
        self._task.resume()
class _HTTP11ClientFactory(protocol.Factory):
    """
    A factory for L{HTTP11ClientProtocol}, used by L{HTTPConnectionPool}.

    @ivar _quiescentCallback: The quiescent callback to be passed to protocol
        instances, used to return them to the connection pool.
    """
    def __init__(self, quiescentCallback):
        self._quiescentCallback = quiescentCallback

    def buildProtocol(self, addr):
        return HTTP11ClientProtocol(self._quiescentCallback)
832 class _RetryingHTTP11ClientProtocol(object):
834 A wrapper for L{HTTP11ClientProtocol} that automatically retries requests.
836 @ivar _clientProtocol: The underlying L{HTTP11ClientProtocol}.
838 @ivar _newConnection: A callable that creates a new connection for a
842 def __init__(self, clientProtocol, newConnection):
843 self._clientProtocol = clientProtocol
844 self._newConnection = newConnection
847 def _shouldRetry(self, method, exception, bodyProducer):
849 Indicate whether request should be retried.
851 Only returns C{True} if method is idempotent, no response was
852 received, and no body was sent. The latter requirement may be relaxed
853 in the future, and PUT added to approved method list.
855 if method not in ("GET", "HEAD", "OPTIONS", "DELETE", "TRACE"):
857 if not isinstance(exception, (RequestNotSent, RequestTransmissionFailed,
858 ResponseNeverReceived)):
860 if bodyProducer is not None:
865 def request(self, request):
867 Do a request, and retry once (with a new connection) it it fails in
870 @param request: A L{Request} instance that will be requested using the
873 d = self._clientProtocol.request(request)
876 if self._shouldRetry(request.method, reason.value,
877 request.bodyProducer):
878 return self._newConnection().addCallback(
879 lambda connection: connection.request(request))
class HTTPConnectionPool(object):
    """
    A pool of persistent HTTP connections.

    Features:
     - Cached connections will eventually time out.
     - Limits on maximum number of persistent connections.

    Connections are stored using keys, which should be chosen such that any
    connections stored under a given key can be used interchangeably.

    Failed requests done using previously cached connections will be retried
    once if they use an idempotent method (e.g. GET), in case the HTTP server
    timed them out.

    @ivar persistent: Boolean indicating whether connections should be
        persistent.  Connections are persistent by default.

    @ivar maxPersistentPerHost: The maximum number of cached persistent
        connections for a C{host:port} destination.
    @type maxPersistentPerHost: C{int}

    @ivar cachedConnectionTimeout: Number of seconds a cached persistent
        connection will stay open before disconnecting.

    @ivar retryAutomatically: C{boolean} indicating whether idempotent
        requests should be retried once if no response was received.

    @ivar _factory: The factory used to connect to the proxy.

    @ivar _connections: Map (scheme, host, port) to lists of
        L{HTTP11ClientProtocol} instances.

    @ivar _timeouts: Map L{HTTP11ClientProtocol} instances to a
        C{IDelayedCall} instance of their timeout.
    """
    _factory = _HTTP11ClientFactory
    maxPersistentPerHost = 2
    cachedConnectionTimeout = 240
    retryAutomatically = True

    def __init__(self, reactor, persistent=True):
        self._reactor = reactor
        self.persistent = persistent
        self._connections = {}
        self._timeouts = {}

    def getConnection(self, key, endpoint):
        """
        Retrieve a connection, either new or cached, to be used for a HTTP
        request.

        If a cached connection is returned, it will not be used for other
        requests until it is put back (which will happen automatically), since
        we do not support pipelined requests.  If no cached connection is
        available, the passed in endpoint is used to create the connection.

        If the connection doesn't disconnect at the end of its request, it
        will be returned to this pool automatically.  As such, only a single
        request should be sent using the returned connection.

        @param key: A unique key identifying connections that can be used
            interchangeably.

        @param endpoint: An endpoint that can be used to open a new connection
            if no cached connection is available.

        @return: A C{Deferred} that will fire with a L{HTTP11ClientProtocol}
            (or a wrapper) that can be used to send a single HTTP request.
        """
        # Try to get cached version:
        connections = self._connections.get(key)
        while connections:
            connection = connections.pop(0)
            # Cancel the timeout; from now on we are responsible for this
            # connection's lifetime:
            self._timeouts[connection].cancel()
            del self._timeouts[connection]
            if connection.state == "QUIESCENT":
                if self.retryAutomatically:
                    newConnection = lambda: self._newConnection(key, endpoint)
                    connection = _RetryingHTTP11ClientProtocol(
                        connection, newConnection)
                return defer.succeed(connection)

        return self._newConnection(key, endpoint)

    def _newConnection(self, key, endpoint):
        """
        Create a new connection.

        This implements the new connection code path for L{getConnection}.
        """
        def quiescentCallback(protocol):
            self._putConnection(key, protocol)
        factory = self._factory(quiescentCallback)
        return endpoint.connect(factory)

    def _removeConnection(self, key, connection):
        """
        Remove a connection from the cache and disconnect it.
        """
        connection.transport.loseConnection()
        self._connections[key].remove(connection)
        del self._timeouts[connection]

    def _putConnection(self, key, connection):
        """
        Return a persistent connection to the pool.  This will be called by
        L{HTTP11ClientProtocol} when the connection becomes quiescent.
        """
        if connection.state != "QUIESCENT":
            # Log with traceback for debugging purposes:
            try:
                raise RuntimeError(
                    "BUG: Non-quiescent protocol added to connection pool.")
            except:
                log.err()
            return
        connections = self._connections.setdefault(key, [])
        if len(connections) == self.maxPersistentPerHost:
            # Drop the oldest cached connection to stay within the limit.
            dropped = connections.pop(0)
            dropped.transport.loseConnection()
            self._timeouts[dropped].cancel()
            del self._timeouts[dropped]
        connections.append(connection)
        cid = self._reactor.callLater(self.cachedConnectionTimeout,
                                      self._removeConnection,
                                      key, connection)
        self._timeouts[connection] = cid

    def closeCachedConnections(self):
        """
        Close all persistent connections and remove them from the pool.

        @return: L{defer.Deferred} that fires when all connections have been
            closed.
        """
        results = []
        for protocols in self._connections.itervalues():
            for p in protocols:
                results.append(p.abort())
        self._connections = {}
        for dc in self._timeouts.values():
            dc.cancel()
        self._timeouts = {}
        return defer.gatherResults(results).addCallback(lambda ign: None)
1044 class _AgentBase(object):
1046 Base class offering common facilities for L{Agent}-type classes.
1048 @ivar _reactor: The C{IReactorTime} implementation which will be used by
1049 the pool, and perhaps by subclasses as well.
1051 @ivar _pool: The L{HTTPConnectionPool} used to manage HTTP connections.
1054 def __init__(self, reactor, pool):
1056 pool = HTTPConnectionPool(reactor, False)
1057 self._reactor = reactor
1061 def _computeHostValue(self, scheme, host, port):
1063 Compute the string to use for the value of the I{Host} header, based on
1064 the given scheme, host name, and port number.
1066 if (scheme, port) in (('http', 80), ('https', 443)):
1068 return '%s:%d' % (host, port)
1071 def _requestWithEndpoint(self, key, endpoint, method, parsedURI,
1072 headers, bodyProducer, requestPath):
1074 Issue a new request, given the endpoint and the path sent as part of
1077 # Create minimal headers, if necessary:
1080 if not headers.hasHeader('host'):
1081 headers = headers.copy()
1082 headers.addRawHeader(
1083 'host', self._computeHostValue(parsedURI.scheme, parsedURI.host,
1086 d = self._pool.getConnection(key, endpoint)
1087 def cbConnected(proto):
1088 return proto.request(
1089 Request(method, requestPath, headers, bodyProducer,
1090 persistent=self._pool.persistent))
1091 d.addCallback(cbConnected)
class Agent(_AgentBase):
    """
    L{Agent} is a very basic HTTP client.  It supports I{HTTP} and I{HTTPS}
    scheme URIs (but performs no certificate checking by default).

    @param pool: A L{HTTPConnectionPool} instance, or C{None}, in which case a
        non-persistent L{HTTPConnectionPool} instance will be created.

    @ivar _contextFactory: A web context factory which will be used to create
        SSL context objects for any SSL connections the agent needs to make.

    @ivar _connectTimeout: If not C{None}, the timeout passed to C{connectTCP}
        or C{connectSSL} for specifying the connection timeout.

    @ivar _bindAddress: If not C{None}, the address passed to C{connectTCP} or
        C{connectSSL} for specifying the local address to bind to.
    """

    def __init__(self, reactor, contextFactory=WebClientContextFactory(),
                 connectTimeout=None, bindAddress=None,
                 pool=None):
        _AgentBase.__init__(self, reactor, pool)
        self._contextFactory = contextFactory
        self._connectTimeout = connectTimeout
        self._bindAddress = bindAddress

    def _wrapContextFactory(self, host, port):
        """
        Create and return a normal context factory wrapped around
        C{self._contextFactory} in such a way that C{self._contextFactory}
        will have the host and port information passed to it.

        @param host: A C{str} giving the hostname which will be connected to
            in order to issue a request.

        @param port: An C{int} giving the port number the connection will be
            on.

        @return: A context factory suitable to be passed to
            C{reactor.connectSSL}.
        """
        return _WebToNormalContextFactory(self._contextFactory, host, port)

    def _getEndpoint(self, scheme, host, port):
        """
        Get an endpoint for the given host and port, using a transport
        selected based on scheme.

        @param scheme: A string like C{'http'} or C{'https'} (the only two
            supported values) to use to determine how to establish the
            connection.

        @param host: A C{str} giving the hostname which will be connected to
            in order to issue a request.

        @param port: An C{int} giving the port number the connection will be
            on.

        @return: An endpoint which can be used to connect to given address.
        """
        kwargs = {}
        if self._connectTimeout is not None:
            kwargs['timeout'] = self._connectTimeout
        kwargs['bindAddress'] = self._bindAddress
        if scheme == 'http':
            return TCP4ClientEndpoint(self._reactor, host, port, **kwargs)
        elif scheme == 'https':
            return SSL4ClientEndpoint(self._reactor, host, port,
                                      self._wrapContextFactory(host, port),
                                      **kwargs)
        else:
            raise SchemeNotSupported("Unsupported scheme: %r" % (scheme,))

    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        Issue a new request.

        @param method: The request method to send.
        @type method: C{str}

        @param uri: The request URI send.
        @type uri: C{str}

        @param headers: The request headers to send.  If no I{Host} header is
            included, one will be added based on the request URI.
        @type headers: L{Headers}

        @param bodyProducer: An object which will produce the request body or,
            if the request body is to be empty, L{None}.
        @type bodyProducer: L{IBodyProducer} provider

        @return: A L{Deferred} which fires with the result of the request (a
            L{twisted.web.iweb.IResponse} provider), or fails if there is a
            problem setting up a connection over which to issue the request.
            It may also fail with L{SchemeNotSupported} if the scheme of the
            given URI is not supported.
        @rtype: L{Deferred}
        """
        try:
            parsedURI = _parse(uri)
            endpoint = self._getEndpoint(parsedURI.scheme, parsedURI.host,
                                         parsedURI.port)
        except SchemeNotSupported:
            return defer.fail(Failure())
        key = (parsedURI.scheme, parsedURI.host, parsedURI.port)
        return self._requestWithEndpoint(key, endpoint, method, parsedURI,
                                         headers, bodyProducer,
                                         parsedURI.path)
class ProxyAgent(_AgentBase):
    """
    An HTTP agent able to cross HTTP proxies.

    @ivar _proxyEndpoint: The endpoint used to connect to the proxy.
    """

    def __init__(self, endpoint, reactor=None, pool=None):
        if reactor is None:
            # Fall back to the global reactor only when none was supplied.
            from twisted.internet import reactor
        _AgentBase.__init__(self, reactor, pool)
        self._proxyEndpoint = endpoint

    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        Issue a new request via the configured proxy.
        """
        # Every connection goes to the same destination (the proxy itself),
        # so all of them share one cache key:
        proxyKey = ("http-proxy", self._proxyEndpoint)

        # To support proxying HTTPS via CONNECT, we will use key
        # ("http-proxy-CONNECT", scheme, host, port), and an endpoint that
        # wraps _proxyEndpoint with an additional callback to do the CONNECT.
        return self._requestWithEndpoint(proxyKey, self._proxyEndpoint,
                                         method, _parse(uri), headers,
                                         bodyProducer, uri)
1244 class _FakeUrllib2Request(object):
1246 A fake C{urllib2.Request} object for C{cookielib} to work with.
1248 @see: U{http://docs.python.org/library/urllib2.html#request-objects}
1251 @ivar uri: Request URI.
1253 @type headers: L{twisted.web.http_headers.Headers}
1254 @ivar headers: Request headers.
1257 @ivar type: The scheme of the URI.
1260 @ivar host: The host[:port] of the URI.
1264 def __init__(self, uri):
1266 self.headers = Headers()
1267 self.type, rest = splittype(self.uri)
1268 self.host, rest = splithost(rest)
1271 def has_header(self, header):
1272 return self.headers.hasHeader(header)
1275 def add_unredirected_header(self, name, value):
1276 self.headers.addRawHeader(name, value)
1279 def get_full_url(self):
1283 def get_header(self, name, default=None):
1284 headers = self.headers.getRawHeaders(name, default)
1285 if headers is not None:
1298 def is_unverifiable(self):
1299 # In theory this shouldn't be hardcoded.
1304 class _FakeUrllib2Response(object):
1306 A fake C{urllib2.Response} object for C{cookielib} to work with.
1308 @type response: C{twisted.web.iweb.IResponse}
1309 @ivar response: Underlying Twisted Web response.
1313 def __init__(self, response):
1314 self.response = response
1318 class _Meta(object):
1319 def getheaders(zelf, name):
1320 return self.response.headers.getRawHeaders(name, [])
class CookieAgent(object):
    """
    L{CookieAgent} extends the basic L{Agent} to add RFC-compliant
    handling of HTTP cookies.  Cookies are written to and extracted
    from a C{cookielib.CookieJar} instance.

    The same cookie jar instance will be used for any requests through this
    agent, mutating it whenever a I{Set-Cookie} header appears in a response.

    @type _agent: L{twisted.web.client.Agent}
    @ivar _agent: Underlying Twisted Web agent to issue requests through.

    @type cookieJar: C{cookielib.CookieJar}
    @ivar cookieJar: Initialized cookie jar to read cookies from and store
        cookies to.
    """
    def __init__(self, agent, cookieJar):
        self._agent = agent
        self.cookieJar = cookieJar

    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        Issue a new request to the wrapped L{Agent}.

        Send a I{Cookie} header if a cookie for C{uri} is stored in
        L{CookieAgent.cookieJar}. Cookies are automatically extracted and
        stored from requests.

        If a C{'cookie'} header appears in C{headers} it will override the
        automatic cookie header obtained from the cookie jar.

        @see: L{Agent.request}
        """
        if headers is None:
            headers = Headers()
        fakeRequest = _FakeUrllib2Request(uri)
        # An explicitly supplied cookie header disables the automatic
        # request cookies from the jar.
        if not headers.hasHeader('cookie'):
            self.cookieJar.add_cookie_header(fakeRequest)
            jarCookieValue = fakeRequest.get_header('Cookie', None)
            if jarCookieValue is not None:
                headers = headers.copy()
                headers.addRawHeader('cookie', jarCookieValue)

        responseDeferred = self._agent.request(
            method, uri, headers, bodyProducer)
        responseDeferred.addCallback(self._extractCookies, fakeRequest)
        return responseDeferred

    def _extractCookies(self, response, request):
        """
        Extract response cookies and store them in the cookie jar.

        @type response: L{twisted.web.iweb.IResponse}
        @param response: Twisted Web response.

        @param request: A urllib2 compatible request object.
        """
        resp = _FakeUrllib2Response(response)
        self.cookieJar.extract_cookies(resp, request)
        return response
class GzipDecoder(proxyForInterface(IResponse)):
    """
    A wrapper for a L{Response} instance which handles gzip'ed body.

    @ivar original: The original L{Response} object.

    @since: 11.1
    """

    def __init__(self, response):
        self.original = response
        # The decompressed size cannot be known up front.
        self.length = UNKNOWN_LENGTH

    def deliverBody(self, protocol):
        """
        Override C{deliverBody} to wrap the given C{protocol} with
        L{_GzipProtocol}.
        """
        decompressingProtocol = _GzipProtocol(protocol, self.original)
        self.original.deliverBody(decompressingProtocol)
class _GzipProtocol(proxyForInterface(IProtocol)):
    """
    A L{Protocol} implementation which wraps another one, transparently
    decompressing received data.

    @ivar _zlibDecompress: A zlib decompress object used to decompress the
        data stream.

    @ivar _response: A reference to the original response, in case of errors.

    @since: 11.1
    """

    def __init__(self, protocol, response):
        self.original = protocol
        self._response = response
        # wbits of 16 + MAX_WBITS makes zlib expect a gzip header/trailer.
        self._zlibDecompress = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def dataReceived(self, data):
        """
        Decompress C{data} with the zlib decompressor, forwarding the raw data
        to the original protocol.
        """
        try:
            decompressed = self._zlibDecompress.decompress(data)
        except zlib.error:
            raise ResponseFailed([failure.Failure()], self._response)
        if decompressed:
            self.original.dataReceived(decompressed)

    def connectionLost(self, reason):
        """
        Forward the connection lost event, flushing remaining data from the
        decompressor if any.
        """
        try:
            remaining = self._zlibDecompress.flush()
        except zlib.error:
            raise ResponseFailed([reason, failure.Failure()], self._response)
        if remaining:
            self.original.dataReceived(remaining)
        self.original.connectionLost(reason)
1463 class ContentDecoderAgent(object):
1465 An L{Agent} wrapper to handle encoded content.
1467 It takes care of declaring the support for content in the
1468 I{Accept-Encoding} header, and automatically decompresses the received data
1469 if it's effectively using compression.
1471 @param decoders: A list or tuple of (name, decoder) objects. The name
1472 declares which decoding the decoder supports, and the decoder must
1473 return a response object when called/instantiated. For example,
1474 C{(('gzip', GzipDecoder))}. The order determines how the decoders are
1475 going to be advertized to the server.
1480 def __init__(self, agent, decoders):
1482 self._decoders = dict(decoders)
1483 self._supported = ','.join([decoder[0] for decoder in decoders])
1486 def request(self, method, uri, headers=None, bodyProducer=None):
1488 Send a client request which declares supporting compressed content.
1490 @see: L{Agent.request}.
1495 headers = headers.copy()
1496 headers.addRawHeader('accept-encoding', self._supported)
1497 deferred = self._agent.request(method, uri, headers, bodyProducer)
1498 return deferred.addCallback(self._handleResponse)
1501 def _handleResponse(self, response):
1503 Check if the response is encoded, and wrap it to handle decompression.
1505 contentEncodingHeaders = response.headers.getRawHeaders(
1506 'content-encoding', [])
1507 contentEncodingHeaders = ','.join(contentEncodingHeaders).split(',')
1508 while contentEncodingHeaders:
1509 name = contentEncodingHeaders.pop().strip()
1510 decoder = self._decoders.get(name)
1511 if decoder is not None:
1512 response = decoder(response)
1515 contentEncodingHeaders.append(name)
1517 if contentEncodingHeaders:
1518 response.headers.setRawHeaders(
1519 'content-encoding', [','.join(contentEncodingHeaders)])
1521 response.headers.removeHeader('content-encoding')
class RedirectAgent(object):
    """
    An L{Agent} wrapper which handles HTTP redirects.

    The implementation is rather strict: 301 and 302 behaves like 307, not
    redirecting automatically on methods different from C{GET} and C{HEAD}.

    @param redirectLimit: The maximum number of times the agent is allowed to
        follow redirects before failing with a L{error.InfiniteRedirection}.
    """

    def __init__(self, agent, redirectLimit=20):
        self._agent = agent
        self._redirectLimit = redirectLimit

    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        Send a client request following HTTP redirects.

        @see: L{Agent.request}.
        """
        deferred = self._agent.request(method, uri, headers, bodyProducer)
        return deferred.addCallback(
            self._handleResponse, method, uri, headers, 0)

    def _handleRedirect(self, response, method, uri, headers, redirectCount):
        """
        Handle a redirect response, checking the number of redirects already
        followed, and extracting the location header fields.
        """
        if redirectCount >= self._redirectLimit:
            limitError = error.InfiniteRedirection(
                response.code,
                'Infinite redirection detected',
                location=uri)
            raise ResponseFailed([failure.Failure(limitError)], response)
        locationHeaders = response.headers.getRawHeaders('location', [])
        if not locationHeaders:
            locationError = error.RedirectWithNoLocation(
                response.code, 'No location header field', uri)
            raise ResponseFailed([failure.Failure(locationError)], response)
        # Follow the first Location value; responses to the new request go
        # back through _handleResponse with an incremented counter.
        nextDeferred = self._agent.request(
            method, locationHeaders[0], headers)
        return nextDeferred.addCallback(
            self._handleResponse, method, uri, headers, redirectCount + 1)

    def _handleResponse(self, response, method, uri, headers, redirectCount):
        """
        Handle the response, making another request if it indicates a redirect.
        """
        strictRedirectCodes = (http.MOVED_PERMANENTLY, http.FOUND,
                               http.TEMPORARY_REDIRECT)
        if response.code in strictRedirectCodes:
            # 301/302/307: only safe methods are redirected automatically.
            if method not in ('GET', 'HEAD'):
                redirectError = error.PageRedirect(
                    response.code, location=uri)
                raise ResponseFailed(
                    [failure.Failure(redirectError)], response)
            return self._handleRedirect(response, method, uri, headers,
                                        redirectCount)
        if response.code == http.SEE_OTHER:
            # 303 always switches the follow-up request to GET.
            return self._handleRedirect(response, 'GET', uri, headers,
                                        redirectCount)
        return response
1596 'PartialDownloadError', 'HTTPPageGetter', 'HTTPPageDownloader',
1597 'HTTPClientFactory', 'HTTPDownloader', 'getPage', 'downloadPage',
1598 'ResponseDone', 'Response', 'ResponseFailed', 'Agent', 'CookieAgent',
1599 'ProxyAgent', 'ContentDecoderAgent', 'GzipDecoder', 'RedirectAgent',
1600 'HTTPConnectionPool']