1 """Open an arbitrary URL.
3 See the following document for more info on URLs:
4 "Names and Addresses, URIs, URLs, URNs, URCs", at
5 http://www.w3.org/pub/WWW/Addressing/Overview.html
7 See also the HTTP spec (from which the error codes are derived):
8 "HTTP - Hypertext Transfer Protocol", at
9 http://www.w3.org/pub/WWW/Protocols/
11 Related standards and specs:
12 - RFC1808: the "relative URL" spec. (authoritative status)
13 - RFC1738 - the "URL standard". (authoritative status)
14 - RFC1630 - the "URI spec". (informational status)
16 The object returned by URLopener().open(file) will differ per
17 protocol. All you know is that it has methods read(), readline(),
18 readlines(), fileno(), close() and info(). The read*(), fileno()
19 and close() methods work like those of open files.
20 The info() method returns a mimetools.Message object which can be
21 used to query various info about the object, if available.
22 (mimetools.Message objects are queried with the getheader() method.)
32 from urlparse import urljoin as basejoin
34 __all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
35 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
36 "urlencode", "url2pathname", "pathname2url", "splittag",
37 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
38 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
39 "splitnport", "splitquery", "splitattr", "splitvalue",
42 __version__ = '1.17' # XXX This version is not always updated :-(
44 MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
46 # Helper for non-unix systems
48 from nturl2path import url2pathname, pathname2url
49 elif os.name == 'riscos':
50 from rourl2path import url2pathname, pathname2url
def url2pathname(pathname):
    """OS-specific conversion from a relative URL of the 'file' scheme
    to a file system path; not recommended for general use."""
    # Fallback branch used when os.name is neither 'nt' nor 'riscos'
    # (see the platform imports above): the path is just percent-decoded.
    return unquote(pathname)
def pathname2url(pathname):
    """OS-specific conversion from a file system path to a relative URL
    of the 'file' scheme; not recommended for general use."""
    # Fallback branch used when os.name is neither 'nt' nor 'riscos':
    # the path is just percent-encoded.
    return quote(pathname)
62 # This really consists of two pieces:
63 # (1) a class which handles opening of all sorts of URLs
64 # (plus assorted utilities etc.)
65 # (2) a set of functions for parsing URLs
66 # XXX Should these be separated out into different modules?
69 # Shortcut for basic usage
def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from.

    NOTE(review): this copy looks truncated -- the `global _urlopener`
    bookkeeping and the branch keywords selecting between the two
    `opener = ...` lines and the two return statements are not visible
    here; verify against the full source before editing.
    """
    # Deprecation notice: urllib.urlopen was removed in Python 3.
    from warnings import warnpy3k
    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
             "favor of urllib2.urlopen()", stacklevel=2)
    if proxies is not None:
        # Caller supplied an explicit scheme -> proxy-URL mapping.
        opener = FancyURLopener(proxies=proxies)
    opener = FancyURLopener()
    return opener.open(url)
    return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve *url* into *filename* (or a temp file).

    Returns a (filename, headers) pair; NOTE(review): upstream reuses a
    module-level `_urlopener` via `global`/`if not _urlopener:` lines
    that are not visible in this copy.
    """
    _urlopener = FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)
# Raised by retrieve() when fewer bytes arrive than Content-Length promised.
class ContentTooShortError(IOError):
    """Downloaded size does not match the Content-Length header."""

    def __init__(self, message, content):
        # Keep the partial payload so callers can inspect what did arrive.
        self.content = content
        IOError.__init__(self, message)
116 """Class to open URLs.
117 This is a class rather than just a subroutine because we may need
118 more than one set of global protocol-specific options.
119 Note -- this is a base class for those who don't want the
120 automatic handling of errors type 302 (relocated) and 401
121 (authorization needed)."""
version = "Python-urllib/%s" % __version__   # default User-Agent string

def __init__(self, proxies=None, **x509):
    # NOTE(review): upstream guards the next line with `if proxies is None:`;
    # that line is not visible in this copy -- verify before editing.
    proxies = getproxies()
    # Python 2 mapping check (dict.has_key); proxies maps scheme -> proxy URL.
    assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
    self.proxies = proxies
    # Optional SSL client credentials, forwarded to httplib.HTTPS.
    self.key_file = x509.get('key_file')
    self.cert_file = x509.get('cert_file')
    self.addheaders = [('User-Agent', self.version)]
    self.__tempfiles = []
    self.__unlink = os.unlink # See cleanup()
    self.tempcache = None
    # Undocumented feature: if you assign {} to tempcache,
    # it is used to cache files retrieved with
    # self.retrieve(). This is not enabled by default
    # since it does not work for changing documents (and I
    # haven't got the logic to check expiration headers
    self.ftpcache = ftpcache
    # Undocumented feature: you can use a different
    # ftp cache by assigning to the .ftpcache member;
    # in case you want logically independent URL openers
    # XXX This is not threadsafe. Bah.
158 # This code sometimes runs when the rest of this module
159 # has already been deleted, so it can't use any globals
160 # or import anything.
162 for file in self.__tempfiles:
167 del self.__tempfiles[:]
169 self.tempcache.clear()
def addheader(self, *args):
    """Register an extra (name, value) header sent with HTTP requests.

    e.g. u.addheader('Accept', 'sound/basic')
    """
    header = args
    self.addheaders.append(header)
def open(self, fullurl, data=None):
    """Use URLopener().open(file) instead of open(file, 'r').

    NOTE(review): several lines are not visible in this copy (the
    default-to-'file' scheme fallback, the else/proxy=None branch,
    `self.type = urltype`, and the try/`if data is None:` dispatch
    that the trailing except belongs to); verify against full source.
    """
    fullurl = unwrap(toBytes(fullurl))
    # percent encode url, fixing lame server errors for e.g, like space
    fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
    # Serve from the (optional, undocumented) temp-file cache when enabled.
    if self.tempcache and fullurl in self.tempcache:
        filename, headers = self.tempcache[fullurl]
        fp = open(filename, 'rb')
        return addinfourl(fp, headers, fullurl)
    urltype, url = splittype(fullurl)
    if urltype in self.proxies:
        # Route through the configured proxy for this scheme.
        proxy = self.proxies[urltype]
        urltype, proxyhost = splittype(proxy)
        host, selector = splithost(proxyhost)
        url = (host, fullurl) # Signal special case to open_*()
    # Dispatch to the scheme-specific handler, e.g. open_http().
    name = 'open_' + urltype
    name = name.replace('-', '_')
    if not hasattr(self, name):
        return self.open_unknown_proxy(proxy, fullurl, data)
        return self.open_unknown(fullurl, data)
    return getattr(self, name)(url)
    return getattr(self, name)(url, data)
    # Python 2 re-raise preserving the original traceback.
    except socket.error, msg:
        raise IOError, ('socket error', msg), sys.exc_info()[2]
213 def open_unknown(self, fullurl, data=None):
214 """Overridable interface to open unknown URL type."""
215 type, url = splittype(fullurl)
216 raise IOError, ('url error', 'unknown url type', type)
218 def open_unknown_proxy(self, proxy, fullurl, data=None):
219 """Overridable interface to open unknown URL type."""
220 type, url = splittype(fullurl)
221 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
def retrieve(self, url, filename=None, reporthook=None, data=None):
    """retrieve(url) returns (filename, headers) for a local object
    or (tempfilename, headers) for a remote object.

    NOTE(review): the download loop and several lines (try/except,
    the `hdrs`, `headers`, `bs`, `blocknum`, `read` assignments and
    branch keywords) are not visible in this copy; verify upstream.
    """
    url = unwrap(toBytes(url))
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]
    type, url1 = splittype(url)
    if filename is None and (not type or type == 'file'):
        # Local file: no copy is needed; return its path directly.
        fp = self.open_local_file(url1)
        return url2pathname(splithost(url1)[1]), hdrs
    fp = self.open(url, data)
    tfp = open(filename, 'wb')
    # Derive a temp-file suffix from the URL path's extension,
    # stripping query string and ;attr=value parts first.
    garbage, path = splittype(url)
    garbage, path = splithost(path or "")
    path, garbage = splitquery(path or "")
    path, garbage = splitattr(path or "")
    suffix = os.path.splitext(path)[1]
    (fd, filename) = tempfile.mkstemp(suffix)
    self.__tempfiles.append(filename)   # removed again by cleanup()
    tfp = os.fdopen(fd, 'wb')
    result = filename, headers
    if self.tempcache is not None:
        self.tempcache[url] = result
    if "content-length" in headers:
        size = int(headers["Content-Length"])
    reporthook(blocknum, bs, size)
    reporthook(blocknum, bs, size)
    # raise exception if actual size does not match content-length header
    if size >= 0 and read < size:
        raise ContentTooShortError("retrieval incomplete: got only %i out "
                                   "of %i bytes" % (read, size), result)
287 # Each method named open_<type> knows how to open that type of URL
def open_http(self, url, data=None):
    """Use HTTP protocol.

    NOTE(review): many lines are elided in this copy -- the tuple-proxy
    else-branch, `proxy_passwd`/`user_passwd` defaults, the
    `if data is not None:`/`else:` around the POST/GET putrequest pair,
    `fp = h.getfile()`, and the errcode dispatch branches. Verify
    against the full source before editing.
    """
    # `url` is either "host/selector" or, for a proxy, (host, full-url)
    # as produced by open() above.
    if isinstance(url, str):
        host, selector = splithost(url)
        user_passwd, host = splituser(host)
    # check whether the proxy contains authorization information
    proxy_passwd, host = splituser(host)
    # now we proceed with the url we want to obtain
    urltype, rest = splittype(selector)
    if urltype.lower() != 'http':
        realhost, rest = splithost(rest)
        user_passwd, realhost = splituser(realhost)
        selector = "%s://%s%s" % (urltype, realhost, rest)
        if proxy_bypass(realhost):
    #print "proxy via http:", host, selector
    if not host: raise IOError, ('http error', 'no host given')
    # Encode credentials for Basic auth headers.
    proxy_passwd = unquote(proxy_passwd)
    proxy_auth = base64.b64encode(proxy_passwd).strip()
    user_passwd = unquote(user_passwd)
    auth = base64.b64encode(user_passwd).strip()
    h = httplib.HTTP(host)   # legacy pre-HTTP/1.1 interface
    h.putrequest('POST', selector)
    h.putheader('Content-Type', 'application/x-www-form-urlencoded')
    h.putheader('Content-Length', '%d' % len(data))
    h.putrequest('GET', selector)
    if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
    if auth: h.putheader('Authorization', 'Basic %s' % auth)
    if realhost: h.putheader('Host', realhost)
    for args in self.addheaders: h.putheader(*args)
    errcode, errmsg, headers = h.getreply()
    # something went wrong with the HTTP status line
    raise IOError, ('http protocol error', 0,
                    'got a bad status line', None)
    # According to RFC 2616, "2xx" code indicates that the client's
    # request was successfully received, understood, and accepted.
    if (200 <= errcode < 300):
        return addinfourl(fp, headers, "http:" + url, errcode)
    return self.http_error(url, fp, errcode, errmsg, headers)
    return self.http_error(url, fp, errcode, errmsg, headers, data)
def http_error(self, url, fp, errcode, errmsg, headers, data=None):
    """Handle http errors.

    Derived class can override this, or provide specific handlers
    named http_error_DDD where DDD is the 3-digit error code.

    NOTE(review): the `if data is None:`/`else:` lines selecting
    between the two method() calls are not visible in this copy.
    """
    # First check if there's a specific handler for this error
    name = 'http_error_%d' % errcode
    if hasattr(self, name):
        method = getattr(self, name)
        result = method(url, fp, errcode, errmsg, headers)
        result = method(url, fp, errcode, errmsg, headers, data)
        # A handler returning a false value falls through to the default.
        if result: return result
    return self.http_error_default(url, fp, errcode, errmsg, headers)
def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Default error handler: close the connection and raise IOError."""
    # NOTE(review): upstream calls fp.close() here; that line is not
    # visible in this copy.
    raise IOError, ('http error', errcode, errmsg, headers)
def open_https(self, url, data=None):
    """Use HTTPS protocol.

    NOTE(review): mirrors open_http(); the same lines are elided here
    (tuple-proxy else-branch, credential defaults, the POST/GET
    branch keywords, `fp = h.getfile()`, the errcode dispatch, and
    the final call's closing `data)` argument). Verify upstream.
    """
    if isinstance(url, str):
        host, selector = splithost(url)
        user_passwd, host = splituser(host)
    # determine whether the proxy contains authorization information
    proxy_passwd, host = splituser(host)
    urltype, rest = splittype(selector)
    if urltype.lower() != 'https':
        realhost, rest = splithost(rest)
        user_passwd, realhost = splituser(realhost)
        selector = "%s://%s%s" % (urltype, realhost, rest)
    #print "proxy via https:", host, selector
    if not host: raise IOError, ('https error', 'no host given')
    proxy_passwd = unquote(proxy_passwd)
    proxy_auth = base64.b64encode(proxy_passwd).strip()
    user_passwd = unquote(user_passwd)
    auth = base64.b64encode(user_passwd).strip()
    # Client certificate/key were captured from **x509 in __init__.
    h = httplib.HTTPS(host, 0,
                      key_file=self.key_file,
                      cert_file=self.cert_file)
    h.putrequest('POST', selector)
    h.putheader('Content-Type',
                'application/x-www-form-urlencoded')
    h.putheader('Content-Length', '%d' % len(data))
    h.putrequest('GET', selector)
    if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
    if auth: h.putheader('Authorization', 'Basic %s' % auth)
    if realhost: h.putheader('Host', realhost)
    for args in self.addheaders: h.putheader(*args)
    errcode, errmsg, headers = h.getreply()
    # something went wrong with the HTTP status line
    raise IOError, ('http protocol error', 0,
                    'got a bad status line', None)
    # According to RFC 2616, "2xx" code indicates that the client's
    # request was successfully received, understood, and accepted.
    if (200 <= errcode < 300):
        return addinfourl(fp, headers, "https:" + url, errcode)
    return self.http_error(url, fp, errcode, errmsg, headers)
    return self.http_error(url, fp, errcode, errmsg, headers,
455 def open_file(self, url):
456 """Use local file or FTP depending on form of URL."""
457 if not isinstance(url, str):
458 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
459 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
460 return self.open_ftp(url)
462 return self.open_local_file(url)
def open_local_file(self, url):
    """Use local file.

    NOTE(review): heavily elided in this copy -- the try/except around
    cStringIO and os.stat, `size = stats.st_size`, the host/no-host
    branch keywords, and the second addinfourl argument lines are not
    visible. Verify against the full source before editing.
    """
    import mimetypes, mimetools, email.utils
    from cStringIO import StringIO
    from StringIO import StringIO
    host, file = splithost(url)
    localname = url2pathname(file)
    stats = os.stat(localname)
    raise IOError(e.errno, e.strerror, e.filename)
    # Build pseudo HTTP headers from the file's stat information.
    modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
    mtype = mimetypes.guess_type(url)[0]
    headers = mimetools.Message(StringIO(
        'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
        (mtype or 'text/plain', size, modified)))
    urlfile = 'file://' + file
    elif file[:2] == './':
        raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
    return addinfourl(open(localname, 'rb'),
    # A named host is only acceptable if it resolves to this machine.
    host, port = splitport(host)
    and socket.gethostbyname(host) in (localhost(), thishost()):
    urlfile = 'file://' + file
    return addinfourl(open(localname, 'rb'),
    raise IOError, ('local file error', 'not on local host')
def open_ftp(self, url):
    """Use FTP protocol.

    NOTE(review): elided in this copy -- the try/except around
    cStringIO, user/passwd unquoting, the port int() conversion, the
    cache-pruning body, the attrs loop header, `type = 'I'` defaults
    and the `headers = ""` initialisation. Verify upstream.
    """
    if not isinstance(url, str):
        raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
    import mimetypes, mimetools
    from cStringIO import StringIO
    from StringIO import StringIO
    host, path = splithost(url)
    if not host: raise IOError, ('ftp error', 'no host given')
    host, port = splitport(host)
    user, host = splituser(host)
    if user: user, passwd = splitpasswd(user)
    passwd = passwd or ''
    host = socket.gethostbyname(host)
    port = ftplib.FTP_PORT
    path, attrs = splitattr(path)
    dirs = path.split('/')
    dirs, file = dirs[:-1], dirs[-1]
    if dirs and not dirs[0]: dirs = dirs[1:]
    if dirs and not dirs[0]: dirs[0] = '/'
    # One cached ftpwrapper per (user, host, port, path) so that open
    # connections are reused across requests.
    key = user, host, port, '/'.join(dirs)
    if len(self.ftpcache) > MAXFTPCACHE:
        # Prune the cache, rather arbitrarily
        for k in self.ftpcache.keys():
    if not key in self.ftpcache:
        self.ftpcache[key] = \
            ftpwrapper(user, passwd, host, port, dirs)
    if not file: type = 'D'
    # ';type=a' / ';type=i' / ';type=d' URL attributes select the
    # transfer mode (RFC 1738 FTP URL syntax).
    attr, value = splitvalue(attr)
    if attr.lower() == 'type' and \
       value in ('a', 'A', 'i', 'I', 'd', 'D'):
    (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
    mtype = mimetypes.guess_type("ftp:" + url)[0]
    headers += "Content-Type: %s\n" % mtype
    if retrlen is not None and retrlen >= 0:
        headers += "Content-Length: %d\n" % retrlen
    headers = mimetools.Message(StringIO(headers))
    return addinfourl(fp, headers, "ftp:" + url)
    except ftperrors(), msg:
        raise IOError, ('ftp error', msg), sys.exc_info()[2]
def open_data(self, url, data=None):
    """Use "data" URL.

    NOTE(review): elided in this copy -- the try/except around the
    comma split, the scheme-strip of `url`, the `encoding`/`msg`
    initialisations, and the StringIO construction of `f`. Verify
    against the full source before editing.
    """
    if not isinstance(url, str):
        raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
    # syntax of data URLs:
    # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
    # mediatype := [ type "/" subtype ] *( ";" parameter )
    # parameter := attribute "=" value
    from cStringIO import StringIO
    from StringIO import StringIO
    [type, data] = url.split(',', 1)
    raise IOError, ('data error', 'bad data URL')
    # RFC 2397 default mediatype when none is given.
    type = 'text/plain;charset=US-ASCII'
    semi = type.rfind(';')
    if semi >= 0 and '=' not in type[semi:]:
        # Trailing ';base64' (no '=') is the transfer encoding marker.
        encoding = type[semi+1:]
    msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                        time.gmtime(time.time())))
    msg.append('Content-type: %s' % type)
    if encoding == 'base64':
        data = base64.decodestring(data)
    msg.append('Content-Length: %d' % len(data))
    headers = mimetools.Message(f, 0)
    #f.fileno = None # needed for addinfourl
    return addinfourl(f, headers, url)
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        # NOTE(review): upstream also initialises auth_cache, tries and
        # maxtries here; those lines are not visible in this copy.
        URLopener.__init__(self, *args, **kwargs)
def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Lenient default: return the error response as an ordinary result."""
    response = addinfourl(fp, headers, "http:" + url, errcode)
    return response
def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 302 -- relocated (temporarily).

    NOTE(review): the tries-counter increment, the else: before the
    default-handler fallback, the closing `data)` of the
    redirect_internal call and the tries reset/return are not visible
    in this copy.
    """
    # Guard against infinite redirect loops.
    if self.maxtries and self.tries >= self.maxtries:
        if hasattr(self, "http_error_500"):
            meth = self.http_error_500
        meth = self.http_error_default
        return meth(url, fp, 500,
                    "Internal Server Error: Redirect Recursion", headers)
    result = self.redirect_internal(url, fp, errcode, errmsg, headers,
def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
    # Follow a Location:/URI: redirect by re-opening the new URL.
    # NOTE(review): the `else: return` (no redirect target), the
    # void-read/fp.close() cleanup, and the closing argument of the
    # IOError call are not visible in this copy.
    if 'location' in headers:
        newurl = headers['location']
    elif 'uri' in headers:
        newurl = headers['uri']
    # In case the server sent a relative URL, join with original:
    newurl = basejoin(self.type + ":" + url, newurl)
    # For security reasons we do not allow redirects to protocols
    # other than HTTP, HTTPS or FTP.
    newurl_lower = newurl.lower()
    if not (newurl_lower.startswith('http://') or
            newurl_lower.startswith('https://') or
            newurl_lower.startswith('ftp://')):
        raise IOError('redirect error', errcode,
                      errmsg + " - Redirection to url '%s' is not allowed" %
    return self.open(newurl)
def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 301 (moved permanently) -- treated exactly like 302."""
    handle_relocation = self.http_error_302
    return handle_relocation(url, fp, errcode, errmsg, headers, data)
def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 303 (see other) -- essentially identical to 302."""
    handle_relocation = self.http_error_302
    return handle_relocation(url, fp, errcode, errmsg, headers, data)
def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 307 -- relocated, but turn POST into error."""
    # NOTE(review): the `if data is None:`/`else:` selector between the
    # two returns is not visible in this copy.
    return self.http_error_302(url, fp, errcode, errmsg, headers, data)
    return self.http_error_default(url, fp, errcode, errmsg, headers)
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 401 -- authentication required.
    This function supports Basic authentication only.

    NOTE(review): the `if match is None:` guard and the
    `if data is None:`/`else:` dispatch between the two returns are
    not visible in this copy.
    """
    if not 'www-authenticate' in headers:
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)
    stuff = headers['www-authenticate']
    # Parse 'Basic realm="..."' out of the challenge header.
    match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
    URLopener.http_error_default(self, url, fp,
                                 errcode, errmsg, headers)
    scheme, realm = match.groups()
    if scheme.lower() != 'basic':
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)
    # Delegate to retry_http_basic_auth / retry_https_basic_auth.
    name = 'retry_' + self.type + '_basic_auth'
    return getattr(self,name)(url, realm)
    return getattr(self,name)(url, realm, data)
def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 407 -- proxy authentication required.
    This function supports Basic authentication only.

    NOTE(review): the `if match is None:` guard and the
    `if data is None:`/`else:` dispatch between the two returns are
    not visible in this copy.
    """
    if not 'proxy-authenticate' in headers:
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)
    stuff = headers['proxy-authenticate']
    # Parse 'Basic realm="..."' out of the challenge header.
    match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
    URLopener.http_error_default(self, url, fp,
                                 errcode, errmsg, headers)
    scheme, realm = match.groups()
    if scheme.lower() != 'basic':
        URLopener.http_error_default(self, url, fp,
                                     errcode, errmsg, headers)
    # Delegate to retry_proxy_http_basic_auth / retry_proxy_https_basic_auth.
    name = 'retry_proxy_' + self.type + '_basic_auth'
    return getattr(self,name)(url, realm)
    return getattr(self,name)(url, realm, data)
def retry_proxy_http_basic_auth(self, url, realm, data=None):
    # Re-try the request after injecting user:pass credentials into the
    # configured http proxy URL.
    # NOTE(review): the `if data is None:`/`else:` lines between the two
    # returns are not visible in this copy.
    host, selector = splithost(url)
    newurl = 'http://' + host + selector
    proxy = self.proxies['http']
    urltype, proxyhost = splittype(proxy)
    proxyhost, proxyselector = splithost(proxyhost)
    # Strip any credentials already present in the proxy host; a nonzero
    # i also tells get_user_passwd() to clear its cached entry.
    i = proxyhost.find('@') + 1
    proxyhost = proxyhost[i:]
    user, passwd = self.get_user_passwd(proxyhost, realm, i)
    if not (user or passwd): return None
    proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
    self.proxies['http'] = 'http://' + proxyhost + proxyselector
    return self.open(newurl)
    return self.open(newurl, data)
def retry_proxy_https_basic_auth(self, url, realm, data=None):
    # HTTPS twin of retry_proxy_http_basic_auth (see above).
    # NOTE(review): the `if data is None:`/`else:` lines between the two
    # returns are not visible in this copy.
    host, selector = splithost(url)
    newurl = 'https://' + host + selector
    proxy = self.proxies['https']
    urltype, proxyhost = splittype(proxy)
    proxyhost, proxyselector = splithost(proxyhost)
    # Strip any credentials already present in the proxy host.
    i = proxyhost.find('@') + 1
    proxyhost = proxyhost[i:]
    user, passwd = self.get_user_passwd(proxyhost, realm, i)
    if not (user or passwd): return None
    proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
    self.proxies['https'] = 'https://' + proxyhost + proxyselector
    return self.open(newurl)
    return self.open(newurl, data)
def retry_http_basic_auth(self, url, realm, data=None):
    # Re-try the request with user:pass embedded in the URL authority.
    # NOTE(review): upstream's `host = host[i:]` credential strip and the
    # `if data is None:`/`else:` dispatch are not visible in this copy.
    host, selector = splithost(url)
    i = host.find('@') + 1
    user, passwd = self.get_user_passwd(host, realm, i)
    if not (user or passwd): return None
    host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
    newurl = 'http://' + host + selector
    return self.open(newurl)
    return self.open(newurl, data)
def retry_https_basic_auth(self, url, realm, data=None):
    # HTTPS twin of retry_http_basic_auth (see above).
    # NOTE(review): upstream's `host = host[i:]` credential strip and the
    # `if data is None:`/`else:` dispatch are not visible in this copy.
    host, selector = splithost(url)
    i = host.find('@') + 1
    user, passwd = self.get_user_passwd(host, realm, i)
    if not (user or passwd): return None
    host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
    newurl = 'https://' + host + selector
    return self.open(newurl)
    return self.open(newurl, data)
def get_user_passwd(self, host, realm, clear_cache=0):
    """Return (user, passwd) for realm@host, prompting and caching.

    NOTE(review): the `if clear_cache:`/`else:` keywords around the
    cache delete/return pair and the final `return user, passwd` are
    not visible in this copy.
    """
    key = realm + '@' + host.lower()
    if key in self.auth_cache:
        del self.auth_cache[key]
        return self.auth_cache[key]
    user, passwd = self.prompt_user_passwd(host, realm)
    # Only cache non-empty credentials.
    if user or passwd: self.auth_cache[key] = (user, passwd)
def prompt_user_passwd(self, host, realm):
    """Override this in a GUI environment!"""
    # Console fallback. NOTE(review): the `import getpass`, `try:`, the
    # closing arguments of both prompt calls, and the return statements
    # (including the KeyboardInterrupt path) are not visible here.
    user = raw_input("Enter username for %s at %s: " % (realm,
    passwd = getpass.getpass("Enter password for %s in %s at %s: " %
    except KeyboardInterrupt:
810 """Return the IP address of the magic hostname 'localhost'."""
812 if _localhost is None:
813 _localhost = socket.gethostbyname('localhost')
818 """Return the IP address of the current host."""
820 if _thishost is None:
821 _thishost = socket.gethostbyname(socket.gethostname())
826 """Return the set of errors raised by the FTP class."""
828 if _ftperrors is None:
830 _ftperrors = ftplib.all_errors
835 """Return an empty mimetools.Message object."""
837 if _noheaders is None:
840 from cStringIO import StringIO
842 from StringIO import StringIO
843 _noheaders = mimetools.Message(StringIO(), 0)
844 _noheaders.fp.close() # Recycle file descriptor
851 """Class used by open_ftp() for cache of open FTP connections."""
# NOTE(review): the signature's closing `persistent=True):` line and most
# attribute assignments (user/passwd/host/port/dirs/refcount and the
# initial self.init() call) are not visible in this copy of __init__.
def __init__(self, user, passwd, host, port, dirs,
             timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
    self.timeout = timeout
    # When false, the connection is torn down once the refcount drops.
    self.keepalive = persistent
869 self.ftp = ftplib.FTP()
870 self.ftp.connect(self.host, self.port, self.timeout)
871 self.ftp.login(self.user, self.passwd)
872 for dir in self.dirs:
def retrfile(self, file, type):
    """Fetch *file* (or a directory listing) over the cached connection.

    NOTE(review): elided in this copy -- the endtransfer/reconnect
    logic, the RETR/NLST/LIST command construction, the try: lines the
    visible excepts belong to, and the refcount increment. Verify
    against the full source before editing.
    """
    # 'd'/'D' requests a directory listing (ASCII); otherwise set the
    # requested FTP transfer type ('A' or 'I').
    if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
    else: cmd = 'TYPE ' + type; isdir = 0
    self.ftp.voidcmd(cmd)
    except ftplib.all_errors:
        self.ftp.voidcmd(cmd)
    if file and not isdir:
        # Try to retrieve as a file
        conn, retrlen = self.ftp.ntransfercmd(cmd)
        except ftplib.error_perm, reason:
            # 550 means "not a plain file" -- fall through to a listing.
            if str(reason)[:3] != '550':
                raise IOError, ('ftp error', reason), sys.exc_info()[2]
    # Set transfer mode to ASCII!
    self.ftp.voidcmd('TYPE A')
    # Try a directory listing. Verify that directory exists.
    except ftplib.error_perm, reason:
        raise IOError, ('ftp error', reason), sys.exc_info()[2]
    conn, retrlen = self.ftp.ntransfercmd(cmd)
    # Closing the returned file triggers file_close() via addclosehook.
    ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
    # Pass back both a suitably decorated object and a retrieval length
    return (ftpobj, retrlen)
918 def endtransfer(self):
928 self.keepalive = False
929 if self.refcount <= 0:
932 def file_close(self):
935 if self.refcount <= 0 and not self.keepalive:
938 def real_close(self):
946 """Base class for addinfo and addclosehook."""
def __init__(self, fp):
    # NOTE(review): upstream stores `self.fp = fp` first and has an
    # `else:` before the lambda fallback; neither line is visible in
    # this copy.
    # Delegate the file-object protocol directly to the wrapped fp.
    self.read = self.fp.read
    self.readline = self.fp.readline
    if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
    if hasattr(self.fp, "fileno"):
        self.fileno = self.fp.fileno
    self.fileno = lambda: None
    if hasattr(self.fp, "__iter__"):
        self.__iter__ = self.fp.__iter__
        # Python 2 iterator protocol uses next(), not __next__().
        if hasattr(self.fp, "next"):
            self.next = self.fp.next
963 return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
969 self.readlines = None
971 if self.fp: self.fp.close()
class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        # closehook(*hookargs) runs once when the file is closed.
        self.closehook = closehook
        self.hookargs = hookargs

    # NOTE(review): the close() method's def line and its surrounding
    # statements (the call to addbase.close(), the hookargs reset) are
    # not visible in this copy; only its hook invocation remains.
    self.closehook(*self.hookargs)
    self.closehook = None
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        # NOTE(review): the info() accessor returning self.headers is not
        # visible in this copy.
        addbase.__init__(self, fp)
        # Header object (a mimetools.Message elsewhere in this file).
        self.headers = headers
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        # NOTE(review): upstream also stores self.url and self.code here,
        # and defines info()/getcode()/geturl(); those lines are not
        # visible in this copy.
        addbase.__init__(self, fp)
        self.headers = headers
1018 # Utilities to parse URLs (most of these return None for missing parts):
1019 # unwrap('<URL:type://host/path>') --> 'type://host/path'
1020 # splittype('type:opaquestring') --> 'type', 'opaquestring'
1021 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
1022 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
1023 # splitpasswd('user:passwd') -> 'user', 'passwd'
1024 # splitport('host:port') --> 'host', 'port'
1025 # splitquery('/path?query') --> '/path', 'query'
1026 # splittag('/path#tag') --> '/path', 'tag'
1027 # splitattr('/path;attr1=value1;attr2=value2;...') ->
1028 # '/path', ['attr1=value1', 'attr2=value2', ...]
1029 # splitvalue('attr=value') --> 'attr', 'value'
1030 # unquote('abc%20def') -> 'abc def'
1031 # quote('abc def') -> 'abc%20def')
1040 return isinstance(x, unicode)
1043 """toBytes(u"URL") --> 'URL'."""
1044 # Most URL schemes require ASCII. If that changes, the conversion
1046 if _is_unicode(url):
1048 url = url.encode("ASCII")
1049 except UnicodeError:
1050 raise UnicodeError("URL " + repr(url) +
1051 " contains non-ASCII characters")
1055 """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
1057 if url[:1] == '<' and url[-1:] == '>':
1058 url = url[1:-1].strip()
1059 if url[:4] == 'URL:': url = url[4:].strip()
1064 """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
1066 if _typeprog is None:
1068 _typeprog = re.compile('^([^/:]+):')
1070 match = _typeprog.match(url)
1072 scheme = match.group(1)
1073 return scheme.lower(), url[len(scheme) + 1:]
1078 """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
1080 if _hostprog is None:
1082 _hostprog = re.compile('^//([^/?]*)(.*)$')
1084 match = _hostprog.match(url)
1086 host_port = match.group(1)
1087 path = match.group(2)
1088 if path and not path.startswith('/'):
1090 return host_port, path
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
    # Lazily-compiled module-level regex. NOTE(review): the
    # `global _userprog`, the lazy `import re` and the fallthrough
    # `return None, host` are not visible in this copy.
    if _userprog is None:
        _userprog = re.compile('^(.*)@(.*)$')
    match = _userprog.match(host)
    if match: return match.group(1, 2)
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
    # Lazily-compiled module-level regex (re.S so the password may span
    # newlines). NOTE(review): the `global _passwdprog`, the lazy
    # `import re` and the `return user, None` fallthrough are not
    # visible in this copy.
    if _passwdprog is None:
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)
    match = _passwdprog.match(user)
    if match: return match.group(1, 2)
1117 # splittag('/path#tag') --> '/path', 'tag'
def splitport(host):
    """splitport('host:port') --> 'host', 'port'."""
    # Only matches an all-digit port. NOTE(review): the
    # `global _portprog`, the lazy `import re` and the
    # `return host, None` fallthrough are not visible in this copy.
    if _portprog is None:
        _portprog = re.compile('^(.*):([0-9]+)$')
    match = _portprog.match(host)
    if match: return match.group(1, 2)
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    # NOTE(review): the `global _nportprog`, the lazy `import re`, the
    # try/int(port)/except ValueError conversion and the matched-case
    # return are not visible in this copy; only the no-match fallthrough
    # `return host, defport` remains.
    if _nportprog is None:
        _nportprog = re.compile('^(.*):(.*)$')
    match = _nportprog.match(host)
    if match:
        host, port = match.group(1, 2)
        if not port: raise ValueError, "no digits"
    return host, defport
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'."""
    # Lazily-compiled module-level regex. NOTE(review): the
    # `global _queryprog`, the lazy `import re` and the
    # `return url, None` fallthrough are not visible in this copy.
    if _queryprog is None:
        _queryprog = re.compile('^(.*)\?([^?]*)$')
    match = _queryprog.match(url)
    if match: return match.group(1, 2)
1166 """splittag('/path#tag') --> '/path', 'tag'."""
1168 if _tagprog is None:
1170 _tagprog = re.compile('^(.*)#([^#]*)$')
1172 match = _tagprog.match(url)
1173 if match: return match.group(1, 2)
1177 """splitattr('/path;attr1=value1;attr2=value2;...') ->
1178 '/path', ['attr1=value1', 'attr2=value2', ...]."""
1179 words = url.split(';')
1180 return words[0], words[1:]
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'."""
    # Lazily-compiled module-level regex. NOTE(review): the
    # `global _valueprog`, the lazy `import re` and the
    # `return attr, None` fallthrough are not visible in this copy.
    if _valueprog is None:
        _valueprog = re.compile('^([^=]*)=(.*)$')
    match = _valueprog.match(attr)
    if match: return match.group(1, 2)
1194 # urlparse contains a duplicate of this method to avoid a circular import. If
1195 # you update this method, also update the copy in urlparse. This code
1196 # duplication does not exist in Python3.
1198 _hexdig = '0123456789ABCDEFabcdef'
1199 _hextochr = dict((a + b, chr(int(a + b, 16)))
1200 for a in _hexdig for b in _hexdig)
1203 """unquote('abc%20def') -> 'abc def'."""
1209 for item in res[1:]:
1211 s += _hextochr[item[:2]] + item[2:]
1214 except UnicodeDecodeError:
1215 s += unichr(int(item[:2], 16)) + item[2:]
def unquote_plus(s):
    """unquote('%7e/abc+def') -> '~/abc def'"""
    # '+' encodes a space in query strings. NOTE(review): the final
    # `return unquote(s)` is not visible in this copy.
    s = s.replace('+', ' ')
1223 always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1224 'abcdefghijklmnopqrstuvwxyz'
1227 for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
1228 _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL. Thus, it will not encode '/'. This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    # NOTE(review): the `if s is None:` guard before the TypeError, the
    # try/except KeyError around the _safe_quoters lookup, and the
    # `return s` fast path after the rstrip test are not visible here.
    raise TypeError('None object cannot be quoted')
    cachekey = (safe, always_safe)
    (quoter, safe) = _safe_quoters[cachekey]
    # Cache miss: build a per-safe-set quoting table once and memoize it.
    safe_map = _safe_map.copy()
    safe_map.update([(c, c) for c in safe])
    quoter = safe_map.__getitem__
    safe = always_safe + safe
    _safe_quoters[cachekey] = (quoter, safe)
    # Fast path: nothing outside the safe set needs quoting.
    if not s.rstrip(safe):
    return ''.join(map(quoter, s))
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' in s:
        # temporarily mark space as safe so quote() leaves it alone,
        # then turn the spaces into '+'
        s = quote(s, safe + ' ')
        return s.replace(' ', '+')
    return quote(s, safe)
1277 def urlencode(query, doseq=0):
1278 """Encode a sequence of two-element tuples or dictionary into a URL query string.
1280 If any values in the query arg are sequences and doseq is true, each
1281 sequence element is converted to a separate parameter.
1283 If the query arg is a sequence of two-element tuples, the order of the
1284 parameters in the output will match the order of parameters in the
1288 if hasattr(query,"items"):
1290 query = query.items()
1292 # it's a bother at times that strings and string-like objects are
1295 # non-sequence items should not work with len()
1296 # non-empty strings will fail this
1297 if len(query) and not isinstance(query[0], tuple):
1299 # zero-length sequences of all types will get here and succeed,
1300 # but that's a minor nit - since the original implementation
1301 # allowed empty dicts that type of behavior probably should be
1302 # preserved for consistency
1304 ty,va,tb = sys.exc_info()
1305 raise TypeError, "not a valid non-string sequence or mapping object", tb
1309 # preserve old behavior
1311 k = quote_plus(str(k))
1312 v = quote_plus(str(v))
1313 l.append(k + '=' + v)
1316 k = quote_plus(str(k))
1317 if isinstance(v, str):
1319 l.append(k + '=' + v)
1320 elif _is_unicode(v):
1321 # is there a reasonable way to convert to ASCII?
1322 # encode generates a string, but "replace" or "ignore"
1323 # lose information and "strict" can raise UnicodeError
1324 v = quote_plus(v.encode("ASCII","replace"))
1325 l.append(k + '=' + v)
1328 # is this a sufficient test for sequence-ness?
1332 v = quote_plus(str(v))
1333 l.append(k + '=' + v)
1335 # loop over the sequence
1337 l.append(k + '=' + quote_plus(str(elt)))
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    proxies = {}
    for name, value in os.environ.items():
        # normalize so HTTP_PROXY and http_proxy are both recognized
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    return proxies
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host ends with any of the DNS suffixes
    no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
    for name in no_proxy_list:
        if name and (hostonly.endswith(name) or host.endswith(name)):
            return 1
    # otherwise, don't bypass
    return 0
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """
        Return True iff this host shouldn't be accessed using a proxy

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        import re
        import socket
        from fnmatch import fnmatch

        hostonly, port = splitport(host)

        def ip2num(ipAddr):
            # Pack a dotted-quad (possibly short, e.g. '169.254') into a
            # 32-bit integer, zero-filling missing octets.
            parts = ipAddr.split('.')
            parts = map(int, parts)
            if len(parts) != 4:
                parts = (parts + [0, 0, 0, 0])[:4]
            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

        proxy_settings = _get_proxy_settings()

        # Check for simple host names:
        if '.' not in host:
            if proxy_settings['exclude_simple']:
                return True

        hostIP = None

        for value in proxy_settings.get('exceptions', ()):
            # Items in the list are strings like these: *.local, 169.254/16
            if not value: continue

            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
            if m is not None:
                # Numeric exception: compare network prefixes.
                if hostIP is None:
                    try:
                        hostIP = socket.gethostbyname(hostonly)
                        hostIP = ip2num(hostIP)
                    except socket.error:
                        continue

                base = ip2num(m.group(1))
                mask = m.group(2)
                if mask is None:
                    # no explicit prefix length: infer from octet count
                    mask = 8 * (m.group(1).count('.') + 1)
                else:
                    mask = int(mask[1:])
                mask = 32 - mask

                if (hostIP >> mask) == (base >> mask):
                    return True

            elif fnmatch(host, value):
                return True

        return False

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        """Return True if host should be accessed without a proxy."""
        # Environment settings, when present, take precedence over the
        # system configuration.
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        # Environment proxies win over system-configured ones.
        return getproxies_environment() or getproxies_macosx_sysconf()
1457 elif os.name == 'nt':
1458 def getproxies_registry():
1459 """Return a dictionary of scheme -> proxy server URL mappings.
1461 Win32 uses the registry to store proxies.
1468 # Std module, so should be around - but you never know!
1471 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1472 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1473 proxyEnable = _winreg.QueryValueEx(internetSettings,
1476 # Returned as Unicode but problems if not converted to ASCII
1477 proxyServer = str(_winreg.QueryValueEx(internetSettings,
1479 if '=' in proxyServer:
1480 # Per-protocol settings
1481 for p in proxyServer.split(';'):
1482 protocol, address = p.split('=', 1)
1483 # See if address has a type:// prefix
1485 if not re.match('^([^/:]+)://', address):
1486 address = '%s://%s' % (protocol, address)
1487 proxies[protocol] = address
1489 # Use one setting for all protocols
1490 if proxyServer[:5] == 'http:':
1491 proxies['http'] = proxyServer
1493 proxies['http'] = 'http://%s' % proxyServer
1494 proxies['https'] = 'https://%s' % proxyServer
1495 proxies['ftp'] = 'ftp://%s' % proxyServer
1496 internetSettings.Close()
1497 except (WindowsError, ValueError, TypeError):
1498 # Either registry key not found etc, or the value in an
1499 # unexpected format.
1500 # proxies already set up to be empty so nothing to do
1505 """Return a dictionary of scheme -> proxy server URL mappings.
1507 Returns settings gathered from the environment, if specified,
1511 return getproxies_environment() or getproxies_registry()
1513 def proxy_bypass_registry(host):
1518 # Std modules, so should be around - but you never know!
1521 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1522 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1523 proxyEnable = _winreg.QueryValueEx(internetSettings,
1525 proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1526 'ProxyOverride')[0])
1527 # ^^^^ Returned as Unicode but problems if not converted to ASCII
1528 except WindowsError:
1530 if not proxyEnable or not proxyOverride:
1532 # try to make a host list from name and IP address.
1533 rawHost, port = splitport(host)
1536 addr = socket.gethostbyname(rawHost)
1539 except socket.error:
1542 fqdn = socket.getfqdn(rawHost)
1545 except socket.error:
1547 # make a check value list from the registry entry: replace the
1548 # '<local>' string by the localhost entry and the corresponding
1550 proxyOverride = proxyOverride.split(';')
1551 # now check if we match one of the registry values.
1552 for test in proxyOverride:
1553 if test == '<local>':
1554 if '.' not in rawHost:
1556 test = test.replace(".", r"\.") # mask dots
1557 test = test.replace("*", r".*") # change glob sequence
1558 test = test.replace("?", r".") # change glob char
1560 # print "%s <--> %s" %( test, val )
1561 if re.match(test, val, re.I):
1565 def proxy_bypass(host):
1566 """Return a dictionary of scheme -> proxy server URL mappings.
1568 Returns settings gathered from the environment, if specified,
1572 if getproxies_environment():
1573 return proxy_bypass_environment(host)
1575 return proxy_bypass_registry(host)
1578 # By default use environment variables
1579 getproxies = getproxies_environment
1580 proxy_bypass = proxy_bypass_environment
1582 # Test and time quote() and unquote()
1585 for i in range(256): s = s + chr(i)
1596 print round(t1 - t0, 3), 'sec'
1599 def reporthook(blocknum, blocksize, totalsize):
1600 # Report during remote transfers
1601 print "Block number: %d, Block size: %d, Total size: %d" % (
1602 blocknum, blocksize, totalsize)