Merge remote-tracking branch 'upstream/develop' into refactor-urls

author Jérémy Bethmont <jeremy.bethmont@gmail.com>

Mon, 3 Oct 2011 09:14:59 +0000 (11:14 +0200)

committer Jérémy Bethmont <jeremy.bethmont@gmail.com>

Mon, 3 Oct 2011 09:14:59 +0000 (11:14 +0200)
author Jérémy Bethmont <jeremy.bethmont@gmail.com>
Mon, 3 Oct 2011 09:14:59 +0000 (11:14 +0200)
committer Jérémy Bethmont <jeremy.bethmont@gmail.com>
Mon, 3 Oct 2011 09:14:59 +0000 (11:14 +0200)
diff --cc requests/models.py

index 43b437ee5a2a0e797343d13909358d82e51e64ec,5235f36e04bf8062ddfbd0b4ee986da8eb939056..a47a16b6adfaaa0e6645d5a212e413a0087b2182
--- 1/requests/models.py
--- 2/requests/models.py
+++ b/requests/models.py
@@@ -208,15 -189,33 +189,19 @@@ class Request(object)
                   (self.allow_redirects))
               ):
   
-                 r.fo.close()
+                 # We already redirected. Don't keep it alive.
+                 # r.raw.close()
   
-                 if not len(history) < settings.max_redirects:
+                 # Woah, this is getting crazy.
+                 if len(history) >= self.config.get('max_redirects'):
                       raise TooManyRedirects()
   
+                 # Add the old request to the history collector.
                   history.append(r)
   
-                 url = get_clean_url(r.headers['location'], parent_url=r.url)
- -                # Redirect to...
- -                url = r.headers['location']
- -
- -                # Handle redirection without scheme (see: RFC 1808 Section 4)
- -                if url.startswith('//'):
- -                    parsed_rurl = urlparse(r.url)
- -                    url = '%s:%s' % (parsed_rurl.scheme, url)
- -
- -                # Facilitate non-RFC2616-compliant 'location' headers
- -                # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
- -                parsed_url = urlparse(url)
- -                if not parsed_url.netloc:
- -                    parsed_url = list(parsed_url)
- -                    parsed_url[2] = urllib.quote(parsed_url[2], safe="%/:=&?~#+!$,;'@()*[]")
- -                    url = urljoin(r.url, str(urlunparse(parsed_url)))
++                url = get_clean_url(r.headers['location'], parent_url=self.url)
   
+                 # If 303, convert to idempotent GET.
                   # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4
                   if r.status_code is codes.see_other:
                       method = 'GET'
@@@ -231,124 -244,128 +230,127 @@@
                   request.send()
                   r = request.response
   
+                 self.cookies.update(r.cookies or {})
+ 
+             # Insert collected history.
               r.history = history
   
+         # Attach Response to Request.
           self.response = r
-         self.response.request = self
- 
-     @staticmethod
-     def _encode_params(data):
-         """Encode parameters in a piece of data.
   
-         If the data supplied is a dictionary, encodes each parameter in it, and
-         returns a list of tuples containing the encoded parameters, and a urlencoded
-         version of that.
- 
-         Otherwise, assumes the data is already encoded appropriately, and
-         returns it twice.
-         """
- 
-         if hasattr(data, 'items'):
-             result = []
-             for k, vs in data.items():
-                 for v in isinstance(vs, list) and vs or [vs]:
-                     result.append((k.encode('utf-8') if isinstance(k, unicode) else k,
-                                    v.encode('utf-8') if isinstance(v, unicode) else v)
-                     )
-             return (result, urllib.urlencode(result, doseq=True))
- 
-         else:
-             return data, data
- 
-     def _build_url(self):
-         """Build the actual URL to use."""
+         # Give Response some context.
+         self.response.request = self
   
-         self.url = get_clean_url(self.url)
- 
-         if self._enc_params:
-             if urlparse(self.url).query:
-                 return '%s&%s' % (self.url, self._enc_params)
-             else:
-                 return '%s?%s' % (self.url, self._enc_params)
-         else:
-             return self.url
   
- -
       def send(self, anyway=False):
-         """Sends the request. Returns True of successful, false if not.
-         If there was an HTTPError during transmission,
-         self.response.status_code will contain the HTTPError code.
- 
-         Once a request is successfully sent, `sent` will equal True.
- 
-         :param anyway: If True, request will be sent, even if it has
-         already been sent.
-         """
+         """Sends the shit."""
   
+         # Safety check.
           self._checks()
   
-         # Logging
-         if settings.verbose:
-             settings.verbose.write('%s   %s   %s\n' % (
-                 datetime.now().isoformat(), self.method, self.url
-             ))
- 
-         url = self._build_url()
-         if self.method in ('GET', 'HEAD', 'DELETE'):
-             req = _Request(url, method=self.method)
-         else:
- 
-             if self.files:
-                 register_openers()
- 
-                 if self.data:
-                     self.files.update(self.data)
- 
-                 datagen, headers = multipart_encode(self.files)
-                 req = _Request(url, data=datagen, headers=headers, method=self.method)
- 
+         # Build the final URL.
+         url = build_url(self.url, self.params)
+ 
+         # Nottin' on you.
+         body = None
+         content_type = None
+ 
+         # Multi-part file uploads.
+         if self.files:
+             if not isinstance(self.data, basestring):
+                 fields = self.data.copy()
+                 for (k, v) in self.files.items():
+                     fields.update({k: (k, v.read())})
+                 (body, content_type) = encode_multipart_formdata(fields)
+ 
+         # Setup form data.
+         if self.data and (not body):
+             if isinstance(self.data, basestring):
+                 body = self.data
               else:
-                 req = _Request(url, data=self._enc_data, method=self.method)
+                 body = encode_params(self.data)
+                 content_type = 'application/x-www-form-urlencoded'
   
-         if self.headers:
-             for k, v in self.headers.iteritems():
-                 req.add_header(k, v)
+         # Setup cookies.
   
-         if not self.sent or anyway:
+         # Add content-type if it wasn't explicitly provided.
+         if (content_type) and (not 'content-type' in self.headers):
+             self.headers['Content-Type'] = content_type
   
-             try:
-                 opener = self._get_opener()
-                 try:
- 
-                     resp = opener(req, timeout=self.timeout)
+         # Only send the Request if new or forced.
+         if (anyway) or (not self.sent):
   
-                 except TypeError, err:
-                     # timeout argument is new since Python v2.6
-                     if not 'timeout' in str(err):
-                         raise
+             try:
+                 # Create a new HTTP connection, since one wasn't passed in.
+                 if not self._pools:
+ 
+                     # Create a pool manager for this one connection.
+                     pools = PoolManager(
+                         num_pools=self.config.get('max_connections'),
+                         maxsize=1,
+                         timeout=self.timeout
+                     )
   
-                     if settings.timeout_fallback:
-                         # fall-back and use global socket timeout (This is not thread-safe!)
-                         old_timeout = socket.getdefaulttimeout()
-                         socket.setdefaulttimeout(self.timeout)
+                     # Create a connection.
+                     connection = pools.connection_from_url(url)
   
-                     resp = opener(req)
+                     # One-off request. Delay fetching the content until needed.
+                     do_block = False
+                 else:
+                     # Create a connection.
+                     connection = self._pools.connection_from_url(url)
+ 
+                     # Syntax sugar.
+                     pools = self._pools
+ 
+                     # Part of a connection pool, so no fancy stuff. Sorry!
+                     do_block = True
+ 
+                 if self.cookies:
+                     # Skip if 'cookie' header is explicitly set.
+                     if 'cookie' not in self.headers:
+ 
+                         # Simple cookie with our dict.
+                         c = SimpleCookie()
+                         c.load(self.cookies)
+ 
+                         # Turn it into a header.
+                         cookie_header = c.output(header='').strip()
+ 
+                         # Attach Cookie header to request.
+                         self.headers['Cookie'] = cookie_header
+ 
+                 # Create the connection.
+                 r = connection.urlopen(
+                     method=self.method,
+                     url=url,
+                     body=body,
+                     headers=self.headers,
+                     redirect=False,
+                     assert_same_host=False,
+                     preload_content=do_block,
+                     decode_content=False
+                 )
   
-                     if settings.timeout_fallback:
-                         # restore gobal timeout
-                         socket.setdefaulttimeout(old_timeout)
+                 # Set the pools manager for redirections, if allowed.
+                 if self.config.get('keep_alive') and pools:
+                     self._pools = pools
   
-                 if self.cookiejar is not None:
-                     self.cookiejar.extract_cookies(resp, req)
   
-             except (urllib2.HTTPError, urllib2.URLError), why:
-                 if hasattr(why, 'reason'):
-                     if isinstance(why.reason, socket.timeout):
-                         why = Timeout(why)
+             # except (urllib2.HTTPError, urllib2.URLError), why:
+             except Exception, why:
+                 print why.__dict__
+                 # if hasattr(why, 'reason'):
+                 #     if isinstance(why.reason, socket.timeout):
+                 #         why = Timeout(why)
   
-                 self._build_response(why, is_error=True)
+                 # self._build_response(why, is_error=True)
+                 print 'FUCK'
+                 print why
   
               else:
-                 self._build_response(resp)
+                 # self.response = Response.from_urllib3()
+                 self._build_response(r)
                   self.response.ok = True
   
           self.sent = self.response.ok
diff --cc requests/utils.py

index e5036e4486774d7c0b2e39de2587f0e7a95b48fc,c422ac5c37a5e0a4b664b1b019b10fcbaacad2fe..1dcf3c2fcd8e695f4923799f79bfe5310264b182
--- 1/requests/utils.py
--- 2/requests/utils.py
+++ b/requests/utils.py
@@@ -15,35 -15,63 +15,76 @@@ import cookieli
   import re
   import urllib
   import zlib
- 
- -from urlparse import urlparse, urlunparse
+ +from urlparse import urlparse, urlunparse, urljoin
   
+ 
+ def encode_params(params):
+     """Encode parameters in a piece of data.
+ 
+     If the data supplied is a dictionary, encodes each parameter in it, and
+     returns a list of tuples containing the encoded parameters, and a urlencoded
+     version of that.
+ 
+     Otherwise, assumes the data is already encoded appropriately, and
+     returns it twice.
+     """
+ 
+     if hasattr(params, 'items'):
+         result = []
+         for k, vs in params.items():
+             for v in isinstance(vs, list) and vs or [vs]:
+                 result.append(
+                     (
+                         k.encode('utf-8') if isinstance(k, unicode) else k,
+                         v.encode('utf-8') if isinstance(v, unicode) else v
+                     )
+                 )
+         return urllib.urlencode(result, doseq=True)
+ 
+     else:
+         return params
+ 
+ +def get_clean_url(url, parent_url=None):
+ +    # Handle redirection without scheme (see: RFC 1808 Section 4)
+ +    if url.startswith('//'):
+ +        parsed_rurl = urlparse(parent_url)
+ +        url = '%s:%s' % (parsed_rurl.scheme, url)
   
- -def build_url(url, query_params):
- -    """Build the actual URL to use."""
- -
- -    # Support for unicode domain names and paths.
       scheme, netloc, path, params, query, fragment = urlparse(url)
- -    netloc = netloc.encode('idna')
+ +    if netloc:
+ +        netloc = netloc.encode('idna')
   
       if isinstance(path, unicode):
- -       path = path.encode('utf-8')
+ +        path = path.encode('utf-8')
   
       path = urllib.quote(path, safe="%/:=&?~#+!$,;'@()*[]")
+ +    params = urllib.quote(params, safe="%/:=&?~#+!$,;'@()*[]")
+ +    query = urllib.quote(query, safe="%/:=&?~#+!$,;'@()*[]")
+ +
+ +    url = str(urlunparse([scheme, netloc, path, params, query, fragment]))
+ +
+ +    # Facilitate non-RFC2616-compliant 'location' headers
+ +    # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
+ +    if not netloc and parent_url:
+ +        url = urljoin(parent_url, url)
   
- -    url = str(urlunparse(
- -     [scheme, netloc, path, params, query, fragment]
- -    ))
+ +    return url
+ +
++def build_url(url, query_params):
++    """Build the actual URL to use."""
++
++    url = get_clean_url(url)
+ 
+     query_params = encode_params(query_params)
+ 
+     if query_params:
+        if urlparse(url).query:
+            return '%s&%s' % (url, query_params)
+        else:
+            return '%s?%s' % (url, query_params)
+     else:
+        return url
+ 
- -
   def header_expand(headers):
       """Returns an HTTP Header value string from a dictionary.
author	Jérémy Bethmont <jeremy.bethmont@gmail.com>
	Mon, 3 Oct 2011 09:14:59 +0000 (11:14 +0200)
committer	Jérémy Bethmont <jeremy.bethmont@gmail.com>
	Mon, 3 Oct 2011 09:14:59 +0000 (11:14 +0200)
		1	2
requests/models.py	patch |	diff1 |	diff2 |	blob | history
requests/utils.py	patch |	diff1 |	diff2 |	blob | history