From c088e3075b3fd5034d2709f7ed1df1162e7b47ae Mon Sep 17 00:00:00 2001
From: Huang Hao
Date: Mon, 20 Aug 2012 19:45:38 +0800
Subject: [PATCH] Use pycurl Keep-Alive feature to save download time.

When GBS parses a "latest" type of repo URL, it tries to download
several URLs. This usually takes about 30 seconds to complete. With
keep-alive it takes about 10 seconds.

pycurl prints "Re-using existing connection!" when pycurl.VERBOSE is
set to 1.

Change-Id: Ib78b204c26d9077c15e0ea7ff7b032ad2314546f
---
 gitbuildsys/utils.py | 100 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 62 insertions(+), 38 deletions(-)

diff --git a/gitbuildsys/utils.py b/gitbuildsys/utils.py
index 270d3c4..fbb6804 100644
--- a/gitbuildsys/utils.py
+++ b/gitbuildsys/utils.py
@@ -147,40 +147,63 @@ class TempCopy(object):
         if os.path.exists(self.name):
             os.unlink(self.name)
 
-def urlgrab(url, filename, user = None, passwd = None):
-
-    outfile = open(filename, 'w')
-    curl = pycurl.Curl()
-    curl.setopt(pycurl.URL, url)
-    curl.setopt(pycurl.WRITEDATA, outfile)
-    curl.setopt(pycurl.FAILONERROR, True)
-    curl.setopt(pycurl.FOLLOWLOCATION, True)
-    curl.setopt(pycurl.SSL_VERIFYPEER, False)
-    curl.setopt(pycurl.SSL_VERIFYHOST, False)
-    curl.setopt(pycurl.CONNECTTIMEOUT, 30)
-    if user:
-        userpwd = user
-        if passwd:
-            userpwd = '%s:%s' % (user, passwd)
-        curl.setopt(pycurl.USERPWD, userpwd)
-    try:
-        curl.perform()
-    except pycurl.error, err:
-        errcode = err.args[0]
-        if errcode == pycurl.E_OPERATION_TIMEOUTED:
-            raise errors.UrlError('timeout on %s: %s' % (url, err))
-        elif errcode == pycurl.E_FILESIZE_EXCEEDED:
-            raise errors.UrlError('max download size exceeded on %s'\
-                                  % url)
-        else:
-            errmsg = 'pycurl error %s - "%s"' % (errcode, str(err.args[1]))
-            raise errors.UrlError(errmsg)
-    finally:
-        outfile.close()
-        curl.close()
+class URLGrabber(object):
+    '''grab an url and save to local file'''
+
+    def __init__(self, connect_timeout=30):
+        '''create Curl object and set one-time options'''
+        curl = pycurl.Curl()
+        curl.setopt(pycurl.FAILONERROR, True)
+        curl.setopt(pycurl.FOLLOWLOCATION, True)
+        curl.setopt(pycurl.SSL_VERIFYPEER, False)
+        curl.setopt(pycurl.SSL_VERIFYHOST, False)
+        curl.setopt(pycurl.CONNECTTIMEOUT, connect_timeout)
+        #curl.setopt(pycurl.VERBOSE, 1)
+        self.curl = curl
+
+    def change_url(self, url, outfile, user, passwd):
+        '''change options for individual url'''
+
+        curl = self.curl
+        curl.url = url
+        curl.setopt(pycurl.URL, url)
+        curl.setopt(pycurl.WRITEDATA, outfile)
+        if user:
+            userpwd = user
+            if passwd:
+                userpwd = '%s:%s' % (user, passwd)
+            curl.setopt(pycurl.USERPWD, userpwd)
+
+    def perform(self):
+        '''do the real Curl perform work'''
+
+        curl = self.curl
+        try:
+            curl.perform()
+        except pycurl.error, err:
+            errcode = err.args[0]
+            if errcode == pycurl.E_OPERATION_TIMEOUTED:
+                raise errors.UrlError('timeout on %s: %s' % (curl.url, err))
+            elif errcode == pycurl.E_FILESIZE_EXCEEDED:
+                raise errors.UrlError('max download size exceeded on %s'\
+                                      % curl.url)
+            else:
+                errmsg = 'pycurl error %s - "%s"' % (errcode, str(err.args[1]))
+                raise errors.UrlError(errmsg)
+
+    def __del__(self):
+        '''close Curl object'''
+        self.curl.close()
+        self.curl = None
+
+    def grab(self, url, filename, user=None, passwd=None):
+        '''grab url to filename'''
+
+        with open(filename, 'w') as outfile:
+            self.change_url(url, outfile, user, passwd)
+            self.perform()
 
-    return filename
 
 
 class RepoParser(object):
     """ Repository parser for generate real repourl and build config
@@ -194,6 +217,7 @@ class RepoParser(object):
         self.buildmeta = None
         self.buildconf = None
         self.standardrepos = []
+        self.urlgrabber = URLGrabber()
         self.parse()
 
     def get_buildconf(self):
@@ -228,7 +252,7 @@ class RepoParser(object):
             repomd_url = os.path.join(repo, 'repodata/repomd.xml')
             repomd_file = os.path.join(self.cachedir, 'repomd.xml')
             try:
-                urlgrab(repomd_url, repomd_file)
+                self.urlgrabber.grab(repomd_url, repomd_file)
                 validrepos.append(repo)
             except errors.UrlError:
                 pass
@@ -246,7 +270,7 @@ class RepoParser(object):
             repomd_file = os.path.join(self.cachedir, 'repomd.xml')
 
             try:
-                urlgrab(repomd_url, repomd_file)
+                self.urlgrabber.grab(repomd_url, repomd_file)
                 self.standardrepos.append(repo)
                 # Try to download build.xml
                 buildxml_url = urlparse.urljoin(repo.rstrip('/') + '/', \
@@ -256,13 +280,13 @@ class RepoParser(object):
 
                 # Try to download build conf
                 if self.buildconf is None:
-                    urlgrab(buildxml_url, self.buildmeta)
+                    self.urlgrabber.grab(buildxml_url, self.buildmeta)
                     build_conf = self.get_buildconf()
                     buildconf_url = buildxml_url.replace(os.path.basename \
                                                          (buildxml_url), build_conf)
                     self.buildconf = os.path.join(self.cachedir, \
                                                   os.path.basename(buildconf_url))
-                    urlgrab(buildconf_url, self.buildconf)
+                    self.urlgrabber.grab(buildconf_url, self.buildconf)
                 # standard repo
                 continue
             except errors.UrlError:
@@ -277,7 +301,7 @@ class RepoParser(object):
             buildxml_url = os.path.join(repo, 'builddata/build.xml')
             self.buildmeta = os.path.join(self.cachedir, 'build.xml')
             try:
-                urlgrab(buildxml_url, self.buildmeta)
+                self.urlgrabber.grab(buildxml_url, self.buildmeta)
             except errors.UrlError:
                 self.buildmeta = None
                 continue
@@ -292,7 +316,7 @@ class RepoParser(object):
                                 'builddata/%s' % build_conf)
                 self.buildconf = os.path.join(self.cachedir, \
                                               os.path.basename(buildconf_url))
-                urlgrab(buildconf_url, self.buildconf)
+                self.urlgrabber.grab(buildconf_url, self.buildconf)
             except errors.UrlError:
                 self.buildconf = None
 
-- 
2.7.4
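
Editor's note: for illustration, a minimal usage sketch of the connection
reuse this patch enables. It is not part of the patch; URLGrabber and its
grab() signature come from the diff above, while the host, file names, and
the "from gitbuildsys import errors" import path are assumptions. Written
in the Python 2 style the patch itself uses.

    # One URLGrabber instance keeps a single pycurl handle alive, so
    # consecutive grabs against the same host reuse the TCP connection
    # instead of reconnecting for every file.
    from gitbuildsys.utils import URLGrabber   # class added by this patch
    from gitbuildsys import errors             # assumed import path for UrlError

    grabber = URLGrabber(connect_timeout=30)
    for name in ('repodata/repomd.xml', 'builddata/build.xml'):
        # hypothetical repo host; with pycurl.VERBOSE enabled, libcurl
        # logs "Re-using existing connection!" from the second request on
        url = 'http://download.example.org/repo/%s' % name
        try:
            grabber.grab(url, '/tmp/%s' % name.split('/')[-1])
        except errors.UrlError, err:
            print 'skipped %s: %s' % (url, err)

Because the handle lives for the whole RepoParser run, the repeated
repomd.xml/build.xml probes against one host pay the connection setup
cost only once, which is where the reported 30s-to-10s saving comes from.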