From c088e3075b3fd5034d2709f7ed1df1162e7b47ae Mon Sep 17 00:00:00 2001
From: Huang Hao
Date: Mon, 20 Aug 2012 19:45:38 +0800
Subject: [PATCH] Use pycurl Keep-Alive feature to save download time.

When GBS parses a "latest" type of repo URL, it tries to download
several URLs. This usually takes about 30 seconds to complete. With
keep-alive it takes about 10 seconds.

pycurl prints "Re-using existing connection!" when pycurl.VERBOSE is
set to 1.

Change-Id: Ib78b204c26d9077c15e0ea7ff7b032ad2314546f
---
 gitbuildsys/utils.py | 100 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 62 insertions(+), 38 deletions(-)

diff --git a/gitbuildsys/utils.py b/gitbuildsys/utils.py
index 270d3c4..fbb6804 100644
--- a/gitbuildsys/utils.py
+++ b/gitbuildsys/utils.py
@@ -147,40 +147,63 @@ class TempCopy(object):
         if os.path.exists(self.name):
             os.unlink(self.name)
 
-def urlgrab(url, filename, user = None, passwd = None):
-
-    outfile = open(filename, 'w')
-    curl = pycurl.Curl()
-    curl.setopt(pycurl.URL, url)
-    curl.setopt(pycurl.WRITEDATA, outfile)
-    curl.setopt(pycurl.FAILONERROR, True)
-    curl.setopt(pycurl.FOLLOWLOCATION, True)
-    curl.setopt(pycurl.SSL_VERIFYPEER, False)
-    curl.setopt(pycurl.SSL_VERIFYHOST, False)
-    curl.setopt(pycurl.CONNECTTIMEOUT, 30)
-    if user:
-        userpwd = user
-        if passwd:
-            userpwd = '%s:%s' % (user, passwd)
-        curl.setopt(pycurl.USERPWD, userpwd)
-    try:
-        curl.perform()
-    except pycurl.error, err:
-        errcode = err.args[0]
-        if errcode == pycurl.E_OPERATION_TIMEOUTED:
-            raise errors.UrlError('timeout on %s: %s' % (url, err))
-        elif errcode == pycurl.E_FILESIZE_EXCEEDED:
-            raise errors.UrlError('max download size exceeded on %s'\
-                                  % url)
-        else:
-            errmsg = 'pycurl error %s - "%s"' % (errcode, str(err.args[1]))
-            raise errors.UrlError(errmsg)
-    finally:
-        outfile.close()
-        curl.close()
+class URLGrabber(object):
+    '''grab an url and save to local file'''
+
+    def __init__(self, connect_timeout=30):
+        '''create Curl object and set one-time options'''
+        curl = pycurl.Curl()
+        curl.setopt(pycurl.FAILONERROR, True)
+        curl.setopt(pycurl.FOLLOWLOCATION, True)
+        curl.setopt(pycurl.SSL_VERIFYPEER, False)
+        curl.setopt(pycurl.SSL_VERIFYHOST, False)
+        curl.setopt(pycurl.CONNECTTIMEOUT, connect_timeout)
+        #curl.setopt(pycurl.VERBOSE, 1)
+        self.curl = curl
+
+    def change_url(self, url, outfile, user, passwd):
+        '''change options for individual url'''
+
+        curl = self.curl
+        curl.url = url
+        curl.setopt(pycurl.URL, url)
+        curl.setopt(pycurl.WRITEDATA, outfile)
+        if user:
+            userpwd = user
+            if passwd:
+                userpwd = '%s:%s' % (user, passwd)
+            curl.setopt(pycurl.USERPWD, userpwd)
+
+    def perform(self):
+        '''do the real Curl perform work'''
+
+        curl = self.curl
+        try:
+            curl.perform()
+        except pycurl.error, err:
+            errcode = err.args[0]
+            if errcode == pycurl.E_OPERATION_TIMEOUTED:
+                raise errors.UrlError('timeout on %s: %s' % (curl.url, err))
+            elif errcode == pycurl.E_FILESIZE_EXCEEDED:
+                raise errors.UrlError('max download size exceeded on %s'\
+                                      % curl.url)
+            else:
+                errmsg = 'pycurl error %s - "%s"' % (errcode, str(err.args[1]))
+                raise errors.UrlError(errmsg)
+
+    def __del__(self):
+        '''close Curl object'''
+        self.curl.close()
+        self.curl = None
+
+    def grab(self, url, filename, user=None, passwd=None):
+        '''grab url to filename'''
+
+        with open(filename, 'w') as outfile:
+            self.change_url(url, outfile, user, passwd)
+            self.perform()
 
-    return filename
 
 
 class RepoParser(object):
     """ Repository parser for generate real repourl and build config
@@ -194,6 +217,7 @@ class RepoParser(object):
         self.buildmeta = None
         self.buildconf = None
         self.standardrepos = []
+        self.urlgrabber = URLGrabber()
         self.parse()
 
     def get_buildconf(self):
@@ -228,7 +252,7 @@ class RepoParser(object):
             repomd_url = os.path.join(repo, 'repodata/repomd.xml')
             repomd_file = os.path.join(self.cachedir, 'repomd.xml')
             try:
-                urlgrab(repomd_url, repomd_file)
+                self.urlgrabber.grab(repomd_url, repomd_file)
                 validrepos.append(repo)
             except errors.UrlError:
                 pass
@@ -246,7 +270,7 @@ class RepoParser(object):
             repomd_file = os.path.join(self.cachedir, 'repomd.xml')
 
             try:
-                urlgrab(repomd_url, repomd_file)
+                self.urlgrabber.grab(repomd_url, repomd_file)
                 self.standardrepos.append(repo)
                 # Try to download build.xml
                 buildxml_url = urlparse.urljoin(repo.rstrip('/') + '/', \
@@ -256,13 +280,13 @@ class RepoParser(object):
 
                 # Try to download build conf
                 if self.buildconf is None:
-                    urlgrab(buildxml_url, self.buildmeta)
+                    self.urlgrabber.grab(buildxml_url, self.buildmeta)
                     build_conf = self.get_buildconf()
                     buildconf_url = buildxml_url.replace(os.path.basename \
                                                          (buildxml_url), build_conf)
                     self.buildconf = os.path.join(self.cachedir, \
                                                   os.path.basename(buildconf_url))
-                    urlgrab(buildconf_url, self.buildconf)
+                    self.urlgrabber.grab(buildconf_url, self.buildconf)
                 # standard repo
                 continue
             except errors.UrlError:
@@ -277,7 +301,7 @@ class RepoParser(object):
             buildxml_url = os.path.join(repo, 'builddata/build.xml')
             self.buildmeta = os.path.join(self.cachedir, 'build.xml')
             try:
-                urlgrab(buildxml_url, self.buildmeta)
+                self.urlgrabber.grab(buildxml_url, self.buildmeta)
             except errors.UrlError:
                 self.buildmeta = None
                 continue
@@ -292,7 +316,7 @@ class RepoParser(object):
                                 'builddata/%s' % build_conf)
                 self.buildconf = os.path.join(self.cachedir, \
                                               os.path.basename(buildconf_url))
-                urlgrab(buildconf_url, self.buildconf)
+                self.urlgrabber.grab(buildconf_url, self.buildconf)
             except errors.UrlError:
                 self.buildconf = None
 
-- 
2.7.4
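
Editor's note: for illustration, a minimal usage sketch of the connection
reuse this patch enables. It is not part of the patch; URLGrabber and its
grab() signature come from the diff above, while the host, file names, and
the "from gitbuildsys import errors" import path are assumptions. Written
in the Python 2 style the patch itself uses.

    # One URLGrabber instance keeps a single pycurl handle alive, so
    # consecutive grabs against the same host reuse the TCP connection
    # instead of reconnecting for every file.
    from gitbuildsys.utils import URLGrabber   # class added by this patch
    from gitbuildsys import errors             # assumed import path for UrlError

    grabber = URLGrabber(connect_timeout=30)
    for name in ('repodata/repomd.xml', 'builddata/build.xml'):
        # hypothetical repo host; with pycurl.VERBOSE enabled, libcurl
        # logs "Re-using existing connection!" from the second request on
        url = 'http://download.example.org/repo/%s' % name
        try:
            grabber.grab(url, '/tmp/%s' % name.split('/')[-1])
        except errors.UrlError, err:
            print 'skipped %s: %s' % (url, err)

Because the handle lives for the whole RepoParser run, the repeated
repomd.xml/build.xml probes against one host pay the connection setup
cost only once, which is where the reported 30s-to-10s saving comes from.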