[TIC-CORE] support caching for analysis data
[archive/20170607/tools/tic-core.git] / tic/repo.py
#!/usr/bin/python
# Copyright (c) 2000 - 2016 Samsung Electronics Co., Ltd. All rights reserved.
#
# Contact:
# @author Chulwoo Shin <cw1.shin@samsung.com>

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Contributors:
# - S-Core Co., Ltd
import logging
import os
import base64
import hashlib
import collections
from lxml import etree
from tic.utils import file
from tic.utils import process
from tic.utils.error import TICError
from tic.utils.grabber import myurlgrab2
from tic.utils import misc

def _get_uncompressed_data_from_url(url, filename, proxies=None):
    # download the file (note: proxies is accepted for interface consistency
    # but is not passed on to myurlgrab2 here)
    filename = myurlgrab2(url, filename)
    # decompress the downloaded file if it is gzip- or bzip2-compressed
    if filename.endswith(".gz"):
        decompress_filename = os.path.splitext(filename)[0]
        filename = file.decompress_gzip(filename, decompress_filename)
    elif filename.endswith(".bz2"):
        process.run(['bunzip2', "-f", filename])
        filename = os.path.splitext(filename)[0]
    return filename

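# Illustrative call (the url and paths below are hypothetical):
#
#   path = _get_uncompressed_data_from_url(
#       'http://example.com/repodata/primary.xml.gz', '/tmp/primary.xml.gz')
#   # downloads primary.xml.gz and returns '/tmp/primary.xml' after decompression
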
def _get_metadata_from_repo(baseurl, proxies, tempdir, cachedir, reponame, filehref,
                            sumtype=None, checksum=None):
    logger = logging.getLogger(__name__)
    url = os.path.join(baseurl, filehref)
    filename_tmp = "%s/%s" % (cachedir, os.path.basename(filehref))
    if os.path.splitext(filename_tmp)[1] in (".gz", ".bz2"):
        filename = os.path.splitext(filename_tmp)[0]
    else:
        filename = filename_tmp
    if sumtype and checksum and os.path.exists(filename):
        if sumtype == 'sha256':
            with open(filename, 'rb') as f:
                file_checksum = hashlib.sha256(f.read()).hexdigest()
        elif sumtype == 'md5':
            with open(filename, 'rb') as f:
                file_checksum = hashlib.md5(f.read()).hexdigest()
        else:
            # fall back to the external checksum tool (e.g. sha1sum)
            sumcmd = "%ssum" % sumtype
            result = process.run([sumcmd, filename])[1].strip()
            file_checksum = result.split()[0]
        # reuse the cached file when its checksum matches the one from repomd.xml
        if file_checksum and file_checksum == checksum:
            logger.info('use a cache file - ' + str(filename))
            return filename

    temp_file = os.path.join(tempdir, os.path.basename(filehref))
    file_path = _get_uncompressed_data_from_url(url, temp_file, proxies)
    return file.copyfile_flock(file_path, filename)

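# Illustrative call (url, paths, and checksum values are hypothetical):
#
#   primary = _get_metadata_from_repo(
#       'http://example.com/repo', None, temp_dir, cache_dir, 'base',
#       'repodata/1234-primary.xml.gz', sumtype='sha256', checksum='1234...')
#   # returns the cached uncompressed file when its sha256 matches repomd.xml,
#   # otherwise downloads and decompresses it, then copies it into the cache
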
def get_repodata_from_repos(repos, cachedir):
    my_repodata = []
    temp_path = os.path.join(cachedir, 'temp', str(misc.get_timestamp()))
    for repo in repos:
        reponame = repo.get('name')
        baseurl = repo.get('url')

        # make temp_dir (one directory per repo, keyed by the base64-encoded url)
        base64url = base64.urlsafe_b64encode(baseurl)
        temp_dir = os.path.join(temp_path, base64url)
        repomd_file = os.path.join(temp_dir, 'repomd.xml')
        file.make_dirs(temp_dir)

        # TODO: support local files (local directory)
        # local/remote repository
        url = os.path.join(baseurl, 'repodata/repomd.xml')
        repomd = myurlgrab2(url, repomd_file)

        try:
            tree = etree.parse(repomd)
            root = tree.getroot()
        except etree.XMLSyntaxError:
            raise TICError("Unable to parse repomd.xml. Please check the repomd from repository url(%s)" % url)

        # make cache_dir, keyed by the sha256 of repomd.xml so an updated
        # repository automatically gets a fresh cache directory
        with open(repomd_file, 'rb') as f:
            repo_checksum = hashlib.sha256(f.read()).hexdigest()
        cache_dir = os.path.join(cachedir, 'cached', base64url, repo_checksum)
        file.make_dirs(cache_dir)

        # extract the xml namespace, e.g. '{http://linux.duke.edu/metadata/repo}'
        ns = root.tag
        ns = ns[0:ns.rindex("}") + 1]

        filepaths = {}
        checksums = {}
        sumtypes = {}

        # collect location and open-checksum info for the metadata types we
        # care about; repomd's 'group' entry is stored under the 'comps' key
        type_map = {'patterns': 'patterns', 'group': 'comps', 'primary': 'primary'}
        for elm in root.findall("%sdata" % ns):
            item = type_map.get(elm.attrib['type'])
            if item is None:
                continue
            filepaths[item] = elm.find("%slocation" % ns).attrib['href']
            checksums[item] = elm.find("%sopen-checksum" % ns).text
            sumtypes[item] = elm.find("%sopen-checksum" % ns).attrib['type']

        for item in ("primary", "patterns", "comps"):
            if item not in filepaths:
                filepaths[item] = None
                continue
            filepaths[item] = _get_metadata_from_repo(baseurl,
                                                      None,
                                                      temp_dir,
                                                      cache_dir,
                                                      reponame,
                                                      filepaths[item],
                                                      sumtypes[item],
                                                      checksums[item])
        my_repodata.append({"name": reponame,
                            "baseurl": baseurl,
                            "checksum": repo_checksum,
                            "repomd": repomd,
                            "primary": filepaths['primary'],
                            "cachedir": cache_dir,
                            "proxies": None,
                            "patterns": filepaths['patterns'],
                            "comps": filepaths['comps']})
    return my_repodata

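# Each record in the returned list has this shape (values are illustrative;
# actual file names depend on the hrefs published in repomd.xml):
#
#   {'name': 'base',
#    'baseurl': 'http://example.com/repo',
#    'checksum': '9f2c...',                     # sha256 of repomd.xml
#    'repomd': '<temp_dir>/repomd.xml',
#    'primary': '<cache_dir>/primary.xml',
#    'cachedir': '<cache_dir>',
#    'proxies': None,
#    'patterns': '<cache_dir>/patterns.xml',    # None if the repo has none
#    'comps': '<cache_dir>/comps.xml'}          # None if the repo has none
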
RepoType = collections.namedtuple('Repo', 'name, url')
def Repo(name, baseurl):
    # convenience factory: builds a lightweight repo descriptor whose 'url'
    # field holds the given baseurl
    return RepoType(name, baseurl)
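
# Minimal usage sketch; the repository url and cache directory below are
# placeholders rather than real endpoints, so this is illustrative only.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    test_repos = [{'name': 'base', 'url': 'http://example.com/snapshots/base'}]
    repodata = get_repodata_from_repos(test_repos, '/var/tmp/tic-core/cache')
    for data in repodata:
        print('%s: primary=%s' % (data['name'], data['primary']))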