#!/usr/bin/python
# Copyright (c) 2000 - 2016 Samsung Electronics Co., Ltd. All rights reserved.
#
# Contact:
# @author Chulwoo Shin <cw1.shin@samsung.com>

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Contributors:
# - S-Core Co., Ltd
import logging
import os
import base64
import hashlib
import collections

from lxml import etree

from tic.config import configmgr
from tic.utils import file
from tic.utils import misc
from tic.utils import process
from tic.utils.error import TICError
from tic.utils.grabber import myurlgrab2

def _get_uncompressed_data_from_url(url, filename, proxies=None):
    """Download url to filename and decompress it if it is .gz or .bz2.

    Returns the path of the (decompressed) file. The proxies argument is
    accepted for symmetry with the callers but is not used here.
    """
    # download the file
    filename = myurlgrab2(url, filename)
    # decompress it if the name indicates a compressed payload
    if filename.endswith(".gz"):
        decompress_filename = os.path.splitext(filename)[0]
        filename = file.decompress_gzip(filename, decompress_filename)
    elif filename.endswith(".bz2"):
        process.run(['bunzip2', "-f", filename])
        filename = os.path.splitext(filename)[0]
    return filename

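# A minimal illustration of the helper above (the URL and paths are
# hypothetical): fetching a gzip-compressed file yields the path of the
# decompressed copy, while an uncompressed file comes back unchanged.
#
#   path = _get_uncompressed_data_from_url(
#       'http://example.com/repodata/primary.xml.gz',
#       '/tmp/primary.xml.gz')
#   # path == '/tmp/primary.xml'
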
def _get_metadata_from_repo(baseurl, proxies, tempdir, cachedir, reponame,
                            filehref, sumtype=None, checksum=None):
    logger = logging.getLogger(__name__)
    url = os.path.join(baseurl, filehref)
    filename_tmp = os.path.join(cachedir, os.path.basename(filehref))
    # the cached copy is stored decompressed, so drop a .gz/.bz2 suffix
    if os.path.splitext(filename_tmp)[1] in (".gz", ".bz2"):
        filename = os.path.splitext(filename_tmp)[0]
    else:
        filename = filename_tmp
    if sumtype and checksum and os.path.exists(filename):
        if sumtype == 'sha256':
            with open(filename, 'rb') as f:
                file_checksum = hashlib.sha256(f.read()).hexdigest()
        elif sumtype == 'md5':
            with open(filename, 'rb') as f:
                file_checksum = hashlib.md5(f.read()).hexdigest()
        else:
            # fall back to an external "<sumtype>sum" tool, e.g. sha1sum
            sumcmd = "%ssum" % sumtype
            result = process.run([sumcmd, filename])[1].strip()
            file_checksum = result.split()[0]
        # reuse the cached file if its checksum matches the repomd entry
        if file_checksum and file_checksum == checksum:
            logger.info('using cached file - %s', filename)
            return filename

    # no usable cache: download (and decompress) into tempdir, then move
    # the result into the cache directory under a file lock
    temp_file = os.path.join(tempdir, os.path.basename(filehref))
    file_path = _get_uncompressed_data_from_url(url, temp_file, proxies)
    return file.copyfile_flock(file_path, filename)

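# For reference, the loop below walks the <data> entries of repomd.xml,
# which look roughly like this (an illustrative fragment; real repositories
# carry more entry types and attributes):
#
#   <repomd xmlns="http://linux.duke.edu/metadata/repo">
#     <data type="primary">
#       <location href="repodata/primary.xml.gz"/>
#       <open-checksum type="sha256">0123abcd...</open-checksum>
#     </data>
#     <data type="group">...</data>
#     <data type="patterns">...</data>
#   </repomd>
#
# The root tag carries the XML namespace, which is why the code slices it
# off root.tag and prefixes every find()/findall() query with it.
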
def get_repodata_from_repos(repos, cachedir):
    logger = logging.getLogger(__name__)
    my_repodata = []
    temp_path = os.path.join(cachedir, 'temp', str(misc.get_timestamp()))
    for repo in repos:
        reponame = repo.get('name')
        baseurl = repo.get('url')

        # make a per-repository temp dir keyed by the base64-encoded url
        base64url = base64.urlsafe_b64encode(baseurl)
        temp_dir = os.path.join(temp_path, base64url)
        repomd_file = os.path.join(temp_dir, 'repomd.xml')
        file.make_dirs(temp_dir)

        # TODO: support local files (a local directory as repository)
        url = os.path.join(baseurl, 'repodata/repomd.xml')
        repomd = myurlgrab2(url, repomd_file)

        try:
            tree = etree.parse(repomd)
            root = tree.getroot()
        except etree.XMLSyntaxError as e:
            logger.info(e)
            raise TICError(configmgr.message['xml_parse_error'] % ('repomd.xml', url))

        # make a cache dir keyed by the checksum of repomd.xml, so that a
        # changed repository automatically gets a fresh cache directory
        with open(repomd_file, 'rb') as f:
            repo_checksum = hashlib.sha256(f.read()).hexdigest()
        cache_dir = os.path.join(cachedir, 'cached', base64url, repo_checksum)
        file.make_dirs(cache_dir)

        # extract the namespace prefix from the root tag, e.g. "{http://...}"
        ns = root.tag
        ns = ns[0:ns.rindex("}") + 1]

        filepaths = {}
        checksums = {}
        sumtypes = {}

        # collect location and open-checksum info for the metadata we use;
        # the "group" entry of repomd.xml maps to comps data
        type_map = {'primary': 'primary', 'patterns': 'patterns', 'group': 'comps'}
        for elm in root.findall("%sdata" % ns):
            item = type_map.get(elm.attrib['type'])
            if item is None:
                continue
            filepaths[item] = elm.find("%slocation" % ns).attrib['href']
            checksums[item] = elm.find("%sopen-checksum" % ns).text
            sumtypes[item] = elm.find("%sopen-checksum" % ns).attrib['type']

        # fetch each metadata file, reusing the cached copy when possible
        for item in ("primary", "patterns", "comps"):
            if item not in filepaths:
                filepaths[item] = None
                continue
            filepaths[item] = _get_metadata_from_repo(baseurl,
                                                      None,
                                                      temp_dir,
                                                      cache_dir,
                                                      reponame,
                                                      filepaths[item],
                                                      sumtypes[item],
                                                      checksums[item])
        my_repodata.append({"name": reponame,
                            "baseurl": baseurl,
                            "checksum": repo_checksum,
                            "repomd": repomd,
                            "primary": filepaths['primary'],
                            "cachedir": cache_dir,
                            "proxies": None,
                            "patterns": filepaths['patterns'],
                            "comps": filepaths['comps']})
    return my_repodata


RepoType = collections.namedtuple('Repo', 'name, url')

def Repo(name, baseurl):
    return RepoType(name, baseurl)
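
# A minimal usage sketch (the repository name, URL, and cache directory are
# placeholders, not values from this project): get_repodata_from_repos()
# expects dict-like repo entries, since it reads them with .get().
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    sample_repos = [{'name': 'base',
                     'url': 'http://example.com/repos/standard/packages/'}]
    for data in get_repodata_from_repos(sample_repos, '/var/tmp/tic-cache'):
        print('%s: primary=%s' % (data['name'], data['primary']))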