[tic-core] fix: network operation of tic_server.py
[archive/20170607/tools/tic-core.git] / tic / repo.py
#!/usr/bin/python
# Copyright (c) 2016 Samsung Electronics Co., Ltd
#
# Licensed under the Flora License, Version 1.1 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://floralicense.org/license/
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Contributors:
# - S-Core Co., Ltd

import logging
import os
import base64
import hashlib
import collections
from lxml import etree
from tic.utils import file
from tic.utils import process
from tic.utils.error import TICError
from tic.utils.grabber import myurlgrab2
from tic.utils import misc
from tic.config import configmgr

REPOMD_EL_PRIMARY = 'primary'
REPOMD_EL_PATTERNS = 'patterns'
REPOMD_EL_COMPS = 'comps'
REPOMD_EL_GROUP = 'group'
REPOMD_EL_TYPE = 'type'
REPOMD_ATTRIB_LOCATION = '%slocation'
REPOMD_ATTRIB_OPEN_CHECKSUM = '%sopen-checksum'

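# Note: the two format strings above are not referenced below (tag names are
# built with ''.join([namespace, ...]) instead); their intended use would be:
#   element.find(REPOMD_ATTRIB_LOCATION % namespace)
#   element.find(REPOMD_ATTRIB_OPEN_CHECKSUM % namespace)
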
def _get_uncompressed_data_from_url(url, filename, proxies=None):
    """Download url to filename and return the path of the uncompressed file.

    .gz files are decompressed in-process, .bz2 files via the external
    bunzip2 tool. 'proxies' is accepted for interface compatibility but is
    currently unused.
    """
    # download file
    filename = myurlgrab2(url, filename)
    # check whether the downloaded file is compressed
    if filename.endswith(".gz"):
        decompress_filename = os.path.splitext(filename)[0]
        filename = file.decompress_gzip(filename, decompress_filename)
    elif filename.endswith(".bz2"):
        process.run(['bunzip2', "-f", filename])
        filename = os.path.splitext(filename)[0]
    return filename

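# Illustrative example (hypothetical URL and paths):
#   _get_uncompressed_data_from_url('http://example.org/repodata/primary.xml.gz',
#                                   '/tmp/primary.xml.gz')
# downloads the file, decompresses it, and returns '/tmp/primary.xml'.
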
def _get_repodata(baseurl, proxies, tempdir, cachedir, reponame, filehref,
                  sumtype=None, checksum=None):
    logger = logging.getLogger(__name__)
    url = os.path.join(baseurl, filehref)
    filename_tmp = str("%s/%s" % (cachedir, os.path.basename(filehref)))
    if os.path.splitext(filename_tmp)[1] in (".gz", ".bz2"):
        filename = os.path.splitext(filename_tmp)[0]
    else:
        filename = filename_tmp
    # reuse the cached file if its checksum matches the one from repomd.xml
    if sumtype and checksum and os.path.exists(filename):
        if sumtype == 'sha256':
            with open(filename, 'rb') as f:
                file_checksum = hashlib.sha256(f.read()).hexdigest()
        elif sumtype == 'md5':
            with open(filename, 'rb') as f:
                file_checksum = hashlib.md5(f.read()).hexdigest()
        else:
            # fall back to an external checksum tool, e.g. sha1sum
            sumcmd = "%ssum" % sumtype
            result = process.run([sumcmd, filename])[1].strip()
            file_checksum = result.split()[0]
        # use cached file
        if file_checksum and file_checksum == checksum:
            logger.info('use a cache file - ' + str(filename))
            return filename

    # cache miss: download and decompress into tempdir, then copy into the
    # cache under a file lock
    temp_file = os.path.join(tempdir, os.path.basename(filehref))
    file_path = _get_uncompressed_data_from_url(url, temp_file, proxies)
    return file.copyfile_flock(file_path, filename)

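# Example call (hypothetical values):
#   _get_repodata('http://example.org/repo', None, '/tmp/tic/temp',
#                 '/var/tmp/tic-core/cachedir', 'base',
#                 'repodata/primary.xml.gz', 'sha256', '0123abcd...')
# returns the local path of the uncompressed primary.xml, taken from the
# cache when the checksum matches and downloaded afresh otherwise.
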
def get_repodata_from_repos(repos, cachedir):
    logger = logging.getLogger(__name__)

    def _set_attrib(ns, key, element):
        # record location, checksum and checksum type of a repomd <data> element
        fpath_info[key] = element.find(''.join([ns, 'location'])).attrib['href']
        checksum = element.find(''.join([ns, 'open-checksum']))
        checksum_info[key] = checksum.text
        sumtype_info[key] = checksum.attrib['type']

    repodata = []
    temp_path = os.path.join(cachedir, 'temp', str(misc.get_timestamp()))
    for repo in repos:
        reponame = repo.get('name')
        baseurl = repo.get('url')

        # make a per-repository temp dir keyed by the base64-encoded URL
        base64url = base64.urlsafe_b64encode(baseurl)
        temp_dir = os.path.join(temp_path, base64url)
        repomd_file = os.path.join(temp_dir, 'repomd.xml')
        file.make_dirs(temp_dir)

        # TODO: support local files (local directory)
        # local/remote repository
        url = os.path.join(baseurl, 'repodata/repomd.xml')
        repomd = myurlgrab2(url, repomd_file)

        try:
            tree = etree.parse(repomd)
            t_root = tree.getroot()
        except etree.XMLSyntaxError as e:
            logger.info(e)
            raise TICError(configmgr.message['xml_parse_error'] % ('repomd.xml', url))

        # make cache_dir, keyed by the sha256 checksum of repomd.xml
        with open(repomd_file, 'rb') as f:
            repo_checksum = hashlib.sha256(f.read()).hexdigest()
        cache_dir = os.path.join(cachedir, 'cached', base64url, repo_checksum)
        file.make_dirs(cache_dir)

        fpath_info = dict()
        checksum_info = dict()
        sumtype_info = dict()

        # extract the namespace prefix of the root tag,
        # e.g. '{http://linux.duke.edu/metadata/repo}'
        namespace = t_root.tag
        namespace = namespace[0:namespace.rindex('}') + 1]

        for element in t_root.findall(''.join([namespace, 'data'])):
            if element.attrib[REPOMD_EL_TYPE] == REPOMD_EL_GROUP:
                # group (comps)
                _set_attrib(namespace, REPOMD_EL_COMPS, element)
            else:
                # type: primary, patterns
                _set_attrib(namespace, element.attrib[REPOMD_EL_TYPE], element)

        for i_name in [REPOMD_EL_PRIMARY, REPOMD_EL_PATTERNS, REPOMD_EL_COMPS]:
            if i_name in fpath_info:
                fpath_info[i_name] = _get_repodata(baseurl,
                                                   None,
                                                   temp_dir,
                                                   cache_dir,
                                                   reponame,
                                                   fpath_info[i_name],
                                                   sumtype_info[i_name],
                                                   checksum_info[i_name])
            else:
                fpath_info[i_name] = None

        repodata.append({"name": reponame,
                         "baseurl": baseurl,
                         "checksum": repo_checksum,
                         "repomd": repomd,
                         "primary": fpath_info['primary'],
                         "cachedir": cache_dir,
                         "proxies": None,
                         "patterns": fpath_info['patterns'],
                         "comps": fpath_info['comps']})
    return repodata

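# Shape of each returned record (values illustrative):
#   {'name': 'base', 'baseurl': 'http://example.org/repo',
#    'checksum': '<sha256 of repomd.xml>', 'repomd': '<path to repomd.xml>',
#    'primary': '<path to primary.xml>', 'cachedir': '<cache directory>',
#    'proxies': None, 'patterns': '<path or None>', 'comps': '<path or None>'}
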

# Lightweight repository descriptor. Note that get_repodata_from_repos()
# above reads entries with dict-style .get(), so it expects dicts; passing
# these namedtuples to it would fail.
RepoType = collections.namedtuple('Repo', 'name, url')
def Repo(name, baseurl):
    return RepoType(name, baseurl)
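
# Usage sketch (not part of the original module; the URL and cache path are
# illustrative assumptions):
if __name__ == '__main__':
    sample_repos = [{'name': 'tizen-base',
                     'url': 'http://download.tizen.org/snapshots/tizen/'
                            'base/latest/repos/standard/packages'}]
    for info in get_repodata_from_repos(sample_repos, '/var/tmp/tic-core/cachedir'):
        print('%s: primary=%s' % (info['name'], info['primary']))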