2 # Copyright (c) 2000 - 2016 Samsung Electronics Co., Ltd. All rights reserved.
5 # @author Chulwoo Shin <cw1.shin@samsung.com>
7 # Licensed under the Apache License, Version 2.0 (the "License");
8 # you may not use this file except in compliance with the License.
9 # You may obtain a copy of the License at
11 # http://www.apache.org/licenses/LICENSE-2.0
13 # Unless required by applicable law or agreed to in writing, software
14 # distributed under the License is distributed on an "AS IS" BASIS,
15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 # See the License for the specific language governing permissions and
17 # limitations under the License.
import base64
import collections
import hashlib
import logging
import os

from lxml import etree

from tic.utils import file
from tic.utils import misc
from tic.utils import process
from tic.utils.error import TICError
from tic.utils.grabber import myurlgrab2
def _get_uncompressed_data_from_url(url, filename, proxies=None):
    """Download *url* to *filename*, decompressing .gz/.bz2 payloads.

    :param url: remote location of the (possibly compressed) file
    :param filename: local destination path for the download
    :param proxies: currently unused; kept for interface compatibility
    :returns: path to the uncompressed local file
    """
    filename = myurlgrab2(url, filename)
    # Check if file compressed or not
    if filename.endswith(".gz"):
        decompress_filename = os.path.splitext(filename)[0]
        filename = file.decompress_gzip(filename, decompress_filename)
    elif filename.endswith(".bz2"):
        # bunzip2 -f replaces foo.bz2 with foo in place
        process.run(['bunzip2', "-f", filename])
        filename = os.path.splitext(filename)[0]
    # BUG FIX: without this return the function yielded None, but the
    # caller (_get_metadata_from_repo) uses the returned path.
    return filename
def _get_metadata_from_repo(baseurl, proxies, tempdir, cachedir, reponame, filehref,
                            sumtype=None, checksum=None):
    """Fetch one repodata file, using the local cache when the checksum matches.

    :param baseurl: repository base URL
    :param proxies: forwarded to the download helper
    :param tempdir: scratch directory for fresh downloads
    :param cachedir: directory holding previously downloaded metadata
    :param reponame: repository name (kept for interface compatibility)
    :param filehref: metadata path relative to *baseurl* (from repomd.xml)
    :param sumtype: checksum algorithm name (e.g. 'sha256', 'md5'), optional
    :param checksum: expected checksum of the uncompressed file, optional
    :returns: path of the cached (or freshly downloaded) uncompressed file
    """
    logger = logging.getLogger(__name__)
    url = os.path.join(baseurl, filehref)
    filename_tmp = str("%s/%s" % (cachedir, os.path.basename(filehref)))
    # The cache stores the uncompressed form, so strip a compression suffix.
    if os.path.splitext(filename_tmp)[1] in (".gz", ".bz2"):
        filename = os.path.splitext(filename_tmp)[0]
    else:
        filename = filename_tmp
    if sumtype and checksum and os.path.exists(filename):
        if sumtype == 'sha256':
            file_checksum = hashlib.sha256(open(filename, 'rb').read()).hexdigest()
        elif sumtype == 'md5':
            file_checksum = hashlib.md5(open(filename, 'rb').read()).hexdigest()
        else:
            # Fall back to the external "<sumtype>sum" tool for other algorithms.
            sumcmd = "%ssum" % sumtype
            result = process.run([sumcmd, filename])[1].strip()
            file_checksum = result.split()[0]
        if file_checksum and file_checksum == checksum:
            # Cache hit: reuse the verified local copy instead of downloading.
            logger.info('use a cache file - ' + str(filename))
            return filename
    # Cache miss (or no checksum to verify): download into tempdir,
    # then publish into the cache under a file lock.
    temp_file = os.path.join(tempdir, os.path.basename(filehref))
    file_path = _get_uncompressed_data_from_url(url, temp_file, proxies)
    return file.copyfile_flock(file_path, filename)
def get_repodata_from_repos(repos, cachedir):
    """Download and cache repodata (primary/patterns/comps) for each repo.

    :param repos: iterable of mappings with at least 'name' and 'url' keys
    :param cachedir: root directory for temp downloads and the metadata cache
    :returns: list of dicts describing each repository's cached metadata
    :raises TICError: if a repomd.xml cannot be parsed
    """
    my_repodata = []
    temp_path = os.path.join(cachedir, 'temp', str(misc.get_timestamp()))
    for repo in repos:
        reponame = repo.get('name')
        baseurl = repo.get('url')
        # Encode the URL so it is usable as a directory name.
        # NOTE(review): urlsafe_b64encode needs bytes on Python 3 — this code
        # assumes a str-accepting (Python 2) environment; confirm before porting.
        base64url = base64.urlsafe_b64encode(baseurl)
        temp_dir = os.path.join(temp_path, base64url)
        repomd_file = os.path.join(temp_dir, 'repomd.xml')
        file.make_dirs(temp_dir)

        #TODO: support local files(local directory)
        # local/remote repository
        url = os.path.join(baseurl, 'repodata/repomd.xml')
        repomd = myurlgrab2(url, repomd_file)

        try:
            tree = etree.parse(repomd)
            root = tree.getroot()
        except etree.XMLSyntaxError:
            raise TICError("Unable to parse repomd.xml. Please check the repomd from repository url(%s)", url)

        # Cache directory is keyed by the repomd checksum, so a changed
        # repository automatically gets a fresh cache slot.
        repo_checksum = hashlib.sha256(open(repomd_file, 'rb').read()).hexdigest()
        cache_dir = os.path.join(cachedir, 'cached', base64url, repo_checksum)
        file.make_dirs(cache_dir)

        # Extract the XML namespace prefix ("{...}") from the root tag.
        ns = root.tag
        ns = ns[0:ns.rindex("}") + 1]

        filepaths = dict()
        checksums = dict()
        sumtypes = dict()

        for elm in root.findall("%sdata" % ns):
            if elm.attrib['type'] == 'patterns':
                filepaths['patterns'] = elm.find("%slocation" % ns).attrib['href']
                checksums['patterns'] = elm.find("%sopen-checksum" % ns).text
                sumtypes['patterns'] = elm.find("%sopen-checksum" % ns).attrib['type']
            elif elm.attrib['type'] == 'group':
                filepaths['comps'] = elm.find("%slocation" % ns).attrib['href']
                checksums['comps'] = elm.find("%sopen-checksum" % ns).text
                sumtypes['comps'] = elm.find("%sopen-checksum" % ns).attrib['type']
            elif elm.attrib["type"] == 'primary':
                filepaths['primary'] = elm.find("%slocation" % ns).attrib['href']
                checksums['primary'] = elm.find("%sopen-checksum" % ns).text
                sumtypes['primary'] = elm.find("%sopen-checksum" % ns).attrib['type']

        for item in ("primary", "patterns", "comps"):
            if item not in filepaths:
                # Metadata type absent from this repo — record None and move on.
                filepaths[item] = None
                continue
            filepaths[item] = _get_metadata_from_repo(baseurl,
                                                      repo.get('proxies'),
                                                      temp_dir,
                                                      cache_dir,
                                                      reponame,
                                                      filepaths[item],
                                                      sumtypes[item],
                                                      checksums[item])

        my_repodata.append({"name": reponame,
                            "baseurl": baseurl,
                            "checksum": repo_checksum,
                            "repomd": repomd,
                            "primary": filepaths['primary'],
                            "cachedir": cache_dir,
                            "proxies": repo.get('proxies'),
                            "patterns": filepaths['patterns'],
                            "comps": filepaths['comps']})
    return my_repodata
# Lightweight immutable record describing a repository (name + base URL).
RepoType = collections.namedtuple('Repo', 'name, url')


def Repo(name, baseurl):
    """Build a RepoType record from a repository *name* and its *baseurl*."""
    return RepoType(name=name, url=baseurl)