#
# Copyright (c) 2000 - 2016 Samsung Electronics Co., Ltd. All rights reserved.
#
# @author Chulwoo Shin <cw1.shin@samsung.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
import collections
import hashlib
import logging
import os

from lxml import etree

from tic.utils import file
from tic.utils import misc
from tic.utils import process
from tic.utils.error import TICError
from tic.utils.grabber import myurlgrab2
def _get_uncompressed_data_from_url(url, filename, proxies=None):
    """Download *url* to *filename* and decompress it in place if compressed.

    Args:
        url: remote URL of the (possibly compressed) file.
        filename: local destination path for the download.
        proxies: accepted for interface compatibility; not used here —
            TODO(review): confirm whether the grabber should receive it.

    Returns:
        Path of the local, decompressed file.
    """
    filename = myurlgrab2(url, filename)
    # Repodata files are commonly served as .gz or .bz2; unpack transparently.
    if filename.endswith(".gz"):
        decompress_filename = os.path.splitext(filename)[0]
        filename = file.decompress_gzip(filename, decompress_filename)
    elif filename.endswith(".bz2"):
        # bunzip2 -f replaces the archive with the unpacked file on disk.
        process.run(['bunzip2', "-f", filename])
        filename = os.path.splitext(filename)[0]
    # Return the final path so callers (e.g. _get_metadata_from_repo) can
    # copy it into the cache; without this the function would yield None.
    return filename
46 def _get_metadata_from_repo(baseurl, proxies, tempdir, cachedir, reponame, filehref,
47 sumtype=None, checksum=None):
48 logger = logging.getLogger(__name__)
49 url = os.path.join(baseurl, filehref)
50 filename_tmp = str("%s/%s" % (cachedir, os.path.basename(filehref)))
51 if os.path.splitext(filename_tmp)[1] in (".gz", ".bz2"):
52 filename = os.path.splitext(filename_tmp)[0]
54 filename = filename_tmp
55 if sumtype and checksum and os.path.exists(filename):
56 if sumtype == 'sha256':
57 file_checksum = hashlib.sha256(open(filename, 'rb').read()).hexdigest()
58 elif sumtype == 'md5':
59 file_checksum = hashlib.md5(open(filename, 'rb').read()).hexdigest()
61 sumcmd = "%ssum" % sumtype
62 result = process.run([sumcmd, filename])[1].strip()
63 file_checksum = result.split()[0]
65 if file_checksum and file_checksum == checksum:
66 logger.info('use a cache file - ' + str(filename))
69 temp_file = os.path.join(tempdir, os.path.basename(filehref))
70 file_path =_get_uncompressed_data_from_url(url, temp_file, proxies)
71 return file.copyfile_flock(file_path, filename)
def get_repodata_from_repos(repos, cachedir):
    """Collect repodata (repomd, primary, patterns, comps) for every repo.

    Args:
        repos: iterable of dicts with at least 'name' and 'url' keys.
        cachedir: root directory for temp downloads and the metadata cache.

    Returns:
        List of dicts, one per repository, describing its metadata files.

    Raises:
        TICError: when a downloaded repomd.xml cannot be parsed.
    """
    my_repodata = []
    # Per-run scratch area; timestamped so concurrent runs do not collide.
    temp_path = os.path.join(cachedir, 'temp', str(misc.get_timestamp()))
    for repo in repos:
        reponame = repo.get('name')
        baseurl = repo.get('url')
        # base64 of the URL gives a filesystem-safe directory name.
        # NOTE(review): urlsafe_b64encode needs bytes on Python 3; this code
        # base appears to target Python 2 — confirm before porting.
        base64url = base64.urlsafe_b64encode(baseurl)
        temp_dir = os.path.join(temp_path, base64url)
        repomd_file = os.path.join(temp_dir, 'repomd.xml')
        file.make_dirs(temp_dir)

        # repomd.xml is the index that locates all other repodata files.
        url = os.path.join(baseurl, 'repodata/repomd.xml')
        repomd = myurlgrab2(url, repomd_file)
        try:
            tree = etree.parse(repomd)
            root = tree.getroot()
        except etree.XMLSyntaxError:
            raise TICError("repomd.xml syntax error.")

        # Cache is keyed by the repomd checksum, so a changed repository
        # automatically gets a fresh cache directory.
        with open(repomd_file, 'rb') as fobj:
            repo_checksum = hashlib.sha256(fobj.read()).hexdigest()
        cache_dir = os.path.join(cachedir, 'cached', base64url, repo_checksum)
        file.make_dirs(cache_dir)

        # Keep the XML namespace prefix ("{...}") of the root tag so child
        # lookups below can be fully qualified.
        ns = root.tag
        ns = ns[0:ns.rindex("}")+1]

        filepaths = {}
        checksums = {}
        sumtypes = {}
        # Map repomd <data type="..."> entries onto our metadata keys.
        type_to_key = {'patterns': 'patterns', 'group': 'comps', 'primary': 'primary'}
        for elm in root.findall("%sdata" % ns):
            key = type_to_key.get(elm.attrib['type'])
            if key is None:
                continue
            filepaths[key] = elm.find("%slocation" % ns).attrib['href']
            open_checksum = elm.find("%sopen-checksum" % ns)
            checksums[key] = open_checksum.text
            sumtypes[key] = open_checksum.attrib['type']

        # Fetch each metadata file (or reuse the cached copy).
        for item in ("primary", "patterns", "comps"):
            if item not in filepaths:
                filepaths[item] = None
                continue
            # NOTE(review): proxies support is not visible in this view;
            # None is passed through — confirm against the callers.
            filepaths[item] = _get_metadata_from_repo(baseurl,
                                                      None,
                                                      temp_dir,
                                                      cache_dir,
                                                      reponame,
                                                      filepaths[item],
                                                      sumtypes[item],
                                                      checksums[item])

        my_repodata.append({"name": reponame,
                            "baseurl": baseurl,
                            "checksum": repo_checksum,
                            "repomd": repomd,
                            "primary": filepaths['primary'],
                            "cachedir": cache_dir,
                            "proxies": None,
                            "patterns": filepaths['patterns'],
                            "comps": filepaths['comps']})
    return my_repodata
# Lightweight immutable record describing a package repository.
RepoType = collections.namedtuple('Repo', 'name, url')


def Repo(name, baseurl):
    """Build a Repo record from a repository name and its base URL."""
    return RepoType(name=name, url=baseurl)