From 3c368184c138a3c0268aedae3f24edace66deae4 Mon Sep 17 00:00:00 2001 From: Aleksander Mistewicz Date: Fri, 4 Nov 2016 10:30:10 +0100 Subject: [PATCH] Add tsp/scripts/download_all.py Change-Id: I7125d71ac3be8607a97a5aeecdf6bca83667f8fa Signed-off-by: Aleksander Mistewicz --- tsp/scripts/download_all.py | 319 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 319 insertions(+) create mode 100755 tsp/scripts/download_all.py diff --git a/tsp/scripts/download_all.py b/tsp/scripts/download_all.py new file mode 100755 index 0000000..82c06f2 --- /dev/null +++ b/tsp/scripts/download_all.py @@ -0,0 +1,319 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright (c) 2016 Samsung Electronics Co., Ltd All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
##
# @author Aleksander Mistewicz

import os
import subprocess
import time
import re
import requests
import argparse
import logging
import bs4
import threading
import signal

__version__ = "0.0.1"
__license__ = "APACHE-2.0"
__author__ = "Aleksander Mistewicz"
__author_email__ = "a.mistewicz@samsung.com"

USAGE = "%prog "

AGENT = "%s/%s" % (__name__, __version__)


class ImageVersion(object):
    """Parse a Tizen snapshot/prerelease URL into name, snapshot and submission.

    A prerelease URL carries the snapshot number twice plus one submission
    number; a plain snapshot URL carries only the snapshot number.
    Raises ValueError when the URL does not match either shape.
    """

    def __init__(self, url):
        # Image name looks like "tizen-<flavor>...YYYYMMDD.N".
        names = re.findall(r'tizen\-.{0,7}\w+\d{8}\.\d+', url)
        if not names:
            raise ValueError("no image name found in URL: %s" % url)
        self.name = names[0]
        versions = re.findall(r'\d{8}\.\d+', url)
        if len(versions) == 3:
            # Prerelease: snapshot appears twice, submission once.
            if versions[0] != versions[1]:
                raise ValueError("inconsistent snapshot numbers in URL: %s" % url)
            self.snapshot = versions[0]
            self.submission = versions[2]
        elif len(versions) == 1:
            # Snapshot: only the snapshot number is present.
            self.snapshot = versions[0]
            self.submission = None
        else:
            raise ValueError("unexpected version count (%d) in URL: %s"
                             % (len(versions), url))

    def get_version(self):
        """Return "<snapshot>.<submission>" for prereleases, else the snapshot."""
        if self.submission:
            return '.'.join([self.snapshot, self.submission])
        return self.snapshot

    def get_snapshot(self):
        """Return the snapshot number (YYYYMMDD.N)."""
        return self.snapshot

    def get_submission(self):
        """Return the submission number, or None for a snapshot image."""
        return self.submission

    def get_name(self):
        """Return the image name, with the submission suffix for prereleases."""
        if self.submission:
            return '.'.join([self.name, self.submission])
        return self.name

    def is_prerelease(self):
        """Return True when the URL referred to a prerelease (has a submission)."""
        return self.submission is not None


class Crawler(object):
    """Discover image directories and downloadable files on a download server."""

    # File name suffixes worth downloading alongside the image tarball.
    _WANTED_SUFFIXES = (".tar.gz", ".ks", ".packages", ".xml", ".bmap",
                        ".raw.bz2", "-default", ".log")

    @classmethod
    def get_links(cls, session, url):
        """Return the set of href targets of all anchors in the page at *url*."""
        main = session.get(url)
        soup = bs4.BeautifulSoup(main.text, 'html.parser')
        return set(link.get('href') for link in soup.find_all('a'))

    @classmethod
    def get_targets(cls, url):
        """Return the set of image directory URLs under *url* + "images/"."""
        url += "images/"
        s = requests.Session()
        return cls.crawl_targets(s, url)

    @classmethod
    def crawl_targets(cls, session, url):
        """Recursively collect directory URLs that contain an MD5SUMS file."""
        links = cls.get_links(session, url)
        discovered = set()
        for link in links:
            # Descend into relative subdirectories only; skip parent links.
            if not link.startswith("/") and link.endswith("/") and "../" not in link:
                logging.debug("Add link to discovered: %s", link)
                discovered |= cls.crawl_targets(session, url + link)
            elif link == "MD5SUMS":
                # An MD5SUMS file marks a leaf image directory.
                discovered.add(url)
        return discovered

    @classmethod
    def crawl_images(cls, session, url):
        """Return full URLs of MD5SUMS plus all wanted files in directory *url*."""
        links = cls.get_links(session, url)
        return set(url + link for link in links
                   if link == "MD5SUMS" or link.endswith(cls._WANTED_SUFFIXES))


class Downloader(threading.Thread):
    """Worker thread that downloads one image directory and verifies checksums."""

    def __init__(self, work, img_ver, session, url):
        """Create the worker and its local target directory.

        work     -- threading.Event; cleared to ask the worker to stop retrying
        img_ver  -- ImageVersion parsed from the top-level URL
        session  -- shared requests.Session for crawling
        url      -- image directory URL (must end with "/")
        """
        self.work = work
        threading.Thread.__init__(self)
        self.url = url
        self.session = session
        self.img_ver = img_ver
        self.is_prerelease = img_ver.is_prerelease()
        # Last path component of the directory URL names the local directory.
        m = re.search(r'.*/(.*)/$', url)
        self.name = m.group(1)
        try:
            os.mkdir(self.name)
        except OSError as e:
            # Directory may already exist from a previous run; keep going.
            logging.warning("mkdir %s: %s" % (self.name, e.strerror))
        self.diff_report_filename = self.name + "/diff.report"

    def run(self):
        """Crawl, write the package diff report, then download until md5 passes."""
        logging.info("Start downloader: %s" % self.name)
        self.files = Crawler.crawl_images(self.session, self.url)
        logging.debug(self.files)

        # Pull the .packages list and MD5SUMS out of the download set; they
        # are fetched separately. Guard against either being absent so a
        # malformed directory cannot crash the thread with a NameError.
        pre_url = None
        md5sums = None
        for url in frozenset(self.files):
            if url.endswith(".packages"):
                pre_url = url
                self.files.discard(url)
            elif url.endswith("/MD5SUMS"):
                md5sums = url
                self.files.discard(url)

        if not self.is_prerelease:
            self.write_diff_for_snapshot()
        else:
            if pre_url is None:
                logging.warning("No .packages file found for %s" % self.name)
                return
            # Derive the corresponding snapshot .packages URL:
            # replace the tree name, drop the prerelease subdirectory and
            # strip the submission suffix from the file name.
            snap_url = re.sub('prerelease', 'snapshots', pre_url)
            snap_url = re.sub("/[^/]*" + self.img_ver.get_submission() + "/", '/', snap_url)
            snap_url = re.sub(r"\." + self.img_ver.get_submission(), '', snap_url)
            logging.info("snap: %s" % snap_url)

            snap = self.session.get(snap_url)
            pre = self.session.get(pre_url)
            # Identical package lists => nothing new to download.
            if self.check_diff(pre.text, snap.text):
                return

        if md5sums is None:
            logging.warning("No MD5SUMS found for %s" % self.name)
            return

        # Retry wget (resumable, -c) until checksums verify or we are told
        # to stop via the shared work event (SIGINT).
        while self.work.is_set():
            sub_dwns = set()
            for url in self.files:
                sub_dwns.add(subprocess.Popen(["wget", "-cq", url], cwd=self.name))
            for sub_dwn in sub_dwns:
                sub_dwn.wait()
            if self.check_md5(md5sums):
                break
            if self.work.is_set():
                time.sleep(10)

        logging.info("Stop downloader: %s" % self.name)

    def check_diff(self, pre_pkgs, snap_pkgs):
        """Write the package-list diff report; return True when lists match."""
        logging.debug("Checking diff")
        set_snap_pkgs = set(snap_pkgs.splitlines())
        set_pre_pkgs = set(pre_pkgs.splitlines())
        diff = set_pre_pkgs ^ set_snap_pkgs
        with open(self.diff_report_filename, 'w') as f:
            ret = (len(diff) == 0)
            if ret:
                s = 'Images are identical'
            else:
                s = '\n'.join(diff)
            logging.info(s)
            f.write(s)
        return ret

    def write_diff_for_snapshot(self):
        """Write a placeholder report; snapshots have nothing to diff against."""
        logging.debug("Write diff for snapshot image")
        with open(self.diff_report_filename, 'w') as f:
            f.write('Snapshot')

    def check_md5(self, md5sum_url):
        """Verify downloads against MD5SUMS; on failure remove the listed files.

        Returns True when all checksums match, False otherwise (callers retry).
        """
        logging.debug("Checking md5sum")
        md5_file = "md5sums"
        md5_path = self.name + "/" + md5_file
        subprocess.call(["wget", md5sum_url, "-qO", md5_path])
        # Drop entries for files we deliberately do not checksum.
        subprocess.call(["sed", "-e", r"/\(ks\|json\|log\|xml\|-default\|packages\)/d",
                         "-i", md5_path])
        p = subprocess.Popen(["md5sum", "-c", md5_file], cwd=self.name)
        p.wait()
        if not p.returncode:
            logging.info("Checksum OK")
            os.remove(md5_path)
            return True
        logging.warning("Checksum FAILED\nRemoving files mentioned in md5sums file")
        with open(md5_path, 'r') as f:
            for line in f:
                # Each md5sums line is "<md5>  <filename>"; remove by filename,
                # not the raw line (which would never match an on-disk file).
                parts = line.split(None, 1)
                if len(parts) < 2:
                    continue
                try:
                    os.remove(self.name + "/" + parts[1].strip())
                except OSError as e:
                    logging.warning("rm: %s" % e.strerror)
        os.remove(md5_path)
        return False


class ImageDownloader(object):
    """Crawl *url* for image directories and download them all concurrently."""

    def __init__(self, url, dry):
        """Parse the URL, discover targets and (unless *dry*) start downloading."""
        self.url = url
        self.dry = dry

        self.img_ver = ImageVersion(url)
        self.diff_report_filename = "diff.report"

        logging.debug('snapshot number: %s', self.img_ver.get_snapshot())
        logging.debug('version number: %s', self.img_ver.get_version())

        self.urls = Crawler.get_targets(self.url)

        # Postcondition
        logging.debug("Files to download: %s", self.urls)

        if self.dry:
            logging.debug("Skipping run")
        else:
            self.create_projectconf("N/A", "N/A")
            self.run()

    def run(self):
        """Start one Downloader thread per image directory and wait for them."""
        logging.debug("Dispatching downloaders...")
        s = requests.Session()
        downloaders = set()
        work = threading.Event()
        work.set()

        def handler(signum, frame):
            # SIGINT: ask every downloader to stop after its current pass.
            logging.info("SIGINT")
            work.clear()

        signal.signal(signal.SIGINT, handler)
        for url in self.urls:
            dwn = Downloader(work, self.img_ver, s, url)
            dwn.start()
            downloaders.add(dwn)
        for dwn in downloaders:
            dwn.join()

    def create_projectconf(self, arch, target_name):
        """Write project.conf (image name, arch, target) for later tooling."""
        logging.debug("Create project.conf file for: %s %s", arch, target_name)
        if self.dry:
            return
        prjconf = [self.img_ver.get_name(), arch, target_name]
        with open("project.conf", 'w') as f:
            f.write('\n'.join(prjconf) + '\n')


def parse_arguments():
    """Parse command-line arguments; return the argparse namespace."""
    parser = argparse.ArgumentParser(description="Image downloader for download.tizen.org")

    parser.add_argument("url", metavar='<url>', type=str,
                        help='URL of prerelease or snapshot to download images from.')

    parser.add_argument("-d", "--dry-run",
                        action="store_true", dest="dry",
                        help="Dry run - do not actually download images")

    parser.add_argument("-l", "--log",
                        action="store", dest="loglevel",
                        help="Verbosity level")

    return parser.parse_args()


def main():
    """Entry point: configure logging and run the image downloader."""
    args = parse_arguments()
    if args.loglevel:
        numeric_level = getattr(logging, args.loglevel.upper(), None)
        if not isinstance(numeric_level, int):
            raise ValueError('Invalid log level: %s' % args.loglevel)
        logging.basicConfig(format='%(asctime)s %(message)s', level=numeric_level)
    logging.debug("Begin")
    ImageDownloader(args.url, args.dry)
    logging.debug("End")


if __name__ == '__main__':
    main()