From 72049c8e959f540fc481a4210e62ba4053eb2f8c Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 4 Nov 2013 10:24:55 +0200 Subject: [PATCH] tests: add old code-base I am going to add a test which verifies that older BmapCopy work fine with newer compatible bmap formats, as well as newer BmapCopy works fine with all the older bmap formats. This patch simply add a copy of various BmapCopy versions. Change-Id: I37941f343b56511a85a47cda239dd702b7d2afc0 Signed-off-by: Artem Bityutskiy --- tests/oldcodebase/BmapCopy1_0.py | 707 ++++++++++++++++++++++++++++ tests/oldcodebase/BmapCopy2_0.py | 631 +++++++++++++++++++++++++ tests/oldcodebase/BmapCopy2_1.py | 631 +++++++++++++++++++++++++ tests/oldcodebase/BmapCopy2_2.py | 632 +++++++++++++++++++++++++ tests/oldcodebase/BmapCopy2_3.py | 667 +++++++++++++++++++++++++++ tests/oldcodebase/BmapCopy2_4.py | 667 +++++++++++++++++++++++++++ tests/oldcodebase/BmapCopy2_5.py | 724 +++++++++++++++++++++++++++++ tests/oldcodebase/BmapCopy2_6.py | 724 +++++++++++++++++++++++++++++ tests/oldcodebase/BmapCopy3_0.py | 764 +++++++++++++++++++++++++++++++ tests/oldcodebase/__init__.py | 0 10 files changed, 6147 insertions(+) create mode 100644 tests/oldcodebase/BmapCopy1_0.py create mode 100644 tests/oldcodebase/BmapCopy2_0.py create mode 100644 tests/oldcodebase/BmapCopy2_1.py create mode 100644 tests/oldcodebase/BmapCopy2_2.py create mode 100644 tests/oldcodebase/BmapCopy2_3.py create mode 100644 tests/oldcodebase/BmapCopy2_4.py create mode 100644 tests/oldcodebase/BmapCopy2_5.py create mode 100644 tests/oldcodebase/BmapCopy2_6.py create mode 100644 tests/oldcodebase/BmapCopy3_0.py create mode 100644 tests/oldcodebase/__init__.py diff --git a/tests/oldcodebase/BmapCopy1_0.py b/tests/oldcodebase/BmapCopy1_0.py new file mode 100644 index 0000000..9722a1f --- /dev/null +++ b/tests/oldcodebase/BmapCopy1_0.py @@ -0,0 +1,707 @@ +""" This module implements copying of images with bmap and provides the +following API. + 1. BmapCopy class - implements copying to any kind of file, be that a block + device or a regular file. + 2. BmapBdevCopy class - based on BmapCopy and specializes on copying to block + devices. It does some more sanity checks and some block device performance + tuning. + +The bmap file is an XML file which contains a list of mapped blocks of the +image. Mapped blocks are the blocks which have disk sectors associated with +them, as opposed to holes, which are blocks with no associated disk sectors. In +other words, the image is considered to be a sparse file, and bmap basically +contains a list of mapped blocks of this sparse file. The bmap additionally +contains some useful information like block size (usually 4KiB), image size, +mapped blocks count, etc. + +The bmap is used for copying the image to a block device or to a regular file. +The idea is that we copy quickly with bmap because we copy only mapped blocks +and ignore the holes, because they are useless. And if the image is generated +properly (starting with a huge hole and writing all the data), it usually +contains only little mapped blocks, comparing to the overall image size. And +such an image compresses very well (because holes are read as all zeroes), so +it is beneficial to distributor them as compressed files along with the bmap. + +Here is an example. Suppose you have a 4GiB image which contains only 100MiB of +user data and you need to flash it to a slow USB stick. With bmap you end up +copying only a little bit more than 100MiB of data from the image to the USB +stick (namely, you copy only mapped blocks). This is a lot faster than copying +all 4GiB of data. We say that it is a bit more than 100MiB because things like +file-system meta-data (inode tables, superblocks, etc), partition table, etc +also contribute to the mapped blocks and are also copied. """ + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# * Too many statements (R0915) +# * Too many branches (R0912) +# pylint: disable=R0902 +# pylint: disable=R0915 +# pylint: disable=R0912 + +import os +import stat +import sys +import hashlib +import Queue +import thread +from xml.etree import ElementTree +from bmaptools.BmapHelpers import human_size + +# A list of supported image formats +SUPPORTED_IMAGE_FORMATS = ('bz2', 'gz', 'tar.gz', 'tgz', 'tar.bz2') + +# The highest supported bmap format version +SUPPORTED_BMAP_VERSION = 1 + +class Error(Exception): + """ A class for exceptions generated by the 'BmapCopy' module. We currently + support only one type of exceptions, and we basically throw human-readable + problem description in case of errors. """ + pass + +class BmapCopy: + """ This class implements the bmap-based copying functionality. To copy an + image with bmap you should create an instance of this class, which requires + the following: + + * full path or a file-like object of the image to copy + * full path or a file-like object of the destination file copy the image to + * full path or a file-like object of the bmap file (optional) + + Although the main purpose of this class is to use bmap, the bmap is not + required, and if it was not provided then the entire image will be copied + to the destination file. + + The image file may either be an uncompressed raw image or a compressed + image. Compression type is defined by the image file extension. Supported + types are listed by 'SUPPORTED_IMAGE_FORMATS'. + + IMPORTANT: if the image is given as a file-like object, the compression + type recognition is not performed - the file-like object's 'read()' method + is used directly instead. + + Once an instance of 'BmapCopy' is created, all the 'bmap_*' attributes are + initialized and available. They are read from the bmap. + + However, if bmap was not provided, this is not always the case and some of + the 'bmap_*' attributes are not initialize by the class constructor. + Instead, they are initialized only in the 'copy()' method. The reason for + this is that when bmap is absent, 'BmapCopy' uses sensible fall-back values + for the 'bmap_*' attributes assuming the entire image is "mapped". And if + the image is compressed, it cannot easily find out the image size. Thus, + this is postponed until the 'copy()' method decompresses the image for the + first time. + + The 'copy()' method implements the copying. You may choose whether to + verify the SHA1 checksum while copying or not. Note, this is done only in + case of bmap-based copying and only if bmap contains the SHA1 checksums + (e.g., bmap version 1.0 did not have SHA1 checksums). + + You may choose whether to synchronize the destination file after writing or + not. To explicitly synchronize it, use the 'sync()' method. + + This class supports all the bmap format versions up version + 'SUPPORTED_BMAP_VERSION'. """ + + def _initialize_sizes(self, image_size): + """ This function is only used when the there is no bmap. It + initializes attributes like 'blocks_cnt', 'mapped_cnt', etc. Normally, + the values are read from the bmap file, but in this case they are just + set to something reasonable. """ + + self.image_size = image_size + self.image_size_human = human_size(image_size) + self.blocks_cnt = self.image_size + self.block_size - 1 + self.blocks_cnt /= self.block_size + self.mapped_cnt = self.blocks_cnt + self.mapped_size = self.image_size + self.mapped_size_human = self.image_size_human + + + def _parse_bmap(self): + """ Parse the bmap file and initialize the 'bmap_*' attributes. """ + + bmap_pos = self._f_bmap.tell() + self._f_bmap.seek(0) + + try: + self._xml = ElementTree.parse(self._f_bmap) + except ElementTree.ParseError as err: + raise Error("cannot parse the bmap file '%s' which should be a " \ + "proper XML file: %s" % (self._bmap_path, err)) + + xml = self._xml + self.bmap_version = str(xml.getroot().attrib.get('version')) + + # Make sure we support this version + major = int(self.bmap_version.split('.', 1)[0]) + if major > SUPPORTED_BMAP_VERSION: + raise Error("only bmap format version up to %d is supported, " \ + "version %d is not supported" \ + % (SUPPORTED_BMAP_VERSION, major)) + + # Fetch interesting data from the bmap XML file + self.block_size = int(xml.find("BlockSize").text.strip()) + self.blocks_cnt = int(xml.find("BlocksCount").text.strip()) + self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip()) + self.image_size = int(xml.find("ImageSize").text.strip()) + self.image_size_human = human_size(self.image_size) + self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = (self.mapped_cnt * 100.0) / self.blocks_cnt + + blocks_cnt = (self.image_size + self.block_size - 1) / self.block_size + if self.blocks_cnt != blocks_cnt: + raise Error("Inconsistent bmap - image size does not match " \ + "blocks count (%d bytes != %d blocks * %d bytes)" \ + % (self.image_size, self.blocks_cnt, self.block_size)) + + self._f_bmap.seek(bmap_pos) + + def _open_image_file(self): + """ Open the image file which may be compressed or not. The compression + type is recognized by the file extension. Supported types are defined + by 'SUPPORTED_IMAGE_FORMATS'. """ + + try: + is_regular_file = stat.S_ISREG(os.stat(self._image_path).st_mode) + except OSError as err: + raise Error("cannot access image file '%s': %s" \ + % (self._image_path, err.strerror)) + + if not is_regular_file: + raise Error("image file '%s' is not a regular file" \ + % self._image_path) + + try: + if self._image_path.endswith('.tar.gz') \ + or self._image_path.endswith('.tar.bz2') \ + or self._image_path.endswith('.tgz'): + import tarfile + + tar = tarfile.open(self._image_path, 'r') + # The tarball is supposed to contain only one single member + members = tar.getnames() + if len(members) > 1: + raise Error("the image tarball '%s' contains more than " \ + "one file" % self._image_path) + elif len(members) == 0: + raise Error("the image tarball '%s' is empty (no files)" \ + % self._image_path) + self._f_image = tar.extractfile(members[0]) + elif self._image_path.endswith('.gz'): + import gzip + self._f_image = gzip.GzipFile(self._image_path, 'rb') + elif self._image_path.endswith('.bz2'): + import bz2 + self._f_image = bz2.BZ2File(self._image_path, 'rb') + else: + self._image_is_compressed = False + self._f_image = open(self._image_path, 'rb') + except IOError as err: + raise Error("cannot open image file '%s': %s" \ + % (self._image_path, err)) + + self._f_image_needs_close = True + + def _validate_image_size(self): + """ Make sure that image size from bmap matches real image size. """ + + image_size = os.fstat(self._f_image.fileno()).st_size + if image_size != self.image_size: + raise Error("Size mismatch, bmap '%s' was created for an image " \ + "of size %d bytes, but image '%s' has size %d bytes" \ + % (self._bmap_path, self.image_size, + self._image_path, image_size)) + + def _open_destination_file(self): + """ Open the destination file. """ + + try: + self._f_dest = open(self._dest_path, 'w') + except IOError as err: + raise Error("cannot open destination file '%s': %s" \ + % (self._dest_path, err)) + + self._f_dest_needs_close = True + + def _open_bmap_file(self): + """ Open the bmap file. """ + + try: + self._f_bmap = open(self._bmap_path, 'r') + except IOError as err: + raise Error("cannot open bmap file '%s': %s" \ + % (self._bmap_path, err.strerror)) + + self._f_bmap_needs_close = True + + def __init__(self, image, dest, bmap = None): + """ The class constructor. The parameters are: + image - full path or file object of the image which should be copied + dest - full path or file-like object of the destination file to + copy the image to + bmap - full path or file-like object of the bmap file to use for + copying """ + + self._xml = None + self._image_is_compressed = True + + self._dest_fsync_watermark = None + self._batch_blocks = None + self._batch_queue = None + self._batch_bytes = 1024 * 1024 + self._batch_queue_len = 2 + + self.bmap_version = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.image_size = None + self.image_size_human = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._f_dest_needs_close = False + self._f_image_needs_close = False + self._f_bmap_needs_close = False + + self._f_bmap = None + self._f_bmap_path = None + + if hasattr(dest, "write"): + self._f_dest = dest + self._dest_path = dest.name + else: + self._dest_path = dest + self._open_destination_file() + + if hasattr(image, "read"): + self._f_image = image + self._image_path = image.name + else: + self._image_path = image + self._open_image_file() + + st_mode = os.fstat(self._f_dest.fileno()).st_mode + self._dest_is_regfile = stat.S_ISREG(st_mode) + + if bmap: + if hasattr(bmap, "read"): + self._f_bmap = bmap + self._bmap_path = bmap.name + else: + self._bmap_path = bmap + self._open_bmap_file() + self._parse_bmap() + else: + # There is no bmap. Initialize user-visible attributes to something + # sensible with an assumption that we just have all blocks mapped. + self.bmap_version = 0 + self.block_size = 4096 + self.mapped_percent = 100 + + # We can initialize size-related attributes only if we the image is + # uncompressed. + if not self._image_is_compressed: + image_size = os.fstat(self._f_image.fileno()).st_size + self._initialize_sizes(image_size) + + if not self._image_is_compressed: + self._validate_image_size() + + self._batch_blocks = self._batch_bytes / self.block_size + + def __del__(self): + """ The class destructor which closes the opened files. """ + + if self._f_image_needs_close: + self._f_image.close() + if self._f_dest_needs_close: + self._f_dest.close() + if self._f_bmap_needs_close: + self._f_bmap.close() + + def _get_block_ranges(self): + """ This is a helper generator that parses the bmap XML file and for + each block range in the XML file it yields ('first', 'last', 'sha1') + tuples, where: + * 'first' is the first block of the range; + * 'last' is the last block of the range; + * 'sha1' is the SHA1 checksum of the range ('None' is used if it is + missing. + + If there is no bmap file, the generator just yields a single range + for entire image file. If the image size is unknown (the image is + compressed), the generator infinitely yields continuous ranges of + size '_batch_blocks'. """ + + if not self._f_bmap: + # We do not have the bmap, yield a tuple with all blocks + if self.blocks_cnt: + yield (0, self.blocks_cnt - 1, None) + else: + # We do not know image size, keep yielding tuples with many + # blocks infinitely. + first = 0 + while True: + yield (first, first + self._batch_blocks - 1, None) + first += self._batch_blocks + return + + # We have the bmap, just read it and yield block ranges + xml = self._xml + xml_bmap = xml.find("BlockMap") + + for xml_element in xml_bmap.findall("Range"): + blocks_range = xml_element.text.strip() + # The range of blocks has the "X - Y" format, or it can be just "X" + # in old bmap format versions. First, split the blocks range string + # and strip white-spaces. + split = [x.strip() for x in blocks_range.split('-', 1)] + + first = int(split[0]) + if len(split) > 1: + last = int(split[1]) + if first > last: + raise Error("bad range (first > last): '%s'" % blocks_range) + else: + last = first + + if 'sha1' in xml_element.attrib: + sha1 = xml_element.attrib['sha1'] + else: + sha1 = None + + yield (first, last, sha1) + + def _get_batches(self, first, last): + """ This is a helper generator which splits block ranges from the bmap + file to smaller batches. Indeed, we cannot read and write entire block + ranges from the image file, because a range can be very large. So we + perform the I/O in batches. Batch size is defined by the + '_batch_blocks' attribute. Thus, for each (first, last) block range, + the generator yields smaller (start, end, length) batch ranges, where: + * 'start' is the starting batch block number; + * 'last' is the ending batch block number; + * 'length' is the batch length in blocks (same as + 'end' - 'start' + 1). """ + + batch_blocks = self._batch_blocks + + while first + batch_blocks - 1 <= last: + yield (first, first + batch_blocks - 1, batch_blocks) + first += batch_blocks + + batch_blocks = last - first + 1 + if batch_blocks: + yield (first, first + batch_blocks - 1, batch_blocks) + + def _get_data(self, verify): + """ This is generator which reads the image file in '_batch_blocks' + chunks and yields ('type', 'start', 'end', 'buf) tuples, where: + * 'start' is the starting block number of the batch; + * 'end' is the last block of the batch; + * 'buf' a buffer containing the batch data. """ + + try: + for (first, last, sha1) in self._get_block_ranges(): + if verify and sha1: + hash_obj = hashlib.new('sha1') + + self._f_image.seek(first * self.block_size) + + iterator = self._get_batches(first, last) + for (start, end, length) in iterator: + try: + buf = self._f_image.read(length * self.block_size) + except IOError as err: + raise Error("error while reading blocks %d-%d of the " \ + "image file '%s': %s" \ + % (start, end, self._image_path, err)) + + if not buf: + self._batch_queue.put(None) + return + + if verify and sha1: + hash_obj.update(buf) + + blocks = (len(buf) + self.block_size - 1) / self.block_size + self._batch_queue.put(("range", start, start + blocks - 1, + buf)) + + if verify and sha1 and hash_obj.hexdigest() != sha1: + raise Error("checksum mismatch for blocks range %d-%d: " \ + "calculated %s, should be %s" \ + % (first, last, hash_obj.hexdigest(), sha1)) + # Silence pylint warning about catching too general exception + # pylint: disable=W0703 + except Exception: + # pylint: enable=W0703 + # In case of any exception - just pass it to the main thread + # through the queue. + self._batch_queue.put(("error", sys.exc_info())) + + self._batch_queue.put(None) + + def copy(self, sync = True, verify = True): + """ Copy the image to the destination file using bmap. The sync + argument defines whether the destination file has to be synchronized + upon return. The 'verify' argument defines whether the SHA1 checksum + has to be verified while copying. """ + + # Save file positions in order to restore them at the end + image_pos = self._f_image.tell() + dest_pos = self._f_dest.tell() + if self._f_bmap: + bmap_pos = self._f_bmap.tell() + + # Create the queue for block batches and start the reader thread, which + # will read the image in batches and put the results to '_batch_queue'. + self._batch_queue = Queue.Queue(self._batch_queue_len) + thread.start_new_thread(self._get_data, (verify, )) + + blocks_written = 0 + bytes_written = 0 + fsync_last = 0 + + # Read the image in '_batch_blocks' chunks and write them to the + # destination file + while True: + batch = self._batch_queue.get() + if batch is None: + # No more data, the image is written + break + elif batch[0] == "error": + # The reader thread encountered an error and passed us the + # exception. + exc_info = batch[1] + raise exc_info[0], exc_info[1], exc_info[2] + + (start, end, buf) = batch[1:4] + + assert len(buf) <= (end - start + 1) * self.block_size + assert len(buf) > (end - start) * self.block_size + + self._f_dest.seek(start * self.block_size) + + # Synchronize the destination file if we reached the watermark + if self._dest_fsync_watermark: + if blocks_written >= fsync_last + self._dest_fsync_watermark: + fsync_last = blocks_written + self.sync() + + try: + self._f_dest.write(buf) + except IOError as err: + raise Error("error while writing blocks %d-%d of '%s': %s" \ + % (start, end, self._dest_path, err)) + + self._batch_queue.task_done() + blocks_written += (end - start + 1) + bytes_written += len(buf) + + if not self.image_size: + # The image size was unknown up until now, probably because this is + # a compressed image. Initialize the corresponding class attributes + # now, when we know the size. + self._initialize_sizes(bytes_written) + + # This is just a sanity check - we should have written exactly + # 'mapped_cnt' blocks. + if blocks_written != self.mapped_cnt: + raise Error("wrote %u blocks, but should have %u - inconsistent " \ + "bmap file" % (blocks_written, self.mapped_cnt)) + + if self._dest_is_regfile: + # Make sure the destination file has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" \ + % (self._dest_path, err)) + + try: + self._f_dest.flush() + except IOError as err: + raise Error("cannot flush '%s': %s" % (self._dest_path, err)) + + if sync: + self.sync() + + # Restore file positions + self._f_image.seek(image_pos) + self._f_dest.seek(dest_pos) + if self._f_bmap: + self._f_bmap.seek(bmap_pos) + + def sync(self): + """ Synchronize the destination file to make sure all the data are + actually written to the disk. """ + + try: + os.fsync(self._f_dest.fileno()), + except OSError as err: + raise Error("cannot synchronize '%s': %s " \ + % (self._dest_path, err.strerror)) + + +class BmapBdevCopy(BmapCopy): + """ This class is a specialized version of 'BmapCopy' which copies the + image to a block device. Unlike the base 'BmapCopy' class, this class does + various optimizations specific to block devices, e.g., switching to the + 'noop' I/O scheduler. """ + + def _open_destination_file(self): + """ Open the block device in exclusive mode. """ + + try: + self._f_dest = os.open(self._dest_path, os.O_WRONLY | os.O_EXCL) + except OSError as err: + raise Error("cannot open block device '%s' in exclusive mode: %s" \ + % (self._dest_path, err.strerror)) + + try: + os.fstat(self._f_dest).st_mode + except OSError as err: + raise Error("cannot access block device '%s': %s" \ + % (self._dest_path, err.strerror)) + + # Turn the block device file descriptor into a file object + try: + self._f_dest = os.fdopen(self._f_dest, "wb") + except OSError as err: + os.close(self._f_dest) + raise Error("cannot open block device '%s': %s" \ + % (self._dest_path, err)) + + self._f_dest_needs_close = True + + def _tune_block_device(self): + """" Tune the block device for better performance: + 1. Switch to the 'noop' I/O scheduler if it is available - sequential + write to the block device becomes a lot faster comparing to CFQ. + 2. Limit the write buffering - we do not need the kernel to buffer a + lot of the data we send to the block device, because we write + sequentially. Limit the buffering. + + The old settings are saved in order to be able to restore them later. + """ + # Switch to the 'noop' I/O scheduler + try: + with open(self._sysfs_scheduler_path, "r+") as f_scheduler: + contents = f_scheduler.read() + f_scheduler.seek(0) + f_scheduler.write("noop") + except IOError: + # No problem, this is just an optimization. + return + + # The file contains a list of scheduler with the current + # scheduler in square brackets, e.g., "noop deadline [cfq]". + # Fetch the current scheduler name + import re + + match = re.match(r'.*\[(.+)\].*', contents) + self._old_scheduler_value = match.group(1) + + # Limit the write buffering + try: + with open(self._sysfs_max_ratio_path, "r+") as f_ratio: + self._old_max_ratio_value = f_ratio.read() + f_ratio.seek(0) + f_ratio.write("1") + except IOError: + return + + def _restore_bdev_settings(self): + """ Restore old block device settings which we changed in + '_tune_block_device()'. """ + + if self._old_scheduler_value is not None: + try: + with open(self._sysfs_scheduler_path, "w") as f_scheduler: + f_scheduler.write(self._old_scheduler_value) + except IOError: + # No problem, this is just an optimization. + return + + if self._old_max_ratio_value is not None: + try: + with open(self._sysfs_max_ratio_path, "w") as f_ratio: + f_ratio.write(self._old_max_ratio_value) + except IOError: + return + + def copy(self, sync = True, verify = True): + """ The same as in the base class but tunes the block device for better + performance before starting writing. Additionally, it forces block + device synchronization from time to time in order to make sure we do + not get stuck in 'fsync()' for too long time. The problem is that the + kernel synchronizes block devices when the file is closed. And the + result is that if the user interrupts us while we are copying the data, + the program will be blocked in 'close()' waiting for the block device + synchronization, which may last minutes for slow USB stick. This is + very bad user experience, and we work around this effect by + synchronizing from time to time. """ + + try: + self._tune_block_device() + BmapCopy.copy(self, sync, verify) + except: + self._restore_bdev_settings() + raise + + def __init__(self, image, dest, bmap = None): + """ The same as the constructor of the 'BmapCopy' base class, but adds + useful guard-checks specific to block devices. """ + + # Call the base class constructor first + BmapCopy.__init__(self, image, dest, bmap) + + self._batch_bytes = 1024 * 1024 + self._batch_blocks = self._batch_bytes / self.block_size + self._batch_queue_len = 6 + self._dest_fsync_watermark = (6 * 1024 * 1024) / self.block_size + + self._sysfs_base = None + self._sysfs_scheduler_path = None + self._sysfs_max_ratio_path = None + self._old_scheduler_value = None + self._old_max_ratio_value = None + + # If the image size is known (i.e., it is not compressed) - check that + # it fits the block device. + if self.image_size: + try: + bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END) + os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET) + except OSError as err: + raise Error("cannot seed block device '%s': %s " \ + % (self._dest_path, err.strerror)) + + if bdev_size < self.image_size: + raise Error("the image file '%s' has size %s and it will not " \ + "fit the block device '%s' which has %s capacity" \ + % (self._image_path, self.image_size_human, + self._dest_path, human_size(bdev_size))) + + # Construct the path to the sysfs directory of our block device + st_rdev = os.fstat(self._f_dest.fileno()).st_rdev + self._sysfs_base = "/sys/dev/block/%s:%s/" \ + % (os.major(st_rdev), os.minor(st_rdev)) + + # Check if the 'queue' sub-directory exists. If yes, then our block + # device is entire disk. Otherwise, it is a partition, in which case we + # need to go one level up in the sysfs hierarchy. + try: + if not os.path.exists(self._sysfs_base + "queue"): + self._sysfs_base = self._sysfs_base + "../" + except OSError: + # No problem, this is just an optimization. + pass + + self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler" + self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio" diff --git a/tests/oldcodebase/BmapCopy2_0.py b/tests/oldcodebase/BmapCopy2_0.py new file mode 100644 index 0000000..7082287 --- /dev/null +++ b/tests/oldcodebase/BmapCopy2_0.py @@ -0,0 +1,631 @@ +""" This module implements copying of images with bmap and provides the +following API. + 1. BmapCopy class - implements copying to any kind of file, be that a block + device or a regular file. + 2. BmapBdevCopy class - based on BmapCopy and specializes on copying to block + devices. It does some more sanity checks and some block device performance + tuning. + +The bmap file is an XML file which contains a list of mapped blocks of the +image. Mapped blocks are the blocks which have disk sectors associated with +them, as opposed to holes, which are blocks with no associated disk sectors. In +other words, the image is considered to be a sparse file, and bmap basically +contains a list of mapped blocks of this sparse file. The bmap additionally +contains some useful information like block size (usually 4KiB), image size, +mapped blocks count, etc. + +The bmap is used for copying the image to a block device or to a regular file. +The idea is that we copy quickly with bmap because we copy only mapped blocks +and ignore the holes, because they are useless. And if the image is generated +properly (starting with a huge hole and writing all the data), it usually +contains only little mapped blocks, comparing to the overall image size. And +such an image compresses very well (because holes are read as all zeroes), so +it is beneficial to distributor them as compressed files along with the bmap. + +Here is an example. Suppose you have a 4GiB image which contains only 100MiB of +user data and you need to flash it to a slow USB stick. With bmap you end up +copying only a little bit more than 100MiB of data from the image to the USB +stick (namely, you copy only mapped blocks). This is a lot faster than copying +all 4GiB of data. We say that it is a bit more than 100MiB because things like +file-system meta-data (inode tables, superblocks, etc), partition table, etc +also contribute to the mapped blocks and are also copied. """ + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# pylint: disable=R0902 + +import os +import stat +import sys +import hashlib +import Queue +import thread +import datetime +from xml.etree import ElementTree +from bmaptools.BmapHelpers import human_size + +# The highest supported bmap format version +SUPPORTED_BMAP_VERSION = 1 + +class Error(Exception): + """ A class for exceptions generated by the 'BmapCopy' module. We currently + support only one type of exceptions, and we basically throw human-readable + problem description in case of errors. """ + pass + +class BmapCopy: + """ This class implements the bmap-based copying functionality. To copy an + image with bmap you should create an instance of this class, which requires + the following: + + * full path or a file-like object of the image to copy + * full path or a file-like object of the destination file copy the image to + * full path or a file-like object of the bmap file (optional) + * image size in bytes (optional) + + Although the main purpose of this class is to use bmap, the bmap is not + required, and if it was not provided then the entire image will be copied + to the destination file. + + When the bmap is provided, it is not necessary to specify image size, + because the size is contained in the bmap. Otherwise, it is benefitial to + specify the size because it enables extra sanity checks and makes it + possible to provide the progress bar. + + When the image size is known either from the bmap or the caller specified + it to the class constructor, all the image geometry description attributes + ('blocks_cnt', etc) are initialized by the class constructor and available + for the user. + + However, when the size is not known, some of the image geometry + description attributes are not initialized by the class constructor. + Instead, they are initialized only by the 'copy()' method. + + The 'copy()' method implements image copying. You may choose whether to + verify the SHA1 checksum while copying or not. Note, this is done only in + case of bmap-based copying and only if bmap contains the SHA1 checksums + (e.g., bmap version 1.0 did not have SHA1 checksums). + + You may choose whether to synchronize the destination file after writing or + not. To explicitly synchronize it, use the 'sync()' method. + + This class supports all the bmap format versions up version + 'SUPPORTED_BMAP_VERSION'. + + It is possible to have a simple progress indicator while copying the image. + Use the 'set_progress_indicator()' method. + + You can copy only once with an instance of this class. This means that in + order to copy the image for the second time, you have to create a new class + instance. """ + + def set_progress_indicator(self, file_obj, format_string): + """ Setup the progress indicator which shows how much data has been + copied in percent. + + The 'file_obj' argument is the console file object where the progress + has to be printed to. Pass 'None' to disable the progress indicator. + + The 'format_string' argument is the format string for the progress + indicator. It has to contain a single '%d' placeholder which will be + substitutes with copied data in percent. """ + + self._progress_file = file_obj + if format_string: + self._progress_format = format_string + else: + self._progress_format = "Copied %d%%" + + def _set_image_size(self, image_size): + """ Set image size and initialize various other geometry-related + attributes. """ + + if self.image_size is not None and self.image_size != image_size: + raise Error("cannot set image size to %d bytes, it is known to " \ + "be %d bytes (%s)" % (image_size, self.image_size, + self.image_size_human)) + + self.image_size = image_size + self.image_size_human = human_size(image_size) + self.blocks_cnt = self.image_size + self.block_size - 1 + self.blocks_cnt /= self.block_size + + if self.mapped_cnt is None: + self.mapped_cnt = self.blocks_cnt + self.mapped_size = self.image_size + self.mapped_size_human = self.image_size_human + + def _parse_bmap(self): + """ Parse the bmap file and initialize corresponding class instance + attributs. """ + + try: + self._xml = ElementTree.parse(self._f_bmap) + except ElementTree.ParseError as err: + raise Error("cannot parse the bmap file '%s' which should be a " \ + "proper XML file: %s" % (self._bmap_path, err)) + + xml = self._xml + self.bmap_version = str(xml.getroot().attrib.get('version')) + + # Make sure we support this version + major = int(self.bmap_version.split('.', 1)[0]) + if major > SUPPORTED_BMAP_VERSION: + raise Error("only bmap format version up to %d is supported, " \ + "version %d is not supported" \ + % (SUPPORTED_BMAP_VERSION, major)) + + # Fetch interesting data from the bmap XML file + self.block_size = int(xml.find("BlockSize").text.strip()) + self.blocks_cnt = int(xml.find("BlocksCount").text.strip()) + self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip()) + self.image_size = int(xml.find("ImageSize").text.strip()) + self.image_size_human = human_size(self.image_size) + self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = (self.mapped_cnt * 100.0) / self.blocks_cnt + + blocks_cnt = (self.image_size + self.block_size - 1) / self.block_size + if self.blocks_cnt != blocks_cnt: + raise Error("Inconsistent bmap - image size does not match " \ + "blocks count (%d bytes != %d blocks * %d bytes)" \ + % (self.image_size, self.blocks_cnt, self.block_size)) + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The class constructor. The parameters are: + image - file-like object of the image which should be copied, + should only support 'read()' and 'seek()' methods, + and only seeking forward has to be supported. + dest - file-like object of the destination file to copy the + image to. + bmap - file-like object of the bmap file to use for copying. + image_size - size of the image in bytes. """ + + self._xml = None + + self._dest_fsync_watermark = None + self._batch_blocks = None + self._batch_queue = None + self._batch_bytes = 1024 * 1024 + self._batch_queue_len = 2 + + self.bmap_version = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.image_size = None + self.image_size_human = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._f_bmap = None + self._f_bmap_path = None + + self._progress_started = None + self._progress_index = None + self._progress_time = None + self._progress_file = None + self._progress_format = None + self.set_progress_indicator(None, None) + + self._f_image = image + self._image_path = image.name + + self._f_dest = dest + self._dest_path = dest.name + st_data = os.fstat(self._f_dest.fileno()) + self._dest_is_regfile = stat.S_ISREG(st_data.st_mode) + + # Special quirk for /dev/null which does not support fsync() + if stat.S_ISCHR(st_data.st_mode) and \ + os.major(st_data.st_rdev) == 1 and \ + os.minor(st_data.st_rdev) == 3: + self._dest_supports_fsync = False + else: + self._dest_supports_fsync = True + + if bmap: + self._f_bmap = bmap + self._bmap_path = bmap.name + self._parse_bmap() + else: + # There is no bmap. Initialize user-visible attributes to something + # sensible with an assumption that we just have all blocks mapped. + self.bmap_version = 0 + self.block_size = 4096 + self.mapped_percent = 100 + + if image_size: + self._set_image_size(image_size) + + self._batch_blocks = self._batch_bytes / self.block_size + + def _update_progress(self, blocks_written): + """ Print the progress indicator if the mapped area size is known and + if the indicator has been enabled by assigning a console file object to + the 'progress_file' attribute. """ + + if not self._progress_file: + return + + if self.mapped_cnt: + assert blocks_written <= self.mapped_cnt + percent = int((float(blocks_written) / self.mapped_cnt) * 100) + progress = '\r' + self._progress_format % percent + '\n' + else: + # Do not rotate the wheel too fast + now = datetime.datetime.now() + min_delta = datetime.timedelta(milliseconds = 250) + if now - self._progress_time < min_delta: + return + self._progress_time = now + + progress_wheel = ('-', '\\', '|', '/') + progress = '\r' + progress_wheel[self._progress_index % 4] + '\n' + self._progress_index += 1 + + # This is a little trick we do in order to make sure that the next + # message will always start from a new line - we switch to the new + # line after each progress update and move the cursor up. As an + # example, this is useful when the copying is interrupted by an + # exception - the error message will start form new line. + if self._progress_started: + # The "move cursor up" escape sequence + self._progress_file.write('\033[1A') + else: + self._progress_started = True + + self._progress_file.write(progress) + self._progress_file.flush() + + def _get_block_ranges(self): + """ This is a helper generator that parses the bmap XML file and for + each block range in the XML file it yields ('first', 'last', 'sha1') + tuples, where: + * 'first' is the first block of the range; + * 'last' is the last block of the range; + * 'sha1' is the SHA1 checksum of the range ('None' is used if it is + missing. + + If there is no bmap file, the generator just yields a single range + for entire image file. If the image size is unknown, the generator + infinitely yields continuous ranges of size '_batch_blocks'. """ + + if not self._f_bmap: + # We do not have the bmap, yield a tuple with all blocks + if self.blocks_cnt: + yield (0, self.blocks_cnt - 1, None) + else: + # We do not know image size, keep yielding tuples with many + # blocks infinitely. + first = 0 + while True: + yield (first, first + self._batch_blocks - 1, None) + first += self._batch_blocks + return + + # We have the bmap, just read it and yield block ranges + xml = self._xml + xml_bmap = xml.find("BlockMap") + + for xml_element in xml_bmap.findall("Range"): + blocks_range = xml_element.text.strip() + # The range of blocks has the "X - Y" format, or it can be just "X" + # in old bmap format versions. First, split the blocks range string + # and strip white-spaces. + split = [x.strip() for x in blocks_range.split('-', 1)] + + first = int(split[0]) + if len(split) > 1: + last = int(split[1]) + if first > last: + raise Error("bad range (first > last): '%s'" % blocks_range) + else: + last = first + + if 'sha1' in xml_element.attrib: + sha1 = xml_element.attrib['sha1'] + else: + sha1 = None + + yield (first, last, sha1) + + def _get_batches(self, first, last): + """ This is a helper generator which splits block ranges from the bmap + file to smaller batches. Indeed, we cannot read and write entire block + ranges from the image file, because a range can be very large. So we + perform the I/O in batches. Batch size is defined by the + '_batch_blocks' attribute. Thus, for each (first, last) block range, + the generator yields smaller (start, end, length) batch ranges, where: + * 'start' is the starting batch block number; + * 'last' is the ending batch block number; + * 'length' is the batch length in blocks (same as + 'end' - 'start' + 1). """ + + batch_blocks = self._batch_blocks + + while first + batch_blocks - 1 <= last: + yield (first, first + batch_blocks - 1, batch_blocks) + first += batch_blocks + + batch_blocks = last - first + 1 + if batch_blocks: + yield (first, first + batch_blocks - 1, batch_blocks) + + def _get_data(self, verify): + """ This is generator which reads the image file in '_batch_blocks' + chunks and yields ('type', 'start', 'end', 'buf) tuples, where: + * 'start' is the starting block number of the batch; + * 'end' is the last block of the batch; + * 'buf' a buffer containing the batch data. """ + + try: + for (first, last, sha1) in self._get_block_ranges(): + if verify and sha1: + hash_obj = hashlib.new('sha1') + + self._f_image.seek(first * self.block_size) + + iterator = self._get_batches(first, last) + for (start, end, length) in iterator: + try: + buf = self._f_image.read(length * self.block_size) + except IOError as err: + raise Error("error while reading blocks %d-%d of the " \ + "image file '%s': %s" \ + % (start, end, self._image_path, err)) + + if not buf: + self._batch_queue.put(None) + return + + if verify and sha1: + hash_obj.update(buf) + + blocks = (len(buf) + self.block_size - 1) / self.block_size + self._batch_queue.put(("range", start, start + blocks - 1, + buf)) + + if verify and sha1 and hash_obj.hexdigest() != sha1: + raise Error("checksum mismatch for blocks range %d-%d: " \ + "calculated %s, should be %s (image file %s)" \ + % (first, last, hash_obj.hexdigest(), \ + sha1, self._image_path)) + # Silence pylint warning about catching too general exception + # pylint: disable=W0703 + except Exception: + # pylint: enable=W0703 + # In case of any exception - just pass it to the main thread + # through the queue. + self._batch_queue.put(("error", sys.exc_info())) + + self._batch_queue.put(None) + + def copy(self, sync = True, verify = True): + """ Copy the image to the destination file using bmap. The 'sync' + argument defines whether the destination file has to be synchronized + upon return. The 'verify' argument defines whether the SHA1 checksum + has to be verified while copying. """ + + # Create the queue for block batches and start the reader thread, which + # will read the image in batches and put the results to '_batch_queue'. + self._batch_queue = Queue.Queue(self._batch_queue_len) + thread.start_new_thread(self._get_data, (verify, )) + + blocks_written = 0 + bytes_written = 0 + fsync_last = 0 + + self._progress_started = False + self._progress_index = 0 + self._progress_time = datetime.datetime.now() + + # Read the image in '_batch_blocks' chunks and write them to the + # destination file + while True: + batch = self._batch_queue.get() + if batch is None: + # No more data, the image is written + break + elif batch[0] == "error": + # The reader thread encountered an error and passed us the + # exception. + exc_info = batch[1] + raise exc_info[0], exc_info[1], exc_info[2] + + (start, end, buf) = batch[1:4] + + assert len(buf) <= (end - start + 1) * self.block_size + assert len(buf) > (end - start) * self.block_size + + self._f_dest.seek(start * self.block_size) + + # Synchronize the destination file if we reached the watermark + if self._dest_fsync_watermark: + if blocks_written >= fsync_last + self._dest_fsync_watermark: + fsync_last = blocks_written + self.sync() + + try: + self._f_dest.write(buf) + except IOError as err: + raise Error("error while writing blocks %d-%d of '%s': %s" \ + % (start, end, self._dest_path, err)) + + self._batch_queue.task_done() + blocks_written += (end - start + 1) + bytes_written += len(buf) + + self._update_progress(blocks_written) + + if not self.image_size: + # The image size was unknown up until now, set it + self._set_image_size(bytes_written) + + # This is just a sanity check - we should have written exactly + # 'mapped_cnt' blocks. + if blocks_written != self.mapped_cnt: + raise Error("wrote %u blocks from image '%s' to '%s', but should " \ + "have %u - inconsistent bmap file '%s'" \ + % (blocks_written, self._image_path, self._dest_path, \ + self.mapped_cnt, self._bmap_path)) + + if self._dest_is_regfile: + # Make sure the destination file has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" \ + % (self._dest_path, err)) + + try: + self._f_dest.flush() + except IOError as err: + raise Error("cannot flush '%s': %s" % (self._dest_path, err)) + + if sync: + self.sync() + + def sync(self): + """ Synchronize the destination file to make sure all the data are + actually written to the disk. """ + + if self._dest_supports_fsync: + try: + os.fsync(self._f_dest.fileno()), + except OSError as err: + raise Error("cannot synchronize '%s': %s " \ + % (self._dest_path, err.strerror)) + + +class BmapBdevCopy(BmapCopy): + """ This class is a specialized version of 'BmapCopy' which copies the + image to a block device. Unlike the base 'BmapCopy' class, this class does + various optimizations specific to block devices, e.g., switching to the + 'noop' I/O scheduler. """ + + def _tune_block_device(self): + """" Tune the block device for better performance: + 1. Switch to the 'noop' I/O scheduler if it is available - sequential + write to the block device becomes a lot faster comparing to CFQ. + 2. Limit the write buffering - we do not need the kernel to buffer a + lot of the data we send to the block device, because we write + sequentially. Limit the buffering. + + The old settings are saved in order to be able to restore them later. + """ + # Switch to the 'noop' I/O scheduler + try: + with open(self._sysfs_scheduler_path, "r+") as f_scheduler: + contents = f_scheduler.read() + f_scheduler.seek(0) + f_scheduler.write("noop") + except IOError as err: + # No problem, this is just an optimization + raise Error("cannot enable the 'noop' I/O scheduler: %s" % err) + + # The file contains a list of scheduler with the current + # scheduler in square brackets, e.g., "noop deadline [cfq]". + # Fetch the current scheduler name + import re + + match = re.match(r'.*\[(.+)\].*', contents) + self._old_scheduler_value = match.group(1) + + # Limit the write buffering + try: + with open(self._sysfs_max_ratio_path, "r+") as f_ratio: + self._old_max_ratio_value = f_ratio.read() + f_ratio.seek(0) + f_ratio.write("1") + except IOError as err: + raise Error("cannot set max. I/O ratio to '1': %s" % err) + + def _restore_bdev_settings(self): + """ Restore old block device settings which we changed in + '_tune_block_device()'. """ + + if self._old_scheduler_value is not None: + try: + with open(self._sysfs_scheduler_path, "w") as f_scheduler: + f_scheduler.write(self._old_scheduler_value) + except IOError as err: + raise Error("cannot restore the '%s' I/O scheduler: %s" \ + % (self._old_scheduler_value, err)) + + if self._old_max_ratio_value is not None: + try: + with open(self._sysfs_max_ratio_path, "w") as f_ratio: + f_ratio.write(self._old_max_ratio_value) + except IOError as err: + raise Error("cannot set the max. I/O ratio back to '%s': %s" \ + % (self._old_max_ratio_value, err)) + + def copy(self, sync = True, verify = True): + """ The same as in the base class but tunes the block device for better + performance before starting writing. Additionally, it forces block + device synchronization from time to time in order to make sure we do + not get stuck in 'fsync()' for too long time. The problem is that the + kernel synchronizes block devices when the file is closed. And the + result is that if the user interrupts us while we are copying the data, + the program will be blocked in 'close()' waiting for the block device + synchronization, which may last minutes for slow USB stick. This is + very bad user experience, and we work around this effect by + synchronizing from time to time. """ + + self._tune_block_device() + + try: + BmapCopy.copy(self, sync, verify) + except: + raise + finally: + self._restore_bdev_settings() + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The same as the constructor of the 'BmapCopy' base class, but adds + useful guard-checks specific to block devices. """ + + # Call the base class constructor first + BmapCopy.__init__(self, image, dest, bmap, image_size) + + self._batch_bytes = 1024 * 1024 + self._batch_blocks = self._batch_bytes / self.block_size + self._batch_queue_len = 6 + self._dest_fsync_watermark = (6 * 1024 * 1024) / self.block_size + + self._sysfs_base = None + self._sysfs_scheduler_path = None + self._sysfs_max_ratio_path = None + self._old_scheduler_value = None + self._old_max_ratio_value = None + + # If the image size is known, check that it fits the block device + if self.image_size: + try: + bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END) + os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET) + except OSError as err: + raise Error("cannot seed block device '%s': %s " \ + % (self._dest_path, err.strerror)) + + if bdev_size < self.image_size: + raise Error("the image file '%s' has size %s and it will not " \ + "fit the block device '%s' which has %s capacity" \ + % (self._image_path, self.image_size_human, + self._dest_path, human_size(bdev_size))) + + # Construct the path to the sysfs directory of our block device + st_rdev = os.fstat(self._f_dest.fileno()).st_rdev + self._sysfs_base = "/sys/dev/block/%s:%s/" \ + % (os.major(st_rdev), os.minor(st_rdev)) + + # Check if the 'queue' sub-directory exists. If yes, then our block + # device is entire disk. Otherwise, it is a partition, in which case we + # need to go one level up in the sysfs hierarchy. + if not os.path.exists(self._sysfs_base + "queue"): + self._sysfs_base = self._sysfs_base + "../" + + self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler" + self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio" diff --git a/tests/oldcodebase/BmapCopy2_1.py b/tests/oldcodebase/BmapCopy2_1.py new file mode 100644 index 0000000..7082287 --- /dev/null +++ b/tests/oldcodebase/BmapCopy2_1.py @@ -0,0 +1,631 @@ +""" This module implements copying of images with bmap and provides the +following API. + 1. BmapCopy class - implements copying to any kind of file, be that a block + device or a regular file. + 2. BmapBdevCopy class - based on BmapCopy and specializes on copying to block + devices. It does some more sanity checks and some block device performance + tuning. + +The bmap file is an XML file which contains a list of mapped blocks of the +image. Mapped blocks are the blocks which have disk sectors associated with +them, as opposed to holes, which are blocks with no associated disk sectors. In +other words, the image is considered to be a sparse file, and bmap basically +contains a list of mapped blocks of this sparse file. The bmap additionally +contains some useful information like block size (usually 4KiB), image size, +mapped blocks count, etc. + +The bmap is used for copying the image to a block device or to a regular file. +The idea is that we copy quickly with bmap because we copy only mapped blocks +and ignore the holes, because they are useless. And if the image is generated +properly (starting with a huge hole and writing all the data), it usually +contains only little mapped blocks, comparing to the overall image size. And +such an image compresses very well (because holes are read as all zeroes), so +it is beneficial to distributor them as compressed files along with the bmap. + +Here is an example. Suppose you have a 4GiB image which contains only 100MiB of +user data and you need to flash it to a slow USB stick. With bmap you end up +copying only a little bit more than 100MiB of data from the image to the USB +stick (namely, you copy only mapped blocks). This is a lot faster than copying +all 4GiB of data. We say that it is a bit more than 100MiB because things like +file-system meta-data (inode tables, superblocks, etc), partition table, etc +also contribute to the mapped blocks and are also copied. """ + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# pylint: disable=R0902 + +import os +import stat +import sys +import hashlib +import Queue +import thread +import datetime +from xml.etree import ElementTree +from bmaptools.BmapHelpers import human_size + +# The highest supported bmap format version +SUPPORTED_BMAP_VERSION = 1 + +class Error(Exception): + """ A class for exceptions generated by the 'BmapCopy' module. We currently + support only one type of exceptions, and we basically throw human-readable + problem description in case of errors. """ + pass + +class BmapCopy: + """ This class implements the bmap-based copying functionality. To copy an + image with bmap you should create an instance of this class, which requires + the following: + + * full path or a file-like object of the image to copy + * full path or a file-like object of the destination file copy the image to + * full path or a file-like object of the bmap file (optional) + * image size in bytes (optional) + + Although the main purpose of this class is to use bmap, the bmap is not + required, and if it was not provided then the entire image will be copied + to the destination file. + + When the bmap is provided, it is not necessary to specify image size, + because the size is contained in the bmap. Otherwise, it is benefitial to + specify the size because it enables extra sanity checks and makes it + possible to provide the progress bar. + + When the image size is known either from the bmap or the caller specified + it to the class constructor, all the image geometry description attributes + ('blocks_cnt', etc) are initialized by the class constructor and available + for the user. + + However, when the size is not known, some of the image geometry + description attributes are not initialized by the class constructor. + Instead, they are initialized only by the 'copy()' method. + + The 'copy()' method implements image copying. You may choose whether to + verify the SHA1 checksum while copying or not. Note, this is done only in + case of bmap-based copying and only if bmap contains the SHA1 checksums + (e.g., bmap version 1.0 did not have SHA1 checksums). + + You may choose whether to synchronize the destination file after writing or + not. To explicitly synchronize it, use the 'sync()' method. + + This class supports all the bmap format versions up version + 'SUPPORTED_BMAP_VERSION'. + + It is possible to have a simple progress indicator while copying the image. + Use the 'set_progress_indicator()' method. + + You can copy only once with an instance of this class. This means that in + order to copy the image for the second time, you have to create a new class + instance. """ + + def set_progress_indicator(self, file_obj, format_string): + """ Setup the progress indicator which shows how much data has been + copied in percent. + + The 'file_obj' argument is the console file object where the progress + has to be printed to. Pass 'None' to disable the progress indicator. + + The 'format_string' argument is the format string for the progress + indicator. It has to contain a single '%d' placeholder which will be + substitutes with copied data in percent. """ + + self._progress_file = file_obj + if format_string: + self._progress_format = format_string + else: + self._progress_format = "Copied %d%%" + + def _set_image_size(self, image_size): + """ Set image size and initialize various other geometry-related + attributes. """ + + if self.image_size is not None and self.image_size != image_size: + raise Error("cannot set image size to %d bytes, it is known to " \ + "be %d bytes (%s)" % (image_size, self.image_size, + self.image_size_human)) + + self.image_size = image_size + self.image_size_human = human_size(image_size) + self.blocks_cnt = self.image_size + self.block_size - 1 + self.blocks_cnt /= self.block_size + + if self.mapped_cnt is None: + self.mapped_cnt = self.blocks_cnt + self.mapped_size = self.image_size + self.mapped_size_human = self.image_size_human + + def _parse_bmap(self): + """ Parse the bmap file and initialize corresponding class instance + attributs. """ + + try: + self._xml = ElementTree.parse(self._f_bmap) + except ElementTree.ParseError as err: + raise Error("cannot parse the bmap file '%s' which should be a " \ + "proper XML file: %s" % (self._bmap_path, err)) + + xml = self._xml + self.bmap_version = str(xml.getroot().attrib.get('version')) + + # Make sure we support this version + major = int(self.bmap_version.split('.', 1)[0]) + if major > SUPPORTED_BMAP_VERSION: + raise Error("only bmap format version up to %d is supported, " \ + "version %d is not supported" \ + % (SUPPORTED_BMAP_VERSION, major)) + + # Fetch interesting data from the bmap XML file + self.block_size = int(xml.find("BlockSize").text.strip()) + self.blocks_cnt = int(xml.find("BlocksCount").text.strip()) + self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip()) + self.image_size = int(xml.find("ImageSize").text.strip()) + self.image_size_human = human_size(self.image_size) + self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = (self.mapped_cnt * 100.0) / self.blocks_cnt + + blocks_cnt = (self.image_size + self.block_size - 1) / self.block_size + if self.blocks_cnt != blocks_cnt: + raise Error("Inconsistent bmap - image size does not match " \ + "blocks count (%d bytes != %d blocks * %d bytes)" \ + % (self.image_size, self.blocks_cnt, self.block_size)) + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The class constructor. The parameters are: + image - file-like object of the image which should be copied, + should only support 'read()' and 'seek()' methods, + and only seeking forward has to be supported. + dest - file-like object of the destination file to copy the + image to. + bmap - file-like object of the bmap file to use for copying. + image_size - size of the image in bytes. """ + + self._xml = None + + self._dest_fsync_watermark = None + self._batch_blocks = None + self._batch_queue = None + self._batch_bytes = 1024 * 1024 + self._batch_queue_len = 2 + + self.bmap_version = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.image_size = None + self.image_size_human = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._f_bmap = None + self._f_bmap_path = None + + self._progress_started = None + self._progress_index = None + self._progress_time = None + self._progress_file = None + self._progress_format = None + self.set_progress_indicator(None, None) + + self._f_image = image + self._image_path = image.name + + self._f_dest = dest + self._dest_path = dest.name + st_data = os.fstat(self._f_dest.fileno()) + self._dest_is_regfile = stat.S_ISREG(st_data.st_mode) + + # Special quirk for /dev/null which does not support fsync() + if stat.S_ISCHR(st_data.st_mode) and \ + os.major(st_data.st_rdev) == 1 and \ + os.minor(st_data.st_rdev) == 3: + self._dest_supports_fsync = False + else: + self._dest_supports_fsync = True + + if bmap: + self._f_bmap = bmap + self._bmap_path = bmap.name + self._parse_bmap() + else: + # There is no bmap. Initialize user-visible attributes to something + # sensible with an assumption that we just have all blocks mapped. + self.bmap_version = 0 + self.block_size = 4096 + self.mapped_percent = 100 + + if image_size: + self._set_image_size(image_size) + + self._batch_blocks = self._batch_bytes / self.block_size + + def _update_progress(self, blocks_written): + """ Print the progress indicator if the mapped area size is known and + if the indicator has been enabled by assigning a console file object to + the 'progress_file' attribute. """ + + if not self._progress_file: + return + + if self.mapped_cnt: + assert blocks_written <= self.mapped_cnt + percent = int((float(blocks_written) / self.mapped_cnt) * 100) + progress = '\r' + self._progress_format % percent + '\n' + else: + # Do not rotate the wheel too fast + now = datetime.datetime.now() + min_delta = datetime.timedelta(milliseconds = 250) + if now - self._progress_time < min_delta: + return + self._progress_time = now + + progress_wheel = ('-', '\\', '|', '/') + progress = '\r' + progress_wheel[self._progress_index % 4] + '\n' + self._progress_index += 1 + + # This is a little trick we do in order to make sure that the next + # message will always start from a new line - we switch to the new + # line after each progress update and move the cursor up. As an + # example, this is useful when the copying is interrupted by an + # exception - the error message will start form new line. + if self._progress_started: + # The "move cursor up" escape sequence + self._progress_file.write('\033[1A') + else: + self._progress_started = True + + self._progress_file.write(progress) + self._progress_file.flush() + + def _get_block_ranges(self): + """ This is a helper generator that parses the bmap XML file and for + each block range in the XML file it yields ('first', 'last', 'sha1') + tuples, where: + * 'first' is the first block of the range; + * 'last' is the last block of the range; + * 'sha1' is the SHA1 checksum of the range ('None' is used if it is + missing. + + If there is no bmap file, the generator just yields a single range + for entire image file. If the image size is unknown, the generator + infinitely yields continuous ranges of size '_batch_blocks'. """ + + if not self._f_bmap: + # We do not have the bmap, yield a tuple with all blocks + if self.blocks_cnt: + yield (0, self.blocks_cnt - 1, None) + else: + # We do not know image size, keep yielding tuples with many + # blocks infinitely. + first = 0 + while True: + yield (first, first + self._batch_blocks - 1, None) + first += self._batch_blocks + return + + # We have the bmap, just read it and yield block ranges + xml = self._xml + xml_bmap = xml.find("BlockMap") + + for xml_element in xml_bmap.findall("Range"): + blocks_range = xml_element.text.strip() + # The range of blocks has the "X - Y" format, or it can be just "X" + # in old bmap format versions. First, split the blocks range string + # and strip white-spaces. + split = [x.strip() for x in blocks_range.split('-', 1)] + + first = int(split[0]) + if len(split) > 1: + last = int(split[1]) + if first > last: + raise Error("bad range (first > last): '%s'" % blocks_range) + else: + last = first + + if 'sha1' in xml_element.attrib: + sha1 = xml_element.attrib['sha1'] + else: + sha1 = None + + yield (first, last, sha1) + + def _get_batches(self, first, last): + """ This is a helper generator which splits block ranges from the bmap + file to smaller batches. Indeed, we cannot read and write entire block + ranges from the image file, because a range can be very large. So we + perform the I/O in batches. Batch size is defined by the + '_batch_blocks' attribute. Thus, for each (first, last) block range, + the generator yields smaller (start, end, length) batch ranges, where: + * 'start' is the starting batch block number; + * 'last' is the ending batch block number; + * 'length' is the batch length in blocks (same as + 'end' - 'start' + 1). """ + + batch_blocks = self._batch_blocks + + while first + batch_blocks - 1 <= last: + yield (first, first + batch_blocks - 1, batch_blocks) + first += batch_blocks + + batch_blocks = last - first + 1 + if batch_blocks: + yield (first, first + batch_blocks - 1, batch_blocks) + + def _get_data(self, verify): + """ This is generator which reads the image file in '_batch_blocks' + chunks and yields ('type', 'start', 'end', 'buf) tuples, where: + * 'start' is the starting block number of the batch; + * 'end' is the last block of the batch; + * 'buf' a buffer containing the batch data. """ + + try: + for (first, last, sha1) in self._get_block_ranges(): + if verify and sha1: + hash_obj = hashlib.new('sha1') + + self._f_image.seek(first * self.block_size) + + iterator = self._get_batches(first, last) + for (start, end, length) in iterator: + try: + buf = self._f_image.read(length * self.block_size) + except IOError as err: + raise Error("error while reading blocks %d-%d of the " \ + "image file '%s': %s" \ + % (start, end, self._image_path, err)) + + if not buf: + self._batch_queue.put(None) + return + + if verify and sha1: + hash_obj.update(buf) + + blocks = (len(buf) + self.block_size - 1) / self.block_size + self._batch_queue.put(("range", start, start + blocks - 1, + buf)) + + if verify and sha1 and hash_obj.hexdigest() != sha1: + raise Error("checksum mismatch for blocks range %d-%d: " \ + "calculated %s, should be %s (image file %s)" \ + % (first, last, hash_obj.hexdigest(), \ + sha1, self._image_path)) + # Silence pylint warning about catching too general exception + # pylint: disable=W0703 + except Exception: + # pylint: enable=W0703 + # In case of any exception - just pass it to the main thread + # through the queue. + self._batch_queue.put(("error", sys.exc_info())) + + self._batch_queue.put(None) + + def copy(self, sync = True, verify = True): + """ Copy the image to the destination file using bmap. The 'sync' + argument defines whether the destination file has to be synchronized + upon return. The 'verify' argument defines whether the SHA1 checksum + has to be verified while copying. """ + + # Create the queue for block batches and start the reader thread, which + # will read the image in batches and put the results to '_batch_queue'. + self._batch_queue = Queue.Queue(self._batch_queue_len) + thread.start_new_thread(self._get_data, (verify, )) + + blocks_written = 0 + bytes_written = 0 + fsync_last = 0 + + self._progress_started = False + self._progress_index = 0 + self._progress_time = datetime.datetime.now() + + # Read the image in '_batch_blocks' chunks and write them to the + # destination file + while True: + batch = self._batch_queue.get() + if batch is None: + # No more data, the image is written + break + elif batch[0] == "error": + # The reader thread encountered an error and passed us the + # exception. + exc_info = batch[1] + raise exc_info[0], exc_info[1], exc_info[2] + + (start, end, buf) = batch[1:4] + + assert len(buf) <= (end - start + 1) * self.block_size + assert len(buf) > (end - start) * self.block_size + + self._f_dest.seek(start * self.block_size) + + # Synchronize the destination file if we reached the watermark + if self._dest_fsync_watermark: + if blocks_written >= fsync_last + self._dest_fsync_watermark: + fsync_last = blocks_written + self.sync() + + try: + self._f_dest.write(buf) + except IOError as err: + raise Error("error while writing blocks %d-%d of '%s': %s" \ + % (start, end, self._dest_path, err)) + + self._batch_queue.task_done() + blocks_written += (end - start + 1) + bytes_written += len(buf) + + self._update_progress(blocks_written) + + if not self.image_size: + # The image size was unknown up until now, set it + self._set_image_size(bytes_written) + + # This is just a sanity check - we should have written exactly + # 'mapped_cnt' blocks. + if blocks_written != self.mapped_cnt: + raise Error("wrote %u blocks from image '%s' to '%s', but should " \ + "have %u - inconsistent bmap file '%s'" \ + % (blocks_written, self._image_path, self._dest_path, \ + self.mapped_cnt, self._bmap_path)) + + if self._dest_is_regfile: + # Make sure the destination file has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" \ + % (self._dest_path, err)) + + try: + self._f_dest.flush() + except IOError as err: + raise Error("cannot flush '%s': %s" % (self._dest_path, err)) + + if sync: + self.sync() + + def sync(self): + """ Synchronize the destination file to make sure all the data are + actually written to the disk. """ + + if self._dest_supports_fsync: + try: + os.fsync(self._f_dest.fileno()), + except OSError as err: + raise Error("cannot synchronize '%s': %s " \ + % (self._dest_path, err.strerror)) + + +class BmapBdevCopy(BmapCopy): + """ This class is a specialized version of 'BmapCopy' which copies the + image to a block device. Unlike the base 'BmapCopy' class, this class does + various optimizations specific to block devices, e.g., switching to the + 'noop' I/O scheduler. """ + + def _tune_block_device(self): + """" Tune the block device for better performance: + 1. Switch to the 'noop' I/O scheduler if it is available - sequential + write to the block device becomes a lot faster comparing to CFQ. + 2. Limit the write buffering - we do not need the kernel to buffer a + lot of the data we send to the block device, because we write + sequentially. Limit the buffering. + + The old settings are saved in order to be able to restore them later. + """ + # Switch to the 'noop' I/O scheduler + try: + with open(self._sysfs_scheduler_path, "r+") as f_scheduler: + contents = f_scheduler.read() + f_scheduler.seek(0) + f_scheduler.write("noop") + except IOError as err: + # No problem, this is just an optimization + raise Error("cannot enable the 'noop' I/O scheduler: %s" % err) + + # The file contains a list of scheduler with the current + # scheduler in square brackets, e.g., "noop deadline [cfq]". + # Fetch the current scheduler name + import re + + match = re.match(r'.*\[(.+)\].*', contents) + self._old_scheduler_value = match.group(1) + + # Limit the write buffering + try: + with open(self._sysfs_max_ratio_path, "r+") as f_ratio: + self._old_max_ratio_value = f_ratio.read() + f_ratio.seek(0) + f_ratio.write("1") + except IOError as err: + raise Error("cannot set max. I/O ratio to '1': %s" % err) + + def _restore_bdev_settings(self): + """ Restore old block device settings which we changed in + '_tune_block_device()'. """ + + if self._old_scheduler_value is not None: + try: + with open(self._sysfs_scheduler_path, "w") as f_scheduler: + f_scheduler.write(self._old_scheduler_value) + except IOError as err: + raise Error("cannot restore the '%s' I/O scheduler: %s" \ + % (self._old_scheduler_value, err)) + + if self._old_max_ratio_value is not None: + try: + with open(self._sysfs_max_ratio_path, "w") as f_ratio: + f_ratio.write(self._old_max_ratio_value) + except IOError as err: + raise Error("cannot set the max. I/O ratio back to '%s': %s" \ + % (self._old_max_ratio_value, err)) + + def copy(self, sync = True, verify = True): + """ The same as in the base class but tunes the block device for better + performance before starting writing. Additionally, it forces block + device synchronization from time to time in order to make sure we do + not get stuck in 'fsync()' for too long time. The problem is that the + kernel synchronizes block devices when the file is closed. And the + result is that if the user interrupts us while we are copying the data, + the program will be blocked in 'close()' waiting for the block device + synchronization, which may last minutes for slow USB stick. This is + very bad user experience, and we work around this effect by + synchronizing from time to time. """ + + self._tune_block_device() + + try: + BmapCopy.copy(self, sync, verify) + except: + raise + finally: + self._restore_bdev_settings() + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The same as the constructor of the 'BmapCopy' base class, but adds + useful guard-checks specific to block devices. """ + + # Call the base class constructor first + BmapCopy.__init__(self, image, dest, bmap, image_size) + + self._batch_bytes = 1024 * 1024 + self._batch_blocks = self._batch_bytes / self.block_size + self._batch_queue_len = 6 + self._dest_fsync_watermark = (6 * 1024 * 1024) / self.block_size + + self._sysfs_base = None + self._sysfs_scheduler_path = None + self._sysfs_max_ratio_path = None + self._old_scheduler_value = None + self._old_max_ratio_value = None + + # If the image size is known, check that it fits the block device + if self.image_size: + try: + bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END) + os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET) + except OSError as err: + raise Error("cannot seed block device '%s': %s " \ + % (self._dest_path, err.strerror)) + + if bdev_size < self.image_size: + raise Error("the image file '%s' has size %s and it will not " \ + "fit the block device '%s' which has %s capacity" \ + % (self._image_path, self.image_size_human, + self._dest_path, human_size(bdev_size))) + + # Construct the path to the sysfs directory of our block device + st_rdev = os.fstat(self._f_dest.fileno()).st_rdev + self._sysfs_base = "/sys/dev/block/%s:%s/" \ + % (os.major(st_rdev), os.minor(st_rdev)) + + # Check if the 'queue' sub-directory exists. If yes, then our block + # device is entire disk. Otherwise, it is a partition, in which case we + # need to go one level up in the sysfs hierarchy. + if not os.path.exists(self._sysfs_base + "queue"): + self._sysfs_base = self._sysfs_base + "../" + + self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler" + self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio" diff --git a/tests/oldcodebase/BmapCopy2_2.py b/tests/oldcodebase/BmapCopy2_2.py new file mode 100644 index 0000000..76c28f7 --- /dev/null +++ b/tests/oldcodebase/BmapCopy2_2.py @@ -0,0 +1,632 @@ +""" This module implements copying of images with bmap and provides the +following API. + 1. BmapCopy class - implements copying to any kind of file, be that a block + device or a regular file. + 2. BmapBdevCopy class - based on BmapCopy and specializes on copying to block + devices. It does some more sanity checks and some block device performance + tuning. + +The bmap file is an XML file which contains a list of mapped blocks of the +image. Mapped blocks are the blocks which have disk sectors associated with +them, as opposed to holes, which are blocks with no associated disk sectors. In +other words, the image is considered to be a sparse file, and bmap basically +contains a list of mapped blocks of this sparse file. The bmap additionally +contains some useful information like block size (usually 4KiB), image size, +mapped blocks count, etc. + +The bmap is used for copying the image to a block device or to a regular file. +The idea is that we copy quickly with bmap because we copy only mapped blocks +and ignore the holes, because they are useless. And if the image is generated +properly (starting with a huge hole and writing all the data), it usually +contains only little mapped blocks, comparing to the overall image size. And +such an image compresses very well (because holes are read as all zeroes), so +it is beneficial to distributor them as compressed files along with the bmap. + +Here is an example. Suppose you have a 4GiB image which contains only 100MiB of +user data and you need to flash it to a slow USB stick. With bmap you end up +copying only a little bit more than 100MiB of data from the image to the USB +stick (namely, you copy only mapped blocks). This is a lot faster than copying +all 4GiB of data. We say that it is a bit more than 100MiB because things like +file-system meta-data (inode tables, superblocks, etc), partition table, etc +also contribute to the mapped blocks and are also copied. """ + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# pylint: disable=R0902 + +import os +import stat +import sys +import hashlib +import Queue +import thread +import datetime +from xml.etree import ElementTree +from bmaptools.BmapHelpers import human_size + +# The highest supported bmap format version +SUPPORTED_BMAP_VERSION = 1 + +class Error(Exception): + """ A class for exceptions generated by the 'BmapCopy' module. We currently + support only one type of exceptions, and we basically throw human-readable + problem description in case of errors. """ + pass + +class BmapCopy: + """ This class implements the bmap-based copying functionality. To copy an + image with bmap you should create an instance of this class, which requires + the following: + + * full path or a file-like object of the image to copy + * full path or a file-like object of the destination file copy the image to + * full path or a file-like object of the bmap file (optional) + * image size in bytes (optional) + + Although the main purpose of this class is to use bmap, the bmap is not + required, and if it was not provided then the entire image will be copied + to the destination file. + + When the bmap is provided, it is not necessary to specify image size, + because the size is contained in the bmap. Otherwise, it is benefitial to + specify the size because it enables extra sanity checks and makes it + possible to provide the progress bar. + + When the image size is known either from the bmap or the caller specified + it to the class constructor, all the image geometry description attributes + ('blocks_cnt', etc) are initialized by the class constructor and available + for the user. + + However, when the size is not known, some of the image geometry + description attributes are not initialized by the class constructor. + Instead, they are initialized only by the 'copy()' method. + + The 'copy()' method implements image copying. You may choose whether to + verify the SHA1 checksum while copying or not. Note, this is done only in + case of bmap-based copying and only if bmap contains the SHA1 checksums + (e.g., bmap version 1.0 did not have SHA1 checksums). + + You may choose whether to synchronize the destination file after writing or + not. To explicitly synchronize it, use the 'sync()' method. + + This class supports all the bmap format versions up version + 'SUPPORTED_BMAP_VERSION'. + + It is possible to have a simple progress indicator while copying the image. + Use the 'set_progress_indicator()' method. + + You can copy only once with an instance of this class. This means that in + order to copy the image for the second time, you have to create a new class + instance. """ + + def set_progress_indicator(self, file_obj, format_string): + """ Setup the progress indicator which shows how much data has been + copied in percent. + + The 'file_obj' argument is the console file object where the progress + has to be printed to. Pass 'None' to disable the progress indicator. + + The 'format_string' argument is the format string for the progress + indicator. It has to contain a single '%d' placeholder which will be + substitutes with copied data in percent. """ + + self._progress_file = file_obj + if format_string: + self._progress_format = format_string + else: + self._progress_format = "Copied %d%%" + + def _set_image_size(self, image_size): + """ Set image size and initialize various other geometry-related + attributes. """ + + if self.image_size is not None and self.image_size != image_size: + raise Error("cannot set image size to %d bytes, it is known to " \ + "be %d bytes (%s)" % (image_size, self.image_size, + self.image_size_human)) + + self.image_size = image_size + self.image_size_human = human_size(image_size) + self.blocks_cnt = self.image_size + self.block_size - 1 + self.blocks_cnt /= self.block_size + + if self.mapped_cnt is None: + self.mapped_cnt = self.blocks_cnt + self.mapped_size = self.image_size + self.mapped_size_human = self.image_size_human + + def _parse_bmap(self): + """ Parse the bmap file and initialize corresponding class instance + attributs. """ + + try: + self._xml = ElementTree.parse(self._f_bmap) + except ElementTree.ParseError as err: + raise Error("cannot parse the bmap file '%s' which should be a " \ + "proper XML file: %s" % (self._bmap_path, err)) + + xml = self._xml + self.bmap_version = str(xml.getroot().attrib.get('version')) + + # Make sure we support this version + major = int(self.bmap_version.split('.', 1)[0]) + if major > SUPPORTED_BMAP_VERSION: + raise Error("only bmap format version up to %d is supported, " \ + "version %d is not supported" \ + % (SUPPORTED_BMAP_VERSION, major)) + + # Fetch interesting data from the bmap XML file + self.block_size = int(xml.find("BlockSize").text.strip()) + self.blocks_cnt = int(xml.find("BlocksCount").text.strip()) + self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip()) + self.image_size = int(xml.find("ImageSize").text.strip()) + self.image_size_human = human_size(self.image_size) + self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = (self.mapped_cnt * 100.0) / self.blocks_cnt + + blocks_cnt = (self.image_size + self.block_size - 1) / self.block_size + if self.blocks_cnt != blocks_cnt: + raise Error("Inconsistent bmap - image size does not match " \ + "blocks count (%d bytes != %d blocks * %d bytes)" \ + % (self.image_size, self.blocks_cnt, self.block_size)) + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The class constructor. The parameters are: + image - file-like object of the image which should be copied, + should only support 'read()' and 'seek()' methods, + and only seeking forward has to be supported. + dest - file-like object of the destination file to copy the + image to. + bmap - file-like object of the bmap file to use for copying. + image_size - size of the image in bytes. """ + + self._xml = None + + self._dest_fsync_watermark = None + self._batch_blocks = None + self._batch_queue = None + self._batch_bytes = 1024 * 1024 + self._batch_queue_len = 2 + + self.bmap_version = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.image_size = None + self.image_size_human = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._f_bmap = None + self._f_bmap_path = None + + self._progress_started = None + self._progress_index = None + self._progress_time = None + self._progress_file = None + self._progress_format = None + self.set_progress_indicator(None, None) + + self._f_image = image + self._image_path = image.name + + self._f_dest = dest + self._dest_path = dest.name + st_data = os.fstat(self._f_dest.fileno()) + self._dest_is_regfile = stat.S_ISREG(st_data.st_mode) + + # Special quirk for /dev/null which does not support fsync() + if stat.S_ISCHR(st_data.st_mode) and \ + os.major(st_data.st_rdev) == 1 and \ + os.minor(st_data.st_rdev) == 3: + self._dest_supports_fsync = False + else: + self._dest_supports_fsync = True + + if bmap: + self._f_bmap = bmap + self._bmap_path = bmap.name + self._parse_bmap() + else: + # There is no bmap. Initialize user-visible attributes to something + # sensible with an assumption that we just have all blocks mapped. + self.bmap_version = 0 + self.block_size = 4096 + self.mapped_percent = 100 + + if image_size: + self._set_image_size(image_size) + + self._batch_blocks = self._batch_bytes / self.block_size + + def _update_progress(self, blocks_written): + """ Print the progress indicator if the mapped area size is known and + if the indicator has been enabled by assigning a console file object to + the 'progress_file' attribute. """ + + if not self._progress_file: + return + + if self.mapped_cnt: + assert blocks_written <= self.mapped_cnt + percent = int((float(blocks_written) / self.mapped_cnt) * 100) + progress = '\r' + self._progress_format % percent + '\n' + else: + # Do not rotate the wheel too fast + now = datetime.datetime.now() + min_delta = datetime.timedelta(milliseconds = 250) + if now - self._progress_time < min_delta: + return + self._progress_time = now + + progress_wheel = ('-', '\\', '|', '/') + progress = '\r' + progress_wheel[self._progress_index % 4] + '\n' + self._progress_index += 1 + + # This is a little trick we do in order to make sure that the next + # message will always start from a new line - we switch to the new + # line after each progress update and move the cursor up. As an + # example, this is useful when the copying is interrupted by an + # exception - the error message will start form new line. + if self._progress_started: + # The "move cursor up" escape sequence + self._progress_file.write('\033[1A') # pylint: disable=W1401 + else: + self._progress_started = True + + self._progress_file.write(progress) + self._progress_file.flush() + + def _get_block_ranges(self): + """ This is a helper generator that parses the bmap XML file and for + each block range in the XML file it yields ('first', 'last', 'sha1') + tuples, where: + * 'first' is the first block of the range; + * 'last' is the last block of the range; + * 'sha1' is the SHA1 checksum of the range ('None' is used if it is + missing. + + If there is no bmap file, the generator just yields a single range + for entire image file. If the image size is unknown, the generator + infinitely yields continuous ranges of size '_batch_blocks'. """ + + if not self._f_bmap: + # We do not have the bmap, yield a tuple with all blocks + if self.blocks_cnt: + yield (0, self.blocks_cnt - 1, None) + else: + # We do not know image size, keep yielding tuples with many + # blocks infinitely. + first = 0 + while True: + yield (first, first + self._batch_blocks - 1, None) + first += self._batch_blocks + return + + # We have the bmap, just read it and yield block ranges + xml = self._xml + xml_bmap = xml.find("BlockMap") + + for xml_element in xml_bmap.findall("Range"): + blocks_range = xml_element.text.strip() + # The range of blocks has the "X - Y" format, or it can be just "X" + # in old bmap format versions. First, split the blocks range string + # and strip white-spaces. + split = [x.strip() for x in blocks_range.split('-', 1)] + + first = int(split[0]) + if len(split) > 1: + last = int(split[1]) + if first > last: + raise Error("bad range (first > last): '%s'" % blocks_range) + else: + last = first + + if 'sha1' in xml_element.attrib: + sha1 = xml_element.attrib['sha1'] + else: + sha1 = None + + yield (first, last, sha1) + + def _get_batches(self, first, last): + """ This is a helper generator which splits block ranges from the bmap + file to smaller batches. Indeed, we cannot read and write entire block + ranges from the image file, because a range can be very large. So we + perform the I/O in batches. Batch size is defined by the + '_batch_blocks' attribute. Thus, for each (first, last) block range, + the generator yields smaller (start, end, length) batch ranges, where: + * 'start' is the starting batch block number; + * 'last' is the ending batch block number; + * 'length' is the batch length in blocks (same as + 'end' - 'start' + 1). """ + + batch_blocks = self._batch_blocks + + while first + batch_blocks - 1 <= last: + yield (first, first + batch_blocks - 1, batch_blocks) + first += batch_blocks + + batch_blocks = last - first + 1 + if batch_blocks: + yield (first, first + batch_blocks - 1, batch_blocks) + + def _get_data(self, verify): + """ This is generator which reads the image file in '_batch_blocks' + chunks and yields ('type', 'start', 'end', 'buf) tuples, where: + * 'start' is the starting block number of the batch; + * 'end' is the last block of the batch; + * 'buf' a buffer containing the batch data. """ + + try: + for (first, last, sha1) in self._get_block_ranges(): + if verify and sha1: + hash_obj = hashlib.new('sha1') + + self._f_image.seek(first * self.block_size) + + iterator = self._get_batches(first, last) + for (start, end, length) in iterator: + try: + buf = self._f_image.read(length * self.block_size) + except IOError as err: + raise Error("error while reading blocks %d-%d of the " \ + "image file '%s': %s" \ + % (start, end, self._image_path, err)) + + if not buf: + self._batch_queue.put(None) + return + + if verify and sha1: + hash_obj.update(buf) + + blocks = (len(buf) + self.block_size - 1) / self.block_size + self._batch_queue.put(("range", start, start + blocks - 1, + buf)) + + if verify and sha1 and hash_obj.hexdigest() != sha1: + raise Error("checksum mismatch for blocks range %d-%d: " \ + "calculated %s, should be %s (image file %s)" \ + % (first, last, hash_obj.hexdigest(), \ + sha1, self._image_path)) + # Silence pylint warning about catching too general exception + # pylint: disable=W0703 + except Exception: + # pylint: enable=W0703 + # In case of any exception - just pass it to the main thread + # through the queue. + self._batch_queue.put(("error", sys.exc_info())) + + self._batch_queue.put(None) + + def copy(self, sync = True, verify = True): + """ Copy the image to the destination file using bmap. The 'sync' + argument defines whether the destination file has to be synchronized + upon return. The 'verify' argument defines whether the SHA1 checksum + has to be verified while copying. """ + + # Create the queue for block batches and start the reader thread, which + # will read the image in batches and put the results to '_batch_queue'. + self._batch_queue = Queue.Queue(self._batch_queue_len) + thread.start_new_thread(self._get_data, (verify, )) + + blocks_written = 0 + bytes_written = 0 + fsync_last = 0 + + self._progress_started = False + self._progress_index = 0 + self._progress_time = datetime.datetime.now() + + # Read the image in '_batch_blocks' chunks and write them to the + # destination file + while True: + batch = self._batch_queue.get() + if batch is None: + # No more data, the image is written + break + elif batch[0] == "error": + # The reader thread encountered an error and passed us the + # exception. + exc_info = batch[1] + raise exc_info[0], exc_info[1], exc_info[2] + + (start, end, buf) = batch[1:4] + + assert len(buf) <= (end - start + 1) * self.block_size + assert len(buf) > (end - start) * self.block_size + + self._f_dest.seek(start * self.block_size) + + # Synchronize the destination file if we reached the watermark + if self._dest_fsync_watermark: + if blocks_written >= fsync_last + self._dest_fsync_watermark: + fsync_last = blocks_written + self.sync() + + try: + self._f_dest.write(buf) + except IOError as err: + raise Error("error while writing blocks %d-%d of '%s': %s" \ + % (start, end, self._dest_path, err)) + + self._batch_queue.task_done() + blocks_written += (end - start + 1) + bytes_written += len(buf) + + self._update_progress(blocks_written) + + if not self.image_size: + # The image size was unknown up until now, set it + self._set_image_size(bytes_written) + + # This is just a sanity check - we should have written exactly + # 'mapped_cnt' blocks. + if blocks_written != self.mapped_cnt: + raise Error("wrote %u blocks from image '%s' to '%s', but should " \ + "have %u - inconsistent bmap file '%s'" \ + % (blocks_written, self._image_path, self._dest_path, \ + self.mapped_cnt, self._bmap_path)) + + if self._dest_is_regfile: + # Make sure the destination file has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" \ + % (self._dest_path, err)) + + try: + self._f_dest.flush() + except IOError as err: + raise Error("cannot flush '%s': %s" % (self._dest_path, err)) + + if sync: + self.sync() + + def sync(self): + """ Synchronize the destination file to make sure all the data are + actually written to the disk. """ + + if self._dest_supports_fsync: + try: + os.fsync(self._f_dest.fileno()), + except OSError as err: + raise Error("cannot synchronize '%s': %s " \ + % (self._dest_path, err.strerror)) + + +class BmapBdevCopy(BmapCopy): + """ This class is a specialized version of 'BmapCopy' which copies the + image to a block device. Unlike the base 'BmapCopy' class, this class does + various optimizations specific to block devices, e.g., switching to the + 'noop' I/O scheduler. """ + + def _tune_block_device(self): + """" Tune the block device for better performance: + 1. Switch to the 'noop' I/O scheduler if it is available - sequential + write to the block device becomes a lot faster comparing to CFQ. + 2. Limit the write buffering - we do not need the kernel to buffer a + lot of the data we send to the block device, because we write + sequentially. Limit the buffering. + + The old settings are saved in order to be able to restore them later. + """ + # Switch to the 'noop' I/O scheduler + try: + with open(self._sysfs_scheduler_path, "r+") as f_scheduler: + contents = f_scheduler.read() + f_scheduler.seek(0) + f_scheduler.write("noop") + except IOError as err: + # No problem, this is just an optimization + raise Error("cannot enable the 'noop' I/O scheduler: %s" % err) + + # The file contains a list of scheduler with the current + # scheduler in square brackets, e.g., "noop deadline [cfq]". + # Fetch the current scheduler name + import re + + match = re.match(r'.*\[(.+)\].*', contents) + if match: + self._old_scheduler_value = match.group(1) + + # Limit the write buffering + try: + with open(self._sysfs_max_ratio_path, "r+") as f_ratio: + self._old_max_ratio_value = f_ratio.read() + f_ratio.seek(0) + f_ratio.write("1") + except IOError as err: + raise Error("cannot set max. I/O ratio to '1': %s" % err) + + def _restore_bdev_settings(self): + """ Restore old block device settings which we changed in + '_tune_block_device()'. """ + + if self._old_scheduler_value is not None: + try: + with open(self._sysfs_scheduler_path, "w") as f_scheduler: + f_scheduler.write(self._old_scheduler_value) + except IOError as err: + raise Error("cannot restore the '%s' I/O scheduler: %s" \ + % (self._old_scheduler_value, err)) + + if self._old_max_ratio_value is not None: + try: + with open(self._sysfs_max_ratio_path, "w") as f_ratio: + f_ratio.write(self._old_max_ratio_value) + except IOError as err: + raise Error("cannot set the max. I/O ratio back to '%s': %s" \ + % (self._old_max_ratio_value, err)) + + def copy(self, sync = True, verify = True): + """ The same as in the base class but tunes the block device for better + performance before starting writing. Additionally, it forces block + device synchronization from time to time in order to make sure we do + not get stuck in 'fsync()' for too long time. The problem is that the + kernel synchronizes block devices when the file is closed. And the + result is that if the user interrupts us while we are copying the data, + the program will be blocked in 'close()' waiting for the block device + synchronization, which may last minutes for slow USB stick. This is + very bad user experience, and we work around this effect by + synchronizing from time to time. """ + + self._tune_block_device() + + try: + BmapCopy.copy(self, sync, verify) + except: + raise + finally: + self._restore_bdev_settings() + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The same as the constructor of the 'BmapCopy' base class, but adds + useful guard-checks specific to block devices. """ + + # Call the base class constructor first + BmapCopy.__init__(self, image, dest, bmap, image_size) + + self._batch_bytes = 1024 * 1024 + self._batch_blocks = self._batch_bytes / self.block_size + self._batch_queue_len = 6 + self._dest_fsync_watermark = (6 * 1024 * 1024) / self.block_size + + self._sysfs_base = None + self._sysfs_scheduler_path = None + self._sysfs_max_ratio_path = None + self._old_scheduler_value = None + self._old_max_ratio_value = None + + # If the image size is known, check that it fits the block device + if self.image_size: + try: + bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END) + os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET) + except OSError as err: + raise Error("cannot seed block device '%s': %s " \ + % (self._dest_path, err.strerror)) + + if bdev_size < self.image_size: + raise Error("the image file '%s' has size %s and it will not " \ + "fit the block device '%s' which has %s capacity" \ + % (self._image_path, self.image_size_human, + self._dest_path, human_size(bdev_size))) + + # Construct the path to the sysfs directory of our block device + st_rdev = os.fstat(self._f_dest.fileno()).st_rdev + self._sysfs_base = "/sys/dev/block/%s:%s/" \ + % (os.major(st_rdev), os.minor(st_rdev)) + + # Check if the 'queue' sub-directory exists. If yes, then our block + # device is entire disk. Otherwise, it is a partition, in which case we + # need to go one level up in the sysfs hierarchy. + if not os.path.exists(self._sysfs_base + "queue"): + self._sysfs_base = self._sysfs_base + "../" + + self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler" + self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio" diff --git a/tests/oldcodebase/BmapCopy2_3.py b/tests/oldcodebase/BmapCopy2_3.py new file mode 100644 index 0000000..889f357 --- /dev/null +++ b/tests/oldcodebase/BmapCopy2_3.py @@ -0,0 +1,667 @@ +""" This module implements copying of images with bmap and provides the +following API. + 1. BmapCopy class - implements copying to any kind of file, be that a block + device or a regular file. + 2. BmapBdevCopy class - based on BmapCopy and specializes on copying to block + devices. It does some more sanity checks and some block device performance + tuning. + +The bmap file is an XML file which contains a list of mapped blocks of the +image. Mapped blocks are the blocks which have disk sectors associated with +them, as opposed to holes, which are blocks with no associated disk sectors. In +other words, the image is considered to be a sparse file, and bmap basically +contains a list of mapped blocks of this sparse file. The bmap additionally +contains some useful information like block size (usually 4KiB), image size, +mapped blocks count, etc. + +The bmap is used for copying the image to a block device or to a regular file. +The idea is that we copy quickly with bmap because we copy only mapped blocks +and ignore the holes, because they are useless. And if the image is generated +properly (starting with a huge hole and writing all the data), it usually +contains only little mapped blocks, comparing to the overall image size. And +such an image compresses very well (because holes are read as all zeroes), so +it is beneficial to distributor them as compressed files along with the bmap. + +Here is an example. Suppose you have a 4GiB image which contains only 100MiB of +user data and you need to flash it to a slow USB stick. With bmap you end up +copying only a little bit more than 100MiB of data from the image to the USB +stick (namely, you copy only mapped blocks). This is a lot faster than copying +all 4GiB of data. We say that it is a bit more than 100MiB because things like +file-system meta-data (inode tables, superblocks, etc), partition table, etc +also contribute to the mapped blocks and are also copied. """ + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# pylint: disable=R0902 + +import os +import stat +import sys +import hashlib +import Queue +import thread +import datetime +from xml.etree import ElementTree +from bmaptools.BmapHelpers import human_size + +# The highest supported bmap format version +SUPPORTED_BMAP_VERSION = 1 + +class Error(Exception): + """ A class for exceptions generated by the 'BmapCopy' module. We currently + support only one type of exceptions, and we basically throw human-readable + problem description in case of errors. """ + pass + +class BmapCopy: + """ This class implements the bmap-based copying functionality. To copy an + image with bmap you should create an instance of this class, which requires + the following: + + * full path or a file-like object of the image to copy + * full path or a file object of the destination file copy the image to + * full path or a file object of the bmap file (optional) + * image size in bytes (optional) + + Although the main purpose of this class is to use bmap, the bmap is not + required, and if it was not provided then the entire image will be copied + to the destination file. + + When the bmap is provided, it is not necessary to specify image size, + because the size is contained in the bmap. Otherwise, it is benefitial to + specify the size because it enables extra sanity checks and makes it + possible to provide the progress bar. + + When the image size is known either from the bmap or the caller specified + it to the class constructor, all the image geometry description attributes + ('blocks_cnt', etc) are initialized by the class constructor and available + for the user. + + However, when the size is not known, some of the image geometry + description attributes are not initialized by the class constructor. + Instead, they are initialized only by the 'copy()' method. + + The 'copy()' method implements image copying. You may choose whether to + verify the SHA1 checksum while copying or not. Note, this is done only in + case of bmap-based copying and only if bmap contains the SHA1 checksums + (e.g., bmap version 1.0 did not have SHA1 checksums). + + You may choose whether to synchronize the destination file after writing or + not. To explicitly synchronize it, use the 'sync()' method. + + This class supports all the bmap format versions up version + 'SUPPORTED_BMAP_VERSION'. + + It is possible to have a simple progress indicator while copying the image. + Use the 'set_progress_indicator()' method. + + You can copy only once with an instance of this class. This means that in + order to copy the image for the second time, you have to create a new class + instance. """ + + def set_progress_indicator(self, file_obj, format_string): + """ Setup the progress indicator which shows how much data has been + copied in percent. + + The 'file_obj' argument is the console file object where the progress + has to be printed to. Pass 'None' to disable the progress indicator. + + The 'format_string' argument is the format string for the progress + indicator. It has to contain a single '%d' placeholder which will be + substitutes with copied data in percent. """ + + self._progress_file = file_obj + if format_string: + self._progress_format = format_string + else: + self._progress_format = "Copied %d%%" + + def _set_image_size(self, image_size): + """ Set image size and initialize various other geometry-related + attributes. """ + + if self.image_size is not None and self.image_size != image_size: + raise Error("cannot set image size to %d bytes, it is known to " \ + "be %d bytes (%s)" % (image_size, self.image_size, + self.image_size_human)) + + self.image_size = image_size + self.image_size_human = human_size(image_size) + self.blocks_cnt = self.image_size + self.block_size - 1 + self.blocks_cnt /= self.block_size + + if self.mapped_cnt is None: + self.mapped_cnt = self.blocks_cnt + self.mapped_size = self.image_size + self.mapped_size_human = self.image_size_human + + def _verify_bmap_checksum(self): + """ This is a helper function which verifies SHA1 checksum of the bmap + file. """ + + import mmap + + correct_sha1 = self._xml.find("BmapFileSHA1").text.strip() + + # Before verifying the shecksum, we have to substitute the SHA1 value + # stored in the file with all zeroes. For these purposes we create + # private memory mapping of the bmap file. + mapped_bmap = mmap.mmap(self._f_bmap.fileno(), 0, + access = mmap.ACCESS_COPY) + + sha1_pos = mapped_bmap.find(correct_sha1) + assert sha1_pos != -1 + + mapped_bmap[sha1_pos:sha1_pos + 40] = '0' * 40 + calculated_sha1 = hashlib.sha1(mapped_bmap).hexdigest() + + mapped_bmap.close() + + if calculated_sha1 != correct_sha1: + raise Error("checksum mismatch for bmap file '%s': calculated " \ + "'%s', should be '%s'" % \ + (self._bmap_path, calculated_sha1, correct_sha1)) + + def _parse_bmap(self): + """ Parse the bmap file and initialize corresponding class instance + attributs. """ + + try: + self._xml = ElementTree.parse(self._f_bmap) + except ElementTree.ParseError as err: + raise Error("cannot parse the bmap file '%s' which should be a " \ + "proper XML file: %s" % (self._bmap_path, err)) + + xml = self._xml + self.bmap_version = str(xml.getroot().attrib.get('version')) + + # Make sure we support this version + self.bmap_version_major = int(self.bmap_version.split('.', 1)[0]) + self.bmap_version_minor = int(self.bmap_version.split('.', 1)[1]) + if self.bmap_version_major > SUPPORTED_BMAP_VERSION: + raise Error("only bmap format version up to %d is supported, " \ + "version %d is not supported" \ + % (SUPPORTED_BMAP_VERSION, self.bmap_version_major)) + + # Fetch interesting data from the bmap XML file + self.block_size = int(xml.find("BlockSize").text.strip()) + self.blocks_cnt = int(xml.find("BlocksCount").text.strip()) + self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip()) + self.image_size = int(xml.find("ImageSize").text.strip()) + self.image_size_human = human_size(self.image_size) + self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = (self.mapped_cnt * 100.0) / self.blocks_cnt + + blocks_cnt = (self.image_size + self.block_size - 1) / self.block_size + if self.blocks_cnt != blocks_cnt: + raise Error("Inconsistent bmap - image size does not match " \ + "blocks count (%d bytes != %d blocks * %d bytes)" \ + % (self.image_size, self.blocks_cnt, self.block_size)) + + if self.bmap_version_major >= 1 and self.bmap_version_minor >= 3: + # Bmap file checksum appeard in format 1.3 + self._verify_bmap_checksum() + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The class constructor. The parameters are: + image - file-like object of the image which should be copied, + should only support 'read()' and 'seek()' methods, + and only seeking forward has to be supported. + dest - file object of the destination file to copy the image + to. + bmap - file object of the bmap file to use for copying. + image_size - size of the image in bytes. """ + + self._xml = None + + self._dest_fsync_watermark = None + self._batch_blocks = None + self._batch_queue = None + self._batch_bytes = 1024 * 1024 + self._batch_queue_len = 2 + + self.bmap_version = None + self.bmap_version_major = None + self.bmap_version_minor = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.image_size = None + self.image_size_human = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._f_bmap = None + self._f_bmap_path = None + + self._progress_started = None + self._progress_index = None + self._progress_time = None + self._progress_file = None + self._progress_format = None + self.set_progress_indicator(None, None) + + self._f_image = image + self._image_path = image.name + + self._f_dest = dest + self._dest_path = dest.name + st_data = os.fstat(self._f_dest.fileno()) + self._dest_is_regfile = stat.S_ISREG(st_data.st_mode) + + # Special quirk for /dev/null which does not support fsync() + if stat.S_ISCHR(st_data.st_mode) and \ + os.major(st_data.st_rdev) == 1 and \ + os.minor(st_data.st_rdev) == 3: + self._dest_supports_fsync = False + else: + self._dest_supports_fsync = True + + if bmap: + self._f_bmap = bmap + self._bmap_path = bmap.name + self._parse_bmap() + else: + # There is no bmap. Initialize user-visible attributes to something + # sensible with an assumption that we just have all blocks mapped. + self.bmap_version = 0 + self.block_size = 4096 + self.mapped_percent = 100 + + if image_size: + self._set_image_size(image_size) + + self._batch_blocks = self._batch_bytes / self.block_size + + def _update_progress(self, blocks_written): + """ Print the progress indicator if the mapped area size is known and + if the indicator has been enabled by assigning a console file object to + the 'progress_file' attribute. """ + + if not self._progress_file: + return + + if self.mapped_cnt: + assert blocks_written <= self.mapped_cnt + percent = int((float(blocks_written) / self.mapped_cnt) * 100) + progress = '\r' + self._progress_format % percent + '\n' + else: + # Do not rotate the wheel too fast + now = datetime.datetime.now() + min_delta = datetime.timedelta(milliseconds = 250) + if now - self._progress_time < min_delta: + return + self._progress_time = now + + progress_wheel = ('-', '\\', '|', '/') + progress = '\r' + progress_wheel[self._progress_index % 4] + '\n' + self._progress_index += 1 + + # This is a little trick we do in order to make sure that the next + # message will always start from a new line - we switch to the new + # line after each progress update and move the cursor up. As an + # example, this is useful when the copying is interrupted by an + # exception - the error message will start form new line. + if self._progress_started: + # The "move cursor up" escape sequence + self._progress_file.write('\033[1A') # pylint: disable=W1401 + else: + self._progress_started = True + + self._progress_file.write(progress) + self._progress_file.flush() + + def _get_block_ranges(self): + """ This is a helper generator that parses the bmap XML file and for + each block range in the XML file it yields ('first', 'last', 'sha1') + tuples, where: + * 'first' is the first block of the range; + * 'last' is the last block of the range; + * 'sha1' is the SHA1 checksum of the range ('None' is used if it is + missing. + + If there is no bmap file, the generator just yields a single range + for entire image file. If the image size is unknown, the generator + infinitely yields continuous ranges of size '_batch_blocks'. """ + + if not self._f_bmap: + # We do not have the bmap, yield a tuple with all blocks + if self.blocks_cnt: + yield (0, self.blocks_cnt - 1, None) + else: + # We do not know image size, keep yielding tuples with many + # blocks infinitely. + first = 0 + while True: + yield (first, first + self._batch_blocks - 1, None) + first += self._batch_blocks + return + + # We have the bmap, just read it and yield block ranges + xml = self._xml + xml_bmap = xml.find("BlockMap") + + for xml_element in xml_bmap.findall("Range"): + blocks_range = xml_element.text.strip() + # The range of blocks has the "X - Y" format, or it can be just "X" + # in old bmap format versions. First, split the blocks range string + # and strip white-spaces. + split = [x.strip() for x in blocks_range.split('-', 1)] + + first = int(split[0]) + if len(split) > 1: + last = int(split[1]) + if first > last: + raise Error("bad range (first > last): '%s'" % blocks_range) + else: + last = first + + if 'sha1' in xml_element.attrib: + sha1 = xml_element.attrib['sha1'] + else: + sha1 = None + + yield (first, last, sha1) + + def _get_batches(self, first, last): + """ This is a helper generator which splits block ranges from the bmap + file to smaller batches. Indeed, we cannot read and write entire block + ranges from the image file, because a range can be very large. So we + perform the I/O in batches. Batch size is defined by the + '_batch_blocks' attribute. Thus, for each (first, last) block range, + the generator yields smaller (start, end, length) batch ranges, where: + * 'start' is the starting batch block number; + * 'last' is the ending batch block number; + * 'length' is the batch length in blocks (same as + 'end' - 'start' + 1). """ + + batch_blocks = self._batch_blocks + + while first + batch_blocks - 1 <= last: + yield (first, first + batch_blocks - 1, batch_blocks) + first += batch_blocks + + batch_blocks = last - first + 1 + if batch_blocks: + yield (first, first + batch_blocks - 1, batch_blocks) + + def _get_data(self, verify): + """ This is generator which reads the image file in '_batch_blocks' + chunks and yields ('type', 'start', 'end', 'buf) tuples, where: + * 'start' is the starting block number of the batch; + * 'end' is the last block of the batch; + * 'buf' a buffer containing the batch data. """ + + try: + for (first, last, sha1) in self._get_block_ranges(): + if verify and sha1: + hash_obj = hashlib.new('sha1') + + self._f_image.seek(first * self.block_size) + + iterator = self._get_batches(first, last) + for (start, end, length) in iterator: + try: + buf = self._f_image.read(length * self.block_size) + except IOError as err: + raise Error("error while reading blocks %d-%d of the " \ + "image file '%s': %s" \ + % (start, end, self._image_path, err)) + + if not buf: + self._batch_queue.put(None) + return + + if verify and sha1: + hash_obj.update(buf) + + blocks = (len(buf) + self.block_size - 1) / self.block_size + self._batch_queue.put(("range", start, start + blocks - 1, + buf)) + + if verify and sha1 and hash_obj.hexdigest() != sha1: + raise Error("checksum mismatch for blocks range %d-%d: " \ + "calculated %s, should be %s (image file %s)" \ + % (first, last, hash_obj.hexdigest(), \ + sha1, self._image_path)) + # Silence pylint warning about catching too general exception + # pylint: disable=W0703 + except Exception: + # pylint: enable=W0703 + # In case of any exception - just pass it to the main thread + # through the queue. + self._batch_queue.put(("error", sys.exc_info())) + + self._batch_queue.put(None) + + def copy(self, sync = True, verify = True): + """ Copy the image to the destination file using bmap. The 'sync' + argument defines whether the destination file has to be synchronized + upon return. The 'verify' argument defines whether the SHA1 checksum + has to be verified while copying. """ + + # Create the queue for block batches and start the reader thread, which + # will read the image in batches and put the results to '_batch_queue'. + self._batch_queue = Queue.Queue(self._batch_queue_len) + thread.start_new_thread(self._get_data, (verify, )) + + blocks_written = 0 + bytes_written = 0 + fsync_last = 0 + + self._progress_started = False + self._progress_index = 0 + self._progress_time = datetime.datetime.now() + + # Read the image in '_batch_blocks' chunks and write them to the + # destination file + while True: + batch = self._batch_queue.get() + if batch is None: + # No more data, the image is written + break + elif batch[0] == "error": + # The reader thread encountered an error and passed us the + # exception. + exc_info = batch[1] + raise exc_info[0], exc_info[1], exc_info[2] + + (start, end, buf) = batch[1:4] + + assert len(buf) <= (end - start + 1) * self.block_size + assert len(buf) > (end - start) * self.block_size + + self._f_dest.seek(start * self.block_size) + + # Synchronize the destination file if we reached the watermark + if self._dest_fsync_watermark: + if blocks_written >= fsync_last + self._dest_fsync_watermark: + fsync_last = blocks_written + self.sync() + + try: + self._f_dest.write(buf) + except IOError as err: + raise Error("error while writing blocks %d-%d of '%s': %s" \ + % (start, end, self._dest_path, err)) + + self._batch_queue.task_done() + blocks_written += (end - start + 1) + bytes_written += len(buf) + + self._update_progress(blocks_written) + + if not self.image_size: + # The image size was unknown up until now, set it + self._set_image_size(bytes_written) + + # This is just a sanity check - we should have written exactly + # 'mapped_cnt' blocks. + if blocks_written != self.mapped_cnt: + raise Error("wrote %u blocks from image '%s' to '%s', but should " \ + "have %u - bmap file '%s' does not belong to this" \ + "image" \ + % (blocks_written, self._image_path, self._dest_path, \ + self.mapped_cnt, self._bmap_path)) + + if self._dest_is_regfile: + # Make sure the destination file has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" \ + % (self._dest_path, err)) + + try: + self._f_dest.flush() + except IOError as err: + raise Error("cannot flush '%s': %s" % (self._dest_path, err)) + + if sync: + self.sync() + + def sync(self): + """ Synchronize the destination file to make sure all the data are + actually written to the disk. """ + + if self._dest_supports_fsync: + try: + os.fsync(self._f_dest.fileno()), + except OSError as err: + raise Error("cannot synchronize '%s': %s " \ + % (self._dest_path, err.strerror)) + + +class BmapBdevCopy(BmapCopy): + """ This class is a specialized version of 'BmapCopy' which copies the + image to a block device. Unlike the base 'BmapCopy' class, this class does + various optimizations specific to block devices, e.g., switching to the + 'noop' I/O scheduler. """ + + def _tune_block_device(self): + """" Tune the block device for better performance: + 1. Switch to the 'noop' I/O scheduler if it is available - sequential + write to the block device becomes a lot faster comparing to CFQ. + 2. Limit the write buffering - we do not need the kernel to buffer a + lot of the data we send to the block device, because we write + sequentially. Limit the buffering. + + The old settings are saved in order to be able to restore them later. + """ + # Switch to the 'noop' I/O scheduler + try: + with open(self._sysfs_scheduler_path, "r+") as f_scheduler: + contents = f_scheduler.read() + f_scheduler.seek(0) + f_scheduler.write("noop") + except IOError as err: + # No problem, this is just an optimization + raise Error("cannot enable the 'noop' I/O scheduler: %s" % err) + + # The file contains a list of scheduler with the current + # scheduler in square brackets, e.g., "noop deadline [cfq]". + # Fetch the current scheduler name + import re + + match = re.match(r'.*\[(.+)\].*', contents) + if match: + self._old_scheduler_value = match.group(1) + + # Limit the write buffering + try: + with open(self._sysfs_max_ratio_path, "r+") as f_ratio: + self._old_max_ratio_value = f_ratio.read() + f_ratio.seek(0) + f_ratio.write("1") + except IOError as err: + raise Error("cannot set max. I/O ratio to '1': %s" % err) + + def _restore_bdev_settings(self): + """ Restore old block device settings which we changed in + '_tune_block_device()'. """ + + if self._old_scheduler_value is not None: + try: + with open(self._sysfs_scheduler_path, "w") as f_scheduler: + f_scheduler.write(self._old_scheduler_value) + except IOError as err: + raise Error("cannot restore the '%s' I/O scheduler: %s" \ + % (self._old_scheduler_value, err)) + + if self._old_max_ratio_value is not None: + try: + with open(self._sysfs_max_ratio_path, "w") as f_ratio: + f_ratio.write(self._old_max_ratio_value) + except IOError as err: + raise Error("cannot set the max. I/O ratio back to '%s': %s" \ + % (self._old_max_ratio_value, err)) + + def copy(self, sync = True, verify = True): + """ The same as in the base class but tunes the block device for better + performance before starting writing. Additionally, it forces block + device synchronization from time to time in order to make sure we do + not get stuck in 'fsync()' for too long time. The problem is that the + kernel synchronizes block devices when the file is closed. And the + result is that if the user interrupts us while we are copying the data, + the program will be blocked in 'close()' waiting for the block device + synchronization, which may last minutes for slow USB stick. This is + very bad user experience, and we work around this effect by + synchronizing from time to time. """ + + self._tune_block_device() + + try: + BmapCopy.copy(self, sync, verify) + except: + raise + finally: + self._restore_bdev_settings() + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The same as the constructor of the 'BmapCopy' base class, but adds + useful guard-checks specific to block devices. """ + + # Call the base class constructor first + BmapCopy.__init__(self, image, dest, bmap, image_size) + + self._batch_bytes = 1024 * 1024 + self._batch_blocks = self._batch_bytes / self.block_size + self._batch_queue_len = 6 + self._dest_fsync_watermark = (6 * 1024 * 1024) / self.block_size + + self._sysfs_base = None + self._sysfs_scheduler_path = None + self._sysfs_max_ratio_path = None + self._old_scheduler_value = None + self._old_max_ratio_value = None + + # If the image size is known, check that it fits the block device + if self.image_size: + try: + bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END) + os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET) + except OSError as err: + raise Error("cannot seed block device '%s': %s " \ + % (self._dest_path, err.strerror)) + + if bdev_size < self.image_size: + raise Error("the image file '%s' has size %s and it will not " \ + "fit the block device '%s' which has %s capacity" \ + % (self._image_path, self.image_size_human, + self._dest_path, human_size(bdev_size))) + + # Construct the path to the sysfs directory of our block device + st_rdev = os.fstat(self._f_dest.fileno()).st_rdev + self._sysfs_base = "/sys/dev/block/%s:%s/" \ + % (os.major(st_rdev), os.minor(st_rdev)) + + # Check if the 'queue' sub-directory exists. If yes, then our block + # device is entire disk. Otherwise, it is a partition, in which case we + # need to go one level up in the sysfs hierarchy. + if not os.path.exists(self._sysfs_base + "queue"): + self._sysfs_base = self._sysfs_base + "../" + + self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler" + self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio" diff --git a/tests/oldcodebase/BmapCopy2_4.py b/tests/oldcodebase/BmapCopy2_4.py new file mode 100644 index 0000000..889f357 --- /dev/null +++ b/tests/oldcodebase/BmapCopy2_4.py @@ -0,0 +1,667 @@ +""" This module implements copying of images with bmap and provides the +following API. + 1. BmapCopy class - implements copying to any kind of file, be that a block + device or a regular file. + 2. BmapBdevCopy class - based on BmapCopy and specializes on copying to block + devices. It does some more sanity checks and some block device performance + tuning. + +The bmap file is an XML file which contains a list of mapped blocks of the +image. Mapped blocks are the blocks which have disk sectors associated with +them, as opposed to holes, which are blocks with no associated disk sectors. In +other words, the image is considered to be a sparse file, and bmap basically +contains a list of mapped blocks of this sparse file. The bmap additionally +contains some useful information like block size (usually 4KiB), image size, +mapped blocks count, etc. + +The bmap is used for copying the image to a block device or to a regular file. +The idea is that we copy quickly with bmap because we copy only mapped blocks +and ignore the holes, because they are useless. And if the image is generated +properly (starting with a huge hole and writing all the data), it usually +contains only little mapped blocks, comparing to the overall image size. And +such an image compresses very well (because holes are read as all zeroes), so +it is beneficial to distributor them as compressed files along with the bmap. + +Here is an example. Suppose you have a 4GiB image which contains only 100MiB of +user data and you need to flash it to a slow USB stick. With bmap you end up +copying only a little bit more than 100MiB of data from the image to the USB +stick (namely, you copy only mapped blocks). This is a lot faster than copying +all 4GiB of data. We say that it is a bit more than 100MiB because things like +file-system meta-data (inode tables, superblocks, etc), partition table, etc +also contribute to the mapped blocks and are also copied. """ + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# pylint: disable=R0902 + +import os +import stat +import sys +import hashlib +import Queue +import thread +import datetime +from xml.etree import ElementTree +from bmaptools.BmapHelpers import human_size + +# The highest supported bmap format version +SUPPORTED_BMAP_VERSION = 1 + +class Error(Exception): + """ A class for exceptions generated by the 'BmapCopy' module. We currently + support only one type of exceptions, and we basically throw human-readable + problem description in case of errors. """ + pass + +class BmapCopy: + """ This class implements the bmap-based copying functionality. To copy an + image with bmap you should create an instance of this class, which requires + the following: + + * full path or a file-like object of the image to copy + * full path or a file object of the destination file copy the image to + * full path or a file object of the bmap file (optional) + * image size in bytes (optional) + + Although the main purpose of this class is to use bmap, the bmap is not + required, and if it was not provided then the entire image will be copied + to the destination file. + + When the bmap is provided, it is not necessary to specify image size, + because the size is contained in the bmap. Otherwise, it is benefitial to + specify the size because it enables extra sanity checks and makes it + possible to provide the progress bar. + + When the image size is known either from the bmap or the caller specified + it to the class constructor, all the image geometry description attributes + ('blocks_cnt', etc) are initialized by the class constructor and available + for the user. + + However, when the size is not known, some of the image geometry + description attributes are not initialized by the class constructor. + Instead, they are initialized only by the 'copy()' method. + + The 'copy()' method implements image copying. You may choose whether to + verify the SHA1 checksum while copying or not. Note, this is done only in + case of bmap-based copying and only if bmap contains the SHA1 checksums + (e.g., bmap version 1.0 did not have SHA1 checksums). + + You may choose whether to synchronize the destination file after writing or + not. To explicitly synchronize it, use the 'sync()' method. + + This class supports all the bmap format versions up version + 'SUPPORTED_BMAP_VERSION'. + + It is possible to have a simple progress indicator while copying the image. + Use the 'set_progress_indicator()' method. + + You can copy only once with an instance of this class. This means that in + order to copy the image for the second time, you have to create a new class + instance. """ + + def set_progress_indicator(self, file_obj, format_string): + """ Setup the progress indicator which shows how much data has been + copied in percent. + + The 'file_obj' argument is the console file object where the progress + has to be printed to. Pass 'None' to disable the progress indicator. + + The 'format_string' argument is the format string for the progress + indicator. It has to contain a single '%d' placeholder which will be + substitutes with copied data in percent. """ + + self._progress_file = file_obj + if format_string: + self._progress_format = format_string + else: + self._progress_format = "Copied %d%%" + + def _set_image_size(self, image_size): + """ Set image size and initialize various other geometry-related + attributes. """ + + if self.image_size is not None and self.image_size != image_size: + raise Error("cannot set image size to %d bytes, it is known to " \ + "be %d bytes (%s)" % (image_size, self.image_size, + self.image_size_human)) + + self.image_size = image_size + self.image_size_human = human_size(image_size) + self.blocks_cnt = self.image_size + self.block_size - 1 + self.blocks_cnt /= self.block_size + + if self.mapped_cnt is None: + self.mapped_cnt = self.blocks_cnt + self.mapped_size = self.image_size + self.mapped_size_human = self.image_size_human + + def _verify_bmap_checksum(self): + """ This is a helper function which verifies SHA1 checksum of the bmap + file. """ + + import mmap + + correct_sha1 = self._xml.find("BmapFileSHA1").text.strip() + + # Before verifying the shecksum, we have to substitute the SHA1 value + # stored in the file with all zeroes. For these purposes we create + # private memory mapping of the bmap file. + mapped_bmap = mmap.mmap(self._f_bmap.fileno(), 0, + access = mmap.ACCESS_COPY) + + sha1_pos = mapped_bmap.find(correct_sha1) + assert sha1_pos != -1 + + mapped_bmap[sha1_pos:sha1_pos + 40] = '0' * 40 + calculated_sha1 = hashlib.sha1(mapped_bmap).hexdigest() + + mapped_bmap.close() + + if calculated_sha1 != correct_sha1: + raise Error("checksum mismatch for bmap file '%s': calculated " \ + "'%s', should be '%s'" % \ + (self._bmap_path, calculated_sha1, correct_sha1)) + + def _parse_bmap(self): + """ Parse the bmap file and initialize corresponding class instance + attributs. """ + + try: + self._xml = ElementTree.parse(self._f_bmap) + except ElementTree.ParseError as err: + raise Error("cannot parse the bmap file '%s' which should be a " \ + "proper XML file: %s" % (self._bmap_path, err)) + + xml = self._xml + self.bmap_version = str(xml.getroot().attrib.get('version')) + + # Make sure we support this version + self.bmap_version_major = int(self.bmap_version.split('.', 1)[0]) + self.bmap_version_minor = int(self.bmap_version.split('.', 1)[1]) + if self.bmap_version_major > SUPPORTED_BMAP_VERSION: + raise Error("only bmap format version up to %d is supported, " \ + "version %d is not supported" \ + % (SUPPORTED_BMAP_VERSION, self.bmap_version_major)) + + # Fetch interesting data from the bmap XML file + self.block_size = int(xml.find("BlockSize").text.strip()) + self.blocks_cnt = int(xml.find("BlocksCount").text.strip()) + self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip()) + self.image_size = int(xml.find("ImageSize").text.strip()) + self.image_size_human = human_size(self.image_size) + self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = (self.mapped_cnt * 100.0) / self.blocks_cnt + + blocks_cnt = (self.image_size + self.block_size - 1) / self.block_size + if self.blocks_cnt != blocks_cnt: + raise Error("Inconsistent bmap - image size does not match " \ + "blocks count (%d bytes != %d blocks * %d bytes)" \ + % (self.image_size, self.blocks_cnt, self.block_size)) + + if self.bmap_version_major >= 1 and self.bmap_version_minor >= 3: + # Bmap file checksum appeard in format 1.3 + self._verify_bmap_checksum() + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The class constructor. The parameters are: + image - file-like object of the image which should be copied, + should only support 'read()' and 'seek()' methods, + and only seeking forward has to be supported. + dest - file object of the destination file to copy the image + to. + bmap - file object of the bmap file to use for copying. + image_size - size of the image in bytes. """ + + self._xml = None + + self._dest_fsync_watermark = None + self._batch_blocks = None + self._batch_queue = None + self._batch_bytes = 1024 * 1024 + self._batch_queue_len = 2 + + self.bmap_version = None + self.bmap_version_major = None + self.bmap_version_minor = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.image_size = None + self.image_size_human = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._f_bmap = None + self._f_bmap_path = None + + self._progress_started = None + self._progress_index = None + self._progress_time = None + self._progress_file = None + self._progress_format = None + self.set_progress_indicator(None, None) + + self._f_image = image + self._image_path = image.name + + self._f_dest = dest + self._dest_path = dest.name + st_data = os.fstat(self._f_dest.fileno()) + self._dest_is_regfile = stat.S_ISREG(st_data.st_mode) + + # Special quirk for /dev/null which does not support fsync() + if stat.S_ISCHR(st_data.st_mode) and \ + os.major(st_data.st_rdev) == 1 and \ + os.minor(st_data.st_rdev) == 3: + self._dest_supports_fsync = False + else: + self._dest_supports_fsync = True + + if bmap: + self._f_bmap = bmap + self._bmap_path = bmap.name + self._parse_bmap() + else: + # There is no bmap. Initialize user-visible attributes to something + # sensible with an assumption that we just have all blocks mapped. + self.bmap_version = 0 + self.block_size = 4096 + self.mapped_percent = 100 + + if image_size: + self._set_image_size(image_size) + + self._batch_blocks = self._batch_bytes / self.block_size + + def _update_progress(self, blocks_written): + """ Print the progress indicator if the mapped area size is known and + if the indicator has been enabled by assigning a console file object to + the 'progress_file' attribute. """ + + if not self._progress_file: + return + + if self.mapped_cnt: + assert blocks_written <= self.mapped_cnt + percent = int((float(blocks_written) / self.mapped_cnt) * 100) + progress = '\r' + self._progress_format % percent + '\n' + else: + # Do not rotate the wheel too fast + now = datetime.datetime.now() + min_delta = datetime.timedelta(milliseconds = 250) + if now - self._progress_time < min_delta: + return + self._progress_time = now + + progress_wheel = ('-', '\\', '|', '/') + progress = '\r' + progress_wheel[self._progress_index % 4] + '\n' + self._progress_index += 1 + + # This is a little trick we do in order to make sure that the next + # message will always start from a new line - we switch to the new + # line after each progress update and move the cursor up. As an + # example, this is useful when the copying is interrupted by an + # exception - the error message will start form new line. + if self._progress_started: + # The "move cursor up" escape sequence + self._progress_file.write('\033[1A') # pylint: disable=W1401 + else: + self._progress_started = True + + self._progress_file.write(progress) + self._progress_file.flush() + + def _get_block_ranges(self): + """ This is a helper generator that parses the bmap XML file and for + each block range in the XML file it yields ('first', 'last', 'sha1') + tuples, where: + * 'first' is the first block of the range; + * 'last' is the last block of the range; + * 'sha1' is the SHA1 checksum of the range ('None' is used if it is + missing. + + If there is no bmap file, the generator just yields a single range + for entire image file. If the image size is unknown, the generator + infinitely yields continuous ranges of size '_batch_blocks'. """ + + if not self._f_bmap: + # We do not have the bmap, yield a tuple with all blocks + if self.blocks_cnt: + yield (0, self.blocks_cnt - 1, None) + else: + # We do not know image size, keep yielding tuples with many + # blocks infinitely. + first = 0 + while True: + yield (first, first + self._batch_blocks - 1, None) + first += self._batch_blocks + return + + # We have the bmap, just read it and yield block ranges + xml = self._xml + xml_bmap = xml.find("BlockMap") + + for xml_element in xml_bmap.findall("Range"): + blocks_range = xml_element.text.strip() + # The range of blocks has the "X - Y" format, or it can be just "X" + # in old bmap format versions. First, split the blocks range string + # and strip white-spaces. + split = [x.strip() for x in blocks_range.split('-', 1)] + + first = int(split[0]) + if len(split) > 1: + last = int(split[1]) + if first > last: + raise Error("bad range (first > last): '%s'" % blocks_range) + else: + last = first + + if 'sha1' in xml_element.attrib: + sha1 = xml_element.attrib['sha1'] + else: + sha1 = None + + yield (first, last, sha1) + + def _get_batches(self, first, last): + """ This is a helper generator which splits block ranges from the bmap + file to smaller batches. Indeed, we cannot read and write entire block + ranges from the image file, because a range can be very large. So we + perform the I/O in batches. Batch size is defined by the + '_batch_blocks' attribute. Thus, for each (first, last) block range, + the generator yields smaller (start, end, length) batch ranges, where: + * 'start' is the starting batch block number; + * 'last' is the ending batch block number; + * 'length' is the batch length in blocks (same as + 'end' - 'start' + 1). """ + + batch_blocks = self._batch_blocks + + while first + batch_blocks - 1 <= last: + yield (first, first + batch_blocks - 1, batch_blocks) + first += batch_blocks + + batch_blocks = last - first + 1 + if batch_blocks: + yield (first, first + batch_blocks - 1, batch_blocks) + + def _get_data(self, verify): + """ This is generator which reads the image file in '_batch_blocks' + chunks and yields ('type', 'start', 'end', 'buf) tuples, where: + * 'start' is the starting block number of the batch; + * 'end' is the last block of the batch; + * 'buf' a buffer containing the batch data. """ + + try: + for (first, last, sha1) in self._get_block_ranges(): + if verify and sha1: + hash_obj = hashlib.new('sha1') + + self._f_image.seek(first * self.block_size) + + iterator = self._get_batches(first, last) + for (start, end, length) in iterator: + try: + buf = self._f_image.read(length * self.block_size) + except IOError as err: + raise Error("error while reading blocks %d-%d of the " \ + "image file '%s': %s" \ + % (start, end, self._image_path, err)) + + if not buf: + self._batch_queue.put(None) + return + + if verify and sha1: + hash_obj.update(buf) + + blocks = (len(buf) + self.block_size - 1) / self.block_size + self._batch_queue.put(("range", start, start + blocks - 1, + buf)) + + if verify and sha1 and hash_obj.hexdigest() != sha1: + raise Error("checksum mismatch for blocks range %d-%d: " \ + "calculated %s, should be %s (image file %s)" \ + % (first, last, hash_obj.hexdigest(), \ + sha1, self._image_path)) + # Silence pylint warning about catching too general exception + # pylint: disable=W0703 + except Exception: + # pylint: enable=W0703 + # In case of any exception - just pass it to the main thread + # through the queue. + self._batch_queue.put(("error", sys.exc_info())) + + self._batch_queue.put(None) + + def copy(self, sync = True, verify = True): + """ Copy the image to the destination file using bmap. The 'sync' + argument defines whether the destination file has to be synchronized + upon return. The 'verify' argument defines whether the SHA1 checksum + has to be verified while copying. """ + + # Create the queue for block batches and start the reader thread, which + # will read the image in batches and put the results to '_batch_queue'. + self._batch_queue = Queue.Queue(self._batch_queue_len) + thread.start_new_thread(self._get_data, (verify, )) + + blocks_written = 0 + bytes_written = 0 + fsync_last = 0 + + self._progress_started = False + self._progress_index = 0 + self._progress_time = datetime.datetime.now() + + # Read the image in '_batch_blocks' chunks and write them to the + # destination file + while True: + batch = self._batch_queue.get() + if batch is None: + # No more data, the image is written + break + elif batch[0] == "error": + # The reader thread encountered an error and passed us the + # exception. + exc_info = batch[1] + raise exc_info[0], exc_info[1], exc_info[2] + + (start, end, buf) = batch[1:4] + + assert len(buf) <= (end - start + 1) * self.block_size + assert len(buf) > (end - start) * self.block_size + + self._f_dest.seek(start * self.block_size) + + # Synchronize the destination file if we reached the watermark + if self._dest_fsync_watermark: + if blocks_written >= fsync_last + self._dest_fsync_watermark: + fsync_last = blocks_written + self.sync() + + try: + self._f_dest.write(buf) + except IOError as err: + raise Error("error while writing blocks %d-%d of '%s': %s" \ + % (start, end, self._dest_path, err)) + + self._batch_queue.task_done() + blocks_written += (end - start + 1) + bytes_written += len(buf) + + self._update_progress(blocks_written) + + if not self.image_size: + # The image size was unknown up until now, set it + self._set_image_size(bytes_written) + + # This is just a sanity check - we should have written exactly + # 'mapped_cnt' blocks. + if blocks_written != self.mapped_cnt: + raise Error("wrote %u blocks from image '%s' to '%s', but should " \ + "have %u - bmap file '%s' does not belong to this" \ + "image" \ + % (blocks_written, self._image_path, self._dest_path, \ + self.mapped_cnt, self._bmap_path)) + + if self._dest_is_regfile: + # Make sure the destination file has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" \ + % (self._dest_path, err)) + + try: + self._f_dest.flush() + except IOError as err: + raise Error("cannot flush '%s': %s" % (self._dest_path, err)) + + if sync: + self.sync() + + def sync(self): + """ Synchronize the destination file to make sure all the data are + actually written to the disk. """ + + if self._dest_supports_fsync: + try: + os.fsync(self._f_dest.fileno()), + except OSError as err: + raise Error("cannot synchronize '%s': %s " \ + % (self._dest_path, err.strerror)) + + +class BmapBdevCopy(BmapCopy): + """ This class is a specialized version of 'BmapCopy' which copies the + image to a block device. Unlike the base 'BmapCopy' class, this class does + various optimizations specific to block devices, e.g., switching to the + 'noop' I/O scheduler. """ + + def _tune_block_device(self): + """" Tune the block device for better performance: + 1. Switch to the 'noop' I/O scheduler if it is available - sequential + write to the block device becomes a lot faster comparing to CFQ. + 2. Limit the write buffering - we do not need the kernel to buffer a + lot of the data we send to the block device, because we write + sequentially. Limit the buffering. + + The old settings are saved in order to be able to restore them later. + """ + # Switch to the 'noop' I/O scheduler + try: + with open(self._sysfs_scheduler_path, "r+") as f_scheduler: + contents = f_scheduler.read() + f_scheduler.seek(0) + f_scheduler.write("noop") + except IOError as err: + # No problem, this is just an optimization + raise Error("cannot enable the 'noop' I/O scheduler: %s" % err) + + # The file contains a list of scheduler with the current + # scheduler in square brackets, e.g., "noop deadline [cfq]". + # Fetch the current scheduler name + import re + + match = re.match(r'.*\[(.+)\].*', contents) + if match: + self._old_scheduler_value = match.group(1) + + # Limit the write buffering + try: + with open(self._sysfs_max_ratio_path, "r+") as f_ratio: + self._old_max_ratio_value = f_ratio.read() + f_ratio.seek(0) + f_ratio.write("1") + except IOError as err: + raise Error("cannot set max. I/O ratio to '1': %s" % err) + + def _restore_bdev_settings(self): + """ Restore old block device settings which we changed in + '_tune_block_device()'. """ + + if self._old_scheduler_value is not None: + try: + with open(self._sysfs_scheduler_path, "w") as f_scheduler: + f_scheduler.write(self._old_scheduler_value) + except IOError as err: + raise Error("cannot restore the '%s' I/O scheduler: %s" \ + % (self._old_scheduler_value, err)) + + if self._old_max_ratio_value is not None: + try: + with open(self._sysfs_max_ratio_path, "w") as f_ratio: + f_ratio.write(self._old_max_ratio_value) + except IOError as err: + raise Error("cannot set the max. I/O ratio back to '%s': %s" \ + % (self._old_max_ratio_value, err)) + + def copy(self, sync = True, verify = True): + """ The same as in the base class but tunes the block device for better + performance before starting writing. Additionally, it forces block + device synchronization from time to time in order to make sure we do + not get stuck in 'fsync()' for too long time. The problem is that the + kernel synchronizes block devices when the file is closed. And the + result is that if the user interrupts us while we are copying the data, + the program will be blocked in 'close()' waiting for the block device + synchronization, which may last minutes for slow USB stick. This is + very bad user experience, and we work around this effect by + synchronizing from time to time. """ + + self._tune_block_device() + + try: + BmapCopy.copy(self, sync, verify) + except: + raise + finally: + self._restore_bdev_settings() + + def __init__(self, image, dest, bmap = None, image_size = None): + """ The same as the constructor of the 'BmapCopy' base class, but adds + useful guard-checks specific to block devices. """ + + # Call the base class constructor first + BmapCopy.__init__(self, image, dest, bmap, image_size) + + self._batch_bytes = 1024 * 1024 + self._batch_blocks = self._batch_bytes / self.block_size + self._batch_queue_len = 6 + self._dest_fsync_watermark = (6 * 1024 * 1024) / self.block_size + + self._sysfs_base = None + self._sysfs_scheduler_path = None + self._sysfs_max_ratio_path = None + self._old_scheduler_value = None + self._old_max_ratio_value = None + + # If the image size is known, check that it fits the block device + if self.image_size: + try: + bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END) + os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET) + except OSError as err: + raise Error("cannot seed block device '%s': %s " \ + % (self._dest_path, err.strerror)) + + if bdev_size < self.image_size: + raise Error("the image file '%s' has size %s and it will not " \ + "fit the block device '%s' which has %s capacity" \ + % (self._image_path, self.image_size_human, + self._dest_path, human_size(bdev_size))) + + # Construct the path to the sysfs directory of our block device + st_rdev = os.fstat(self._f_dest.fileno()).st_rdev + self._sysfs_base = "/sys/dev/block/%s:%s/" \ + % (os.major(st_rdev), os.minor(st_rdev)) + + # Check if the 'queue' sub-directory exists. If yes, then our block + # device is entire disk. Otherwise, it is a partition, in which case we + # need to go one level up in the sysfs hierarchy. + if not os.path.exists(self._sysfs_base + "queue"): + self._sysfs_base = self._sysfs_base + "../" + + self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler" + self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio" diff --git a/tests/oldcodebase/BmapCopy2_5.py b/tests/oldcodebase/BmapCopy2_5.py new file mode 100644 index 0000000..a025cb5 --- /dev/null +++ b/tests/oldcodebase/BmapCopy2_5.py @@ -0,0 +1,724 @@ +# Copyright (c) 2012-2013 Intel, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +""" +This module implements copying of images with bmap and provides the following +API. + 1. BmapCopy class - implements copying to any kind of file, be that a block + device or a regular file. + 2. BmapBdevCopy class - based on BmapCopy and specializes on copying to block + devices. It does some more sanity checks and some block device performance + tuning. + +The bmap file is an XML file which contains a list of mapped blocks of the +image. Mapped blocks are the blocks which have disk sectors associated with +them, as opposed to holes, which are blocks with no associated disk sectors. In +other words, the image is considered to be a sparse file, and bmap basically +contains a list of mapped blocks of this sparse file. The bmap additionally +contains some useful information like block size (usually 4KiB), image size, +mapped blocks count, etc. + +The bmap is used for copying the image to a block device or to a regular file. +The idea is that we copy quickly with bmap because we copy only mapped blocks +and ignore the holes, because they are useless. And if the image is generated +properly (starting with a huge hole and writing all the data), it usually +contains only little mapped blocks, comparing to the overall image size. And +such an image compresses very well (because holes are read as all zeroes), so +it is beneficial to distributor them as compressed files along with the bmap. + +Here is an example. Suppose you have a 4GiB image which contains only 100MiB of +user data and you need to flash it to a slow USB stick. With bmap you end up +copying only a little bit more than 100MiB of data from the image to the USB +stick (namely, you copy only mapped blocks). This is a lot faster than copying +all 4GiB of data. We say that it is a bit more than 100MiB because things like +file-system meta-data (inode tables, superblocks, etc), partition table, etc +also contribute to the mapped blocks and are also copied. +""" + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# pylint: disable=R0902 + +import os +import stat +import sys +import hashlib +import logging +import Queue +import thread +import datetime +from xml.etree import ElementTree +from bmaptools.BmapHelpers import human_size + +# The highest supported bmap format version +SUPPORTED_BMAP_VERSION = 1 + +class Error(Exception): + """ + A class for exceptions generated by the 'BmapCopy' module. We currently + support only one type of exceptions, and we basically throw human-readable + problem description in case of errors. + """ + pass + +class BmapCopy: + """ + This class implements the bmap-based copying functionality. To copy an + image with bmap you should create an instance of this class, which requires + the following: + + * full path or a file-like object of the image to copy + * full path or a file object of the destination file copy the image to + * full path or a file object of the bmap file (optional) + * image size in bytes (optional) + + Although the main purpose of this class is to use bmap, the bmap is not + required, and if it was not provided then the entire image will be copied + to the destination file. + + When the bmap is provided, it is not necessary to specify image size, + because the size is contained in the bmap. Otherwise, it is benefitial to + specify the size because it enables extra sanity checks and makes it + possible to provide the progress bar. + + When the image size is known either from the bmap or the caller specified + it to the class constructor, all the image geometry description attributes + ('blocks_cnt', etc) are initialized by the class constructor and available + for the user. + + However, when the size is not known, some of the image geometry + description attributes are not initialized by the class constructor. + Instead, they are initialized only by the 'copy()' method. + + The 'copy()' method implements image copying. You may choose whether to + verify the SHA1 checksum while copying or not. Note, this is done only in + case of bmap-based copying and only if bmap contains the SHA1 checksums + (e.g., bmap version 1.0 did not have SHA1 checksums). + + You may choose whether to synchronize the destination file after writing or + not. To explicitly synchronize it, use the 'sync()' method. + + This class supports all the bmap format versions up version + 'SUPPORTED_BMAP_VERSION'. + + It is possible to have a simple progress indicator while copying the image. + Use the 'set_progress_indicator()' method. + + You can copy only once with an instance of this class. This means that in + order to copy the image for the second time, you have to create a new class + instance. + """ + + def set_progress_indicator(self, file_obj, format_string): + """ + Setup the progress indicator which shows how much data has been copied + in percent. + + The 'file_obj' argument is the console file object where the progress + has to be printed to. Pass 'None' to disable the progress indicator. + + The 'format_string' argument is the format string for the progress + indicator. It has to contain a single '%d' placeholder which will be + substitutes with copied data in percent. + """ + + self._progress_file = file_obj + if format_string: + self._progress_format = format_string + else: + self._progress_format = "Copied %d%%" + + def _set_image_size(self, image_size): + """ + Set image size and initialize various other geometry-related attributes. + """ + + if self.image_size is not None and self.image_size != image_size: + raise Error("cannot set image size to %d bytes, it is known to " + "be %d bytes (%s)" % (image_size, self.image_size, + self.image_size_human)) + + self.image_size = image_size + self.image_size_human = human_size(image_size) + self.blocks_cnt = self.image_size + self.block_size - 1 + self.blocks_cnt /= self.block_size + + if self.mapped_cnt is None: + self.mapped_cnt = self.blocks_cnt + self.mapped_size = self.image_size + self.mapped_size_human = self.image_size_human + + def _verify_bmap_checksum(self): + """ + This is a helper function which verifies SHA1 checksum of the bmap file. + """ + + import mmap + + correct_sha1 = self._xml.find("BmapFileSHA1").text.strip() + + # Before verifying the shecksum, we have to substitute the SHA1 value + # stored in the file with all zeroes. For these purposes we create + # private memory mapping of the bmap file. + mapped_bmap = mmap.mmap(self._f_bmap.fileno(), 0, + access = mmap.ACCESS_COPY) + + sha1_pos = mapped_bmap.find(correct_sha1) + assert sha1_pos != -1 + + mapped_bmap[sha1_pos:sha1_pos + 40] = '0' * 40 + calculated_sha1 = hashlib.sha1(mapped_bmap).hexdigest() + + mapped_bmap.close() + + if calculated_sha1 != correct_sha1: + raise Error("checksum mismatch for bmap file '%s': calculated " + "'%s', should be '%s'" + % (self._bmap_path, calculated_sha1, correct_sha1)) + + def _parse_bmap(self): + """ + Parse the bmap file and initialize corresponding class instance attributs. + """ + + try: + self._xml = ElementTree.parse(self._f_bmap) + except ElementTree.ParseError as err: + raise Error("cannot parse the bmap file '%s' which should be a " + "proper XML file: %s" % (self._bmap_path, err)) + + xml = self._xml + self.bmap_version = str(xml.getroot().attrib.get('version')) + + # Make sure we support this version + self.bmap_version_major = int(self.bmap_version.split('.', 1)[0]) + self.bmap_version_minor = int(self.bmap_version.split('.', 1)[1]) + if self.bmap_version_major > SUPPORTED_BMAP_VERSION: + raise Error("only bmap format version up to %d is supported, " + "version %d is not supported" + % (SUPPORTED_BMAP_VERSION, self.bmap_version_major)) + + # Fetch interesting data from the bmap XML file + self.block_size = int(xml.find("BlockSize").text.strip()) + self.blocks_cnt = int(xml.find("BlocksCount").text.strip()) + self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip()) + self.image_size = int(xml.find("ImageSize").text.strip()) + self.image_size_human = human_size(self.image_size) + self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = (self.mapped_cnt * 100.0) / self.blocks_cnt + + blocks_cnt = (self.image_size + self.block_size - 1) / self.block_size + if self.blocks_cnt != blocks_cnt: + raise Error("Inconsistent bmap - image size does not match " + "blocks count (%d bytes != %d blocks * %d bytes)" + % (self.image_size, self.blocks_cnt, self.block_size)) + + if self.bmap_version_major >= 1 and self.bmap_version_minor >= 3: + # Bmap file checksum appeard in format 1.3 + self._verify_bmap_checksum() + + def __init__(self, image, dest, bmap=None, image_size=None, logger=None): + """ + The class constructor. The parameters are: + image - file-like object of the image which should be copied, + should only support 'read()' and 'seek()' methods, + and only seeking forward has to be supported. + dest - file object of the destination file to copy the image + to. + bmap - file object of the bmap file to use for copying. + image_size - size of the image in bytes. + logger - the logger object to use for printing messages. + """ + + self._logger = logger + if self._logger is None: + self._logger = logging.getLogger(__name__) + + self._xml = None + + self._dest_fsync_watermark = None + self._batch_blocks = None + self._batch_queue = None + self._batch_bytes = 1024 * 1024 + self._batch_queue_len = 2 + + self.bmap_version = None + self.bmap_version_major = None + self.bmap_version_minor = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.image_size = None + self.image_size_human = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._f_bmap = None + self._f_bmap_path = None + + self._progress_started = None + self._progress_index = None + self._progress_time = None + self._progress_file = None + self._progress_format = None + self.set_progress_indicator(None, None) + + self._f_image = image + self._image_path = image.name + + self._f_dest = dest + self._dest_path = dest.name + st_data = os.fstat(self._f_dest.fileno()) + self._dest_is_regfile = stat.S_ISREG(st_data.st_mode) + + # Special quirk for /dev/null which does not support fsync() + if stat.S_ISCHR(st_data.st_mode) and \ + os.major(st_data.st_rdev) == 1 and \ + os.minor(st_data.st_rdev) == 3: + self._dest_supports_fsync = False + else: + self._dest_supports_fsync = True + + if bmap: + self._f_bmap = bmap + self._bmap_path = bmap.name + self._parse_bmap() + else: + # There is no bmap. Initialize user-visible attributes to something + # sensible with an assumption that we just have all blocks mapped. + self.bmap_version = 0 + self.block_size = 4096 + self.mapped_percent = 100 + + if image_size: + self._set_image_size(image_size) + + self._batch_blocks = self._batch_bytes / self.block_size + + def _update_progress(self, blocks_written): + """ + Print the progress indicator if the mapped area size is known and if + the indicator has been enabled by assigning a console file object to + the 'progress_file' attribute. + """ + + if not self._progress_file: + return + + if self.mapped_cnt: + assert blocks_written <= self.mapped_cnt + percent = int((float(blocks_written) / self.mapped_cnt) * 100) + progress = '\r' + self._progress_format % percent + '\n' + else: + # Do not rotate the wheel too fast + now = datetime.datetime.now() + min_delta = datetime.timedelta(milliseconds=250) + if now - self._progress_time < min_delta: + return + self._progress_time = now + + progress_wheel = ('-', '\\', '|', '/') + progress = '\r' + progress_wheel[self._progress_index % 4] + '\n' + self._progress_index += 1 + + # This is a little trick we do in order to make sure that the next + # message will always start from a new line - we switch to the new + # line after each progress update and move the cursor up. As an + # example, this is useful when the copying is interrupted by an + # exception - the error message will start form new line. + if self._progress_started: + # The "move cursor up" escape sequence + self._progress_file.write('\033[1A') # pylint: disable=W1401 + else: + self._progress_started = True + + self._progress_file.write(progress) + self._progress_file.flush() + + def _get_block_ranges(self): + """ + This is a helper generator that parses the bmap XML file and for each + block range in the XML file it yields ('first', 'last', 'sha1') tuples, + where: + * 'first' is the first block of the range; + * 'last' is the last block of the range; + * 'sha1' is the SHA1 checksum of the range ('None' is used if it is + missing. + + If there is no bmap file, the generator just yields a single range + for entire image file. If the image size is unknown, the generator + infinitely yields continuous ranges of size '_batch_blocks'. + """ + + if not self._f_bmap: + # We do not have the bmap, yield a tuple with all blocks + if self.blocks_cnt: + yield (0, self.blocks_cnt - 1, None) + else: + # We do not know image size, keep yielding tuples with many + # blocks infinitely. + first = 0 + while True: + yield (first, first + self._batch_blocks - 1, None) + first += self._batch_blocks + return + + # We have the bmap, just read it and yield block ranges + xml = self._xml + xml_bmap = xml.find("BlockMap") + + for xml_element in xml_bmap.findall("Range"): + blocks_range = xml_element.text.strip() + # The range of blocks has the "X - Y" format, or it can be just "X" + # in old bmap format versions. First, split the blocks range string + # and strip white-spaces. + split = [x.strip() for x in blocks_range.split('-', 1)] + + first = int(split[0]) + if len(split) > 1: + last = int(split[1]) + if first > last: + raise Error("bad range (first > last): '%s'" % blocks_range) + else: + last = first + + if 'sha1' in xml_element.attrib: + sha1 = xml_element.attrib['sha1'] + else: + sha1 = None + + yield (first, last, sha1) + + def _get_batches(self, first, last): + """ + This is a helper generator which splits block ranges from the bmap file + to smaller batches. Indeed, we cannot read and write entire block + ranges from the image file, because a range can be very large. So we + perform the I/O in batches. Batch size is defined by the + '_batch_blocks' attribute. Thus, for each (first, last) block range, + the generator yields smaller (start, end, length) batch ranges, where: + * 'start' is the starting batch block number; + * 'last' is the ending batch block number; + * 'length' is the batch length in blocks (same as + 'end' - 'start' + 1). + """ + + batch_blocks = self._batch_blocks + + while first + batch_blocks - 1 <= last: + yield (first, first + batch_blocks - 1, batch_blocks) + first += batch_blocks + + batch_blocks = last - first + 1 + if batch_blocks: + yield (first, first + batch_blocks - 1, batch_blocks) + + def _get_data(self, verify): + """ + This is generator which reads the image file in '_batch_blocks' chunks + and yields ('type', 'start', 'end', 'buf) tuples, where: + * 'start' is the starting block number of the batch; + * 'end' is the last block of the batch; + * 'buf' a buffer containing the batch data. + """ + + try: + for (first, last, sha1) in self._get_block_ranges(): + if verify and sha1: + hash_obj = hashlib.new('sha1') + + self._f_image.seek(first * self.block_size) + + iterator = self._get_batches(first, last) + for (start, end, length) in iterator: + try: + buf = self._f_image.read(length * self.block_size) + except IOError as err: + raise Error("error while reading blocks %d-%d of the " + "image file '%s': %s" + % (start, end, self._image_path, err)) + + if not buf: + self._batch_queue.put(None) + return + + if verify and sha1: + hash_obj.update(buf) + + blocks = (len(buf) + self.block_size - 1) / self.block_size + self._batch_queue.put(("range", start, start + blocks - 1, + buf)) + + if verify and sha1 and hash_obj.hexdigest() != sha1: + raise Error("checksum mismatch for blocks range %d-%d: " + "calculated %s, should be %s (image file %s)" + % (first, last, hash_obj.hexdigest(), + sha1, self._image_path)) + # Silence pylint warning about catching too general exception + # pylint: disable=W0703 + except Exception: + # pylint: enable=W0703 + # In case of any exception - just pass it to the main thread + # through the queue. + self._batch_queue.put(("error", sys.exc_info())) + + self._batch_queue.put(None) + + def copy(self, sync=True, verify=True): + """ + Copy the image to the destination file using bmap. The 'sync' argument + defines whether the destination file has to be synchronized upon + return. The 'verify' argument defines whether the SHA1 checksum has to + be verified while copying. + """ + + # Create the queue for block batches and start the reader thread, which + # will read the image in batches and put the results to '_batch_queue'. + self._batch_queue = Queue.Queue(self._batch_queue_len) + thread.start_new_thread(self._get_data, (verify, )) + + blocks_written = 0 + bytes_written = 0 + fsync_last = 0 + + self._progress_started = False + self._progress_index = 0 + self._progress_time = datetime.datetime.now() + + # Read the image in '_batch_blocks' chunks and write them to the + # destination file + while True: + batch = self._batch_queue.get() + if batch is None: + # No more data, the image is written + break + elif batch[0] == "error": + # The reader thread encountered an error and passed us the + # exception. + exc_info = batch[1] + raise exc_info[0], exc_info[1], exc_info[2] + + (start, end, buf) = batch[1:4] + + assert len(buf) <= (end - start + 1) * self.block_size + assert len(buf) > (end - start) * self.block_size + + self._f_dest.seek(start * self.block_size) + + # Synchronize the destination file if we reached the watermark + if self._dest_fsync_watermark: + if blocks_written >= fsync_last + self._dest_fsync_watermark: + fsync_last = blocks_written + self.sync() + + try: + self._f_dest.write(buf) + except IOError as err: + raise Error("error while writing blocks %d-%d of '%s': %s" + % (start, end, self._dest_path, err)) + + self._batch_queue.task_done() + blocks_written += (end - start + 1) + bytes_written += len(buf) + + self._update_progress(blocks_written) + + if not self.image_size: + # The image size was unknown up until now, set it + self._set_image_size(bytes_written) + + # This is just a sanity check - we should have written exactly + # 'mapped_cnt' blocks. + if blocks_written != self.mapped_cnt: + raise Error("wrote %u blocks from image '%s' to '%s', but should " + "have %u - bmap file '%s' does not belong to this" + "image" + % (blocks_written, self._image_path, self._dest_path, + self.mapped_cnt, self._bmap_path)) + + if self._dest_is_regfile: + # Make sure the destination file has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" + % (self._dest_path, err)) + + try: + self._f_dest.flush() + except IOError as err: + raise Error("cannot flush '%s': %s" % (self._dest_path, err)) + + if sync: + self.sync() + + def sync(self): + """ + Synchronize the destination file to make sure all the data are actually + written to the disk. + """ + + if self._dest_supports_fsync: + try: + os.fsync(self._f_dest.fileno()), + except OSError as err: + raise Error("cannot synchronize '%s': %s " + % (self._dest_path, err.strerror)) + + +class BmapBdevCopy(BmapCopy): + """ + This class is a specialized version of 'BmapCopy' which copies the image to + a block device. Unlike the base 'BmapCopy' class, this class does various + optimizations specific to block devices, e.g., switching to the 'noop' I/O + scheduler. + """ + + def _tune_block_device(self): + """ + Tune the block device for better performance: + 1. Switch to the 'noop' I/O scheduler if it is available - sequential + write to the block device becomes a lot faster comparing to CFQ. + 2. Limit the write buffering - we do not need the kernel to buffer a + lot of the data we send to the block device, because we write + sequentially. Limit the buffering. + + The old settings are saved in order to be able to restore them later. + """ + # Switch to the 'noop' I/O scheduler + try: + with open(self._sysfs_scheduler_path, "r+") as f_scheduler: + contents = f_scheduler.read() + f_scheduler.seek(0) + f_scheduler.write("noop") + except IOError as err: + self._logger.warning("failed to enable I/O optimization, expect " + "suboptimal speed (reason: cannot switch " + "to the 'noop' I/O scheduler: %s)" % err) + else: + # The file contains a list of schedulers with the current + # scheduler in square brackets, e.g., "noop deadline [cfq]". + # Fetch the name of the current scheduler. + import re + + match = re.match(r'.*\[(.+)\].*', contents) + if match: + self._old_scheduler_value = match.group(1) + + # Limit the write buffering, because we do not need too much of it when + # writing sequntially. Excessive buffering makes some systems not very + # responsive, e.g., this was observed in Fedora 17. + try: + with open(self._sysfs_max_ratio_path, "r+") as f_ratio: + self._old_max_ratio_value = f_ratio.read() + f_ratio.seek(0) + f_ratio.write("1") + except IOError as err: + self._logger.warning("failed to disable excessive buffering, " + "expect worse system responsiveness " + "(reason: cannot set max. I/O ratio to " + "1: %s)" % err) + + def _restore_bdev_settings(self): + """ + Restore old block device settings which we changed in + '_tune_block_device()'. + """ + + if self._old_scheduler_value is not None: + try: + with open(self._sysfs_scheduler_path, "w") as f_scheduler: + f_scheduler.write(self._old_scheduler_value) + except IOError as err: + raise Error("cannot restore the '%s' I/O scheduler: %s" + % (self._old_scheduler_value, err)) + + if self._old_max_ratio_value is not None: + try: + with open(self._sysfs_max_ratio_path, "w") as f_ratio: + f_ratio.write(self._old_max_ratio_value) + except IOError as err: + raise Error("cannot set the max. I/O ratio back to '%s': %s" + % (self._old_max_ratio_value, err)) + + def copy(self, sync=True, verify=True): + """ + The same as in the base class but tunes the block device for better + performance before starting writing. Additionally, it forces block + device synchronization from time to time in order to make sure we do + not get stuck in 'fsync()' for too long time. The problem is that the + kernel synchronizes block devices when the file is closed. And the + result is that if the user interrupts us while we are copying the data, + the program will be blocked in 'close()' waiting for the block device + synchronization, which may last minutes for slow USB stick. This is + very bad user experience, and we work around this effect by + synchronizing from time to time. + """ + + self._tune_block_device() + + try: + BmapCopy.copy(self, sync, verify) + except: + raise + finally: + self._restore_bdev_settings() + + def __init__(self, image, dest, bmap=None, image_size=None, logger=None): + """ + The same as the constructor of the 'BmapCopy' base class, but adds + useful guard-checks specific to block devices. + """ + + # Call the base class constructor first + BmapCopy.__init__(self, image, dest, bmap, image_size, logger=logger) + + self._batch_bytes = 1024 * 1024 + self._batch_blocks = self._batch_bytes / self.block_size + self._batch_queue_len = 6 + self._dest_fsync_watermark = (6 * 1024 * 1024) / self.block_size + + self._sysfs_base = None + self._sysfs_scheduler_path = None + self._sysfs_max_ratio_path = None + self._old_scheduler_value = None + self._old_max_ratio_value = None + + # If the image size is known, check that it fits the block device + if self.image_size: + try: + bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END) + os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET) + except OSError as err: + raise Error("cannot seed block device '%s': %s " + % (self._dest_path, err.strerror)) + + if bdev_size < self.image_size: + raise Error("the image file '%s' has size %s and it will not " + "fit the block device '%s' which has %s capacity" + % (self._image_path, self.image_size_human, + self._dest_path, human_size(bdev_size))) + + # Construct the path to the sysfs directory of our block device + st_rdev = os.fstat(self._f_dest.fileno()).st_rdev + self._sysfs_base = "/sys/dev/block/%s:%s/" \ + % (os.major(st_rdev), os.minor(st_rdev)) + + # Check if the 'queue' sub-directory exists. If yes, then our block + # device is entire disk. Otherwise, it is a partition, in which case we + # need to go one level up in the sysfs hierarchy. + if not os.path.exists(self._sysfs_base + "queue"): + self._sysfs_base = self._sysfs_base + "../" + + self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler" + self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio" diff --git a/tests/oldcodebase/BmapCopy2_6.py b/tests/oldcodebase/BmapCopy2_6.py new file mode 100644 index 0000000..4a9c8cc --- /dev/null +++ b/tests/oldcodebase/BmapCopy2_6.py @@ -0,0 +1,724 @@ +# Copyright (c) 2012-2013 Intel, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +""" +This module implements copying of images with bmap and provides the following +API. + 1. BmapCopy class - implements copying to any kind of file, be that a block + device or a regular file. + 2. BmapBdevCopy class - based on BmapCopy and specializes on copying to block + devices. It does some more sanity checks and some block device performance + tuning. + +The bmap file is an XML file which contains a list of mapped blocks of the +image. Mapped blocks are the blocks which have disk sectors associated with +them, as opposed to holes, which are blocks with no associated disk sectors. In +other words, the image is considered to be a sparse file, and bmap basically +contains a list of mapped blocks of this sparse file. The bmap additionally +contains some useful information like block size (usually 4KiB), image size, +mapped blocks count, etc. + +The bmap is used for copying the image to a block device or to a regular file. +The idea is that we copy quickly with bmap because we copy only mapped blocks +and ignore the holes, because they are useless. And if the image is generated +properly (starting with a huge hole and writing all the data), it usually +contains only little mapped blocks, comparing to the overall image size. And +such an image compresses very well (because holes are read as all zeroes), so +it is beneficial to distributor them as compressed files along with the bmap. + +Here is an example. Suppose you have a 4GiB image which contains only 100MiB of +user data and you need to flash it to a slow USB stick. With bmap you end up +copying only a little bit more than 100MiB of data from the image to the USB +stick (namely, you copy only mapped blocks). This is a lot faster than copying +all 4GiB of data. We say that it is a bit more than 100MiB because things like +file-system meta-data (inode tables, superblocks, etc), partition table, etc +also contribute to the mapped blocks and are also copied. +""" + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# pylint: disable=R0902 + +import os +import stat +import sys +import hashlib +import logging +import Queue +import thread +import datetime +from xml.etree import ElementTree +from bmaptools.BmapHelpers import human_size + +# The highest supported bmap format version +SUPPORTED_BMAP_VERSION = 1 + +class Error(Exception): + """ + A class for exceptions generated by the 'BmapCopy' module. We currently + support only one type of exceptions, and we basically throw human-readable + problem description in case of errors. + """ + pass + +class BmapCopy: + """ + This class implements the bmap-based copying functionality. To copy an + image with bmap you should create an instance of this class, which requires + the following: + + * full path or a file-like object of the image to copy + * full path or a file object of the destination file copy the image to + * full path or a file object of the bmap file (optional) + * image size in bytes (optional) + + Although the main purpose of this class is to use bmap, the bmap is not + required, and if it was not provided then the entire image will be copied + to the destination file. + + When the bmap is provided, it is not necessary to specify image size, + because the size is contained in the bmap. Otherwise, it is benefitial to + specify the size because it enables extra sanity checks and makes it + possible to provide the progress bar. + + When the image size is known either from the bmap or the caller specified + it to the class constructor, all the image geometry description attributes + ('blocks_cnt', etc) are initialized by the class constructor and available + for the user. + + However, when the size is not known, some of the image geometry + description attributes are not initialized by the class constructor. + Instead, they are initialized only by the 'copy()' method. + + The 'copy()' method implements image copying. You may choose whether to + verify the SHA1 checksum while copying or not. Note, this is done only in + case of bmap-based copying and only if bmap contains the SHA1 checksums + (e.g., bmap version 1.0 did not have SHA1 checksums). + + You may choose whether to synchronize the destination file after writing or + not. To explicitly synchronize it, use the 'sync()' method. + + This class supports all the bmap format versions up version + 'SUPPORTED_BMAP_VERSION'. + + It is possible to have a simple progress indicator while copying the image. + Use the 'set_progress_indicator()' method. + + You can copy only once with an instance of this class. This means that in + order to copy the image for the second time, you have to create a new class + instance. + """ + + def __init__(self, image, dest, bmap=None, image_size=None, logger=None): + """ + The class constructor. The parameters are: + image - file-like object of the image which should be copied, + should only support 'read()' and 'seek()' methods, + and only seeking forward has to be supported. + dest - file object of the destination file to copy the image + to. + bmap - file object of the bmap file to use for copying. + image_size - size of the image in bytes. + logger - the logger object to use for printing messages. + """ + + self._logger = logger + if self._logger is None: + self._logger = logging.getLogger(__name__) + + self._xml = None + + self._dest_fsync_watermark = None + self._batch_blocks = None + self._batch_queue = None + self._batch_bytes = 1024 * 1024 + self._batch_queue_len = 2 + + self.bmap_version = None + self.bmap_version_major = None + self.bmap_version_minor = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.image_size = None + self.image_size_human = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._f_bmap = None + self._f_bmap_path = None + + self._progress_started = None + self._progress_index = None + self._progress_time = None + self._progress_file = None + self._progress_format = None + self.set_progress_indicator(None, None) + + self._f_image = image + self._image_path = image.name + + self._f_dest = dest + self._dest_path = dest.name + st_data = os.fstat(self._f_dest.fileno()) + self._dest_is_regfile = stat.S_ISREG(st_data.st_mode) + + # Special quirk for /dev/null which does not support fsync() + if stat.S_ISCHR(st_data.st_mode) and \ + os.major(st_data.st_rdev) == 1 and \ + os.minor(st_data.st_rdev) == 3: + self._dest_supports_fsync = False + else: + self._dest_supports_fsync = True + + if bmap: + self._f_bmap = bmap + self._bmap_path = bmap.name + self._parse_bmap() + else: + # There is no bmap. Initialize user-visible attributes to something + # sensible with an assumption that we just have all blocks mapped. + self.bmap_version = 0 + self.block_size = 4096 + self.mapped_percent = 100 + + if image_size: + self._set_image_size(image_size) + + self._batch_blocks = self._batch_bytes / self.block_size + + def set_progress_indicator(self, file_obj, format_string): + """ + Setup the progress indicator which shows how much data has been copied + in percent. + + The 'file_obj' argument is the console file object where the progress + has to be printed to. Pass 'None' to disable the progress indicator. + + The 'format_string' argument is the format string for the progress + indicator. It has to contain a single '%d' placeholder which will be + substitutes with copied data in percent. + """ + + self._progress_file = file_obj + if format_string: + self._progress_format = format_string + else: + self._progress_format = "Copied %d%%" + + def _set_image_size(self, image_size): + """ + Set image size and initialize various other geometry-related attributes. + """ + + if self.image_size is not None and self.image_size != image_size: + raise Error("cannot set image size to %d bytes, it is known to " + "be %d bytes (%s)" % (image_size, self.image_size, + self.image_size_human)) + + self.image_size = image_size + self.image_size_human = human_size(image_size) + self.blocks_cnt = self.image_size + self.block_size - 1 + self.blocks_cnt /= self.block_size + + if self.mapped_cnt is None: + self.mapped_cnt = self.blocks_cnt + self.mapped_size = self.image_size + self.mapped_size_human = self.image_size_human + + def _verify_bmap_checksum(self): + """ + This is a helper function which verifies SHA1 checksum of the bmap file. + """ + + import mmap + + correct_sha1 = self._xml.find("BmapFileSHA1").text.strip() + + # Before verifying the shecksum, we have to substitute the SHA1 value + # stored in the file with all zeroes. For these purposes we create + # private memory mapping of the bmap file. + mapped_bmap = mmap.mmap(self._f_bmap.fileno(), 0, + access = mmap.ACCESS_COPY) + + sha1_pos = mapped_bmap.find(correct_sha1) + assert sha1_pos != -1 + + mapped_bmap[sha1_pos:sha1_pos + 40] = '0' * 40 + calculated_sha1 = hashlib.sha1(mapped_bmap).hexdigest() + + mapped_bmap.close() + + if calculated_sha1 != correct_sha1: + raise Error("checksum mismatch for bmap file '%s': calculated " + "'%s', should be '%s'" + % (self._bmap_path, calculated_sha1, correct_sha1)) + + def _parse_bmap(self): + """ + Parse the bmap file and initialize corresponding class instance attributs. + """ + + try: + self._xml = ElementTree.parse(self._f_bmap) + except ElementTree.ParseError as err: + raise Error("cannot parse the bmap file '%s' which should be a " + "proper XML file: %s" % (self._bmap_path, err)) + + xml = self._xml + self.bmap_version = str(xml.getroot().attrib.get('version')) + + # Make sure we support this version + self.bmap_version_major = int(self.bmap_version.split('.', 1)[0]) + self.bmap_version_minor = int(self.bmap_version.split('.', 1)[1]) + if self.bmap_version_major > SUPPORTED_BMAP_VERSION: + raise Error("only bmap format version up to %d is supported, " + "version %d is not supported" + % (SUPPORTED_BMAP_VERSION, self.bmap_version_major)) + + # Fetch interesting data from the bmap XML file + self.block_size = int(xml.find("BlockSize").text.strip()) + self.blocks_cnt = int(xml.find("BlocksCount").text.strip()) + self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip()) + self.image_size = int(xml.find("ImageSize").text.strip()) + self.image_size_human = human_size(self.image_size) + self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = (self.mapped_cnt * 100.0) / self.blocks_cnt + + blocks_cnt = (self.image_size + self.block_size - 1) / self.block_size + if self.blocks_cnt != blocks_cnt: + raise Error("Inconsistent bmap - image size does not match " + "blocks count (%d bytes != %d blocks * %d bytes)" + % (self.image_size, self.blocks_cnt, self.block_size)) + + if self.bmap_version_major >= 1 and self.bmap_version_minor >= 3: + # Bmap file checksum appeard in format 1.3 + self._verify_bmap_checksum() + + def _update_progress(self, blocks_written): + """ + Print the progress indicator if the mapped area size is known and if + the indicator has been enabled by assigning a console file object to + the 'progress_file' attribute. + """ + + if not self._progress_file: + return + + if self.mapped_cnt: + assert blocks_written <= self.mapped_cnt + percent = int((float(blocks_written) / self.mapped_cnt) * 100) + progress = '\r' + self._progress_format % percent + '\n' + else: + # Do not rotate the wheel too fast + now = datetime.datetime.now() + min_delta = datetime.timedelta(milliseconds=250) + if now - self._progress_time < min_delta: + return + self._progress_time = now + + progress_wheel = ('-', '\\', '|', '/') + progress = '\r' + progress_wheel[self._progress_index % 4] + '\n' + self._progress_index += 1 + + # This is a little trick we do in order to make sure that the next + # message will always start from a new line - we switch to the new + # line after each progress update and move the cursor up. As an + # example, this is useful when the copying is interrupted by an + # exception - the error message will start form new line. + if self._progress_started: + # The "move cursor up" escape sequence + self._progress_file.write('\033[1A') # pylint: disable=W1401 + else: + self._progress_started = True + + self._progress_file.write(progress) + self._progress_file.flush() + + def _get_block_ranges(self): + """ + This is a helper generator that parses the bmap XML file and for each + block range in the XML file it yields ('first', 'last', 'sha1') tuples, + where: + * 'first' is the first block of the range; + * 'last' is the last block of the range; + * 'sha1' is the SHA1 checksum of the range ('None' is used if it is + missing. + + If there is no bmap file, the generator just yields a single range + for entire image file. If the image size is unknown, the generator + infinitely yields continuous ranges of size '_batch_blocks'. + """ + + if not self._f_bmap: + # We do not have the bmap, yield a tuple with all blocks + if self.blocks_cnt: + yield (0, self.blocks_cnt - 1, None) + else: + # We do not know image size, keep yielding tuples with many + # blocks infinitely. + first = 0 + while True: + yield (first, first + self._batch_blocks - 1, None) + first += self._batch_blocks + return + + # We have the bmap, just read it and yield block ranges + xml = self._xml + xml_bmap = xml.find("BlockMap") + + for xml_element in xml_bmap.findall("Range"): + blocks_range = xml_element.text.strip() + # The range of blocks has the "X - Y" format, or it can be just "X" + # in old bmap format versions. First, split the blocks range string + # and strip white-spaces. + split = [x.strip() for x in blocks_range.split('-', 1)] + + first = int(split[0]) + if len(split) > 1: + last = int(split[1]) + if first > last: + raise Error("bad range (first > last): '%s'" % blocks_range) + else: + last = first + + if 'sha1' in xml_element.attrib: + sha1 = xml_element.attrib['sha1'] + else: + sha1 = None + + yield (first, last, sha1) + + def _get_batches(self, first, last): + """ + This is a helper generator which splits block ranges from the bmap file + to smaller batches. Indeed, we cannot read and write entire block + ranges from the image file, because a range can be very large. So we + perform the I/O in batches. Batch size is defined by the + '_batch_blocks' attribute. Thus, for each (first, last) block range, + the generator yields smaller (start, end, length) batch ranges, where: + * 'start' is the starting batch block number; + * 'last' is the ending batch block number; + * 'length' is the batch length in blocks (same as + 'end' - 'start' + 1). + """ + + batch_blocks = self._batch_blocks + + while first + batch_blocks - 1 <= last: + yield (first, first + batch_blocks - 1, batch_blocks) + first += batch_blocks + + batch_blocks = last - first + 1 + if batch_blocks: + yield (first, first + batch_blocks - 1, batch_blocks) + + def _get_data(self, verify): + """ + This is generator which reads the image file in '_batch_blocks' chunks + and yields ('type', 'start', 'end', 'buf) tuples, where: + * 'start' is the starting block number of the batch; + * 'end' is the last block of the batch; + * 'buf' a buffer containing the batch data. + """ + + try: + for (first, last, sha1) in self._get_block_ranges(): + if verify and sha1: + hash_obj = hashlib.new('sha1') + + self._f_image.seek(first * self.block_size) + + iterator = self._get_batches(first, last) + for (start, end, length) in iterator: + try: + buf = self._f_image.read(length * self.block_size) + except IOError as err: + raise Error("error while reading blocks %d-%d of the " + "image file '%s': %s" + % (start, end, self._image_path, err)) + + if not buf: + self._batch_queue.put(None) + return + + if verify and sha1: + hash_obj.update(buf) + + blocks = (len(buf) + self.block_size - 1) / self.block_size + self._batch_queue.put(("range", start, start + blocks - 1, + buf)) + + if verify and sha1 and hash_obj.hexdigest() != sha1: + raise Error("checksum mismatch for blocks range %d-%d: " + "calculated %s, should be %s (image file %s)" + % (first, last, hash_obj.hexdigest(), + sha1, self._image_path)) + # Silence pylint warning about catching too general exception + # pylint: disable=W0703 + except Exception: + # pylint: enable=W0703 + # In case of any exception - just pass it to the main thread + # through the queue. + self._batch_queue.put(("error", sys.exc_info())) + + self._batch_queue.put(None) + + def copy(self, sync=True, verify=True): + """ + Copy the image to the destination file using bmap. The 'sync' argument + defines whether the destination file has to be synchronized upon + return. The 'verify' argument defines whether the SHA1 checksum has to + be verified while copying. + """ + + # Create the queue for block batches and start the reader thread, which + # will read the image in batches and put the results to '_batch_queue'. + self._batch_queue = Queue.Queue(self._batch_queue_len) + thread.start_new_thread(self._get_data, (verify, )) + + blocks_written = 0 + bytes_written = 0 + fsync_last = 0 + + self._progress_started = False + self._progress_index = 0 + self._progress_time = datetime.datetime.now() + + # Read the image in '_batch_blocks' chunks and write them to the + # destination file + while True: + batch = self._batch_queue.get() + if batch is None: + # No more data, the image is written + break + elif batch[0] == "error": + # The reader thread encountered an error and passed us the + # exception. + exc_info = batch[1] + raise exc_info[0], exc_info[1], exc_info[2] + + (start, end, buf) = batch[1:4] + + assert len(buf) <= (end - start + 1) * self.block_size + assert len(buf) > (end - start) * self.block_size + + self._f_dest.seek(start * self.block_size) + + # Synchronize the destination file if we reached the watermark + if self._dest_fsync_watermark: + if blocks_written >= fsync_last + self._dest_fsync_watermark: + fsync_last = blocks_written + self.sync() + + try: + self._f_dest.write(buf) + except IOError as err: + raise Error("error while writing blocks %d-%d of '%s': %s" + % (start, end, self._dest_path, err)) + + self._batch_queue.task_done() + blocks_written += (end - start + 1) + bytes_written += len(buf) + + self._update_progress(blocks_written) + + if not self.image_size: + # The image size was unknown up until now, set it + self._set_image_size(bytes_written) + + # This is just a sanity check - we should have written exactly + # 'mapped_cnt' blocks. + if blocks_written != self.mapped_cnt: + raise Error("wrote %u blocks from image '%s' to '%s', but should " + "have %u - bmap file '%s' does not belong to this " + "image" + % (blocks_written, self._image_path, self._dest_path, + self.mapped_cnt, self._bmap_path)) + + if self._dest_is_regfile: + # Make sure the destination file has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" + % (self._dest_path, err)) + + try: + self._f_dest.flush() + except IOError as err: + raise Error("cannot flush '%s': %s" % (self._dest_path, err)) + + if sync: + self.sync() + + def sync(self): + """ + Synchronize the destination file to make sure all the data are actually + written to the disk. + """ + + if self._dest_supports_fsync: + try: + os.fsync(self._f_dest.fileno()), + except OSError as err: + raise Error("cannot synchronize '%s': %s " + % (self._dest_path, err.strerror)) + + +class BmapBdevCopy(BmapCopy): + """ + This class is a specialized version of 'BmapCopy' which copies the image to + a block device. Unlike the base 'BmapCopy' class, this class does various + optimizations specific to block devices, e.g., switching to the 'noop' I/O + scheduler. + """ + + def __init__(self, image, dest, bmap=None, image_size=None, logger=None): + """ + The same as the constructor of the 'BmapCopy' base class, but adds + useful guard-checks specific to block devices. + """ + + # Call the base class constructor first + BmapCopy.__init__(self, image, dest, bmap, image_size, logger=logger) + + self._batch_bytes = 1024 * 1024 + self._batch_blocks = self._batch_bytes / self.block_size + self._batch_queue_len = 6 + self._dest_fsync_watermark = (6 * 1024 * 1024) / self.block_size + + self._sysfs_base = None + self._sysfs_scheduler_path = None + self._sysfs_max_ratio_path = None + self._old_scheduler_value = None + self._old_max_ratio_value = None + + # If the image size is known, check that it fits the block device + if self.image_size: + try: + bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END) + os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET) + except OSError as err: + raise Error("cannot seed block device '%s': %s " + % (self._dest_path, err.strerror)) + + if bdev_size < self.image_size: + raise Error("the image file '%s' has size %s and it will not " + "fit the block device '%s' which has %s capacity" + % (self._image_path, self.image_size_human, + self._dest_path, human_size(bdev_size))) + + # Construct the path to the sysfs directory of our block device + st_rdev = os.fstat(self._f_dest.fileno()).st_rdev + self._sysfs_base = "/sys/dev/block/%s:%s/" \ + % (os.major(st_rdev), os.minor(st_rdev)) + + # Check if the 'queue' sub-directory exists. If yes, then our block + # device is entire disk. Otherwise, it is a partition, in which case we + # need to go one level up in the sysfs hierarchy. + if not os.path.exists(self._sysfs_base + "queue"): + self._sysfs_base = self._sysfs_base + "../" + + self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler" + self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio" + + def _tune_block_device(self): + """ + Tune the block device for better performance: + 1. Switch to the 'noop' I/O scheduler if it is available - sequential + write to the block device becomes a lot faster comparing to CFQ. + 2. Limit the write buffering - we do not need the kernel to buffer a + lot of the data we send to the block device, because we write + sequentially. Limit the buffering. + + The old settings are saved in order to be able to restore them later. + """ + # Switch to the 'noop' I/O scheduler + try: + with open(self._sysfs_scheduler_path, "r+") as f_scheduler: + contents = f_scheduler.read() + f_scheduler.seek(0) + f_scheduler.write("noop") + except IOError as err: + self._logger.warning("failed to enable I/O optimization, expect " + "suboptimal speed (reason: cannot switch " + "to the 'noop' I/O scheduler: %s)" % err) + else: + # The file contains a list of schedulers with the current + # scheduler in square brackets, e.g., "noop deadline [cfq]". + # Fetch the name of the current scheduler. + import re + + match = re.match(r'.*\[(.+)\].*', contents) + if match: + self._old_scheduler_value = match.group(1) + + # Limit the write buffering, because we do not need too much of it when + # writing sequntially. Excessive buffering makes some systems not very + # responsive, e.g., this was observed in Fedora 17. + try: + with open(self._sysfs_max_ratio_path, "r+") as f_ratio: + self._old_max_ratio_value = f_ratio.read() + f_ratio.seek(0) + f_ratio.write("1") + except IOError as err: + self._logger.warning("failed to disable excessive buffering, " + "expect worse system responsiveness " + "(reason: cannot set max. I/O ratio to " + "1: %s)" % err) + + def _restore_bdev_settings(self): + """ + Restore old block device settings which we changed in + '_tune_block_device()'. + """ + + if self._old_scheduler_value is not None: + try: + with open(self._sysfs_scheduler_path, "w") as f_scheduler: + f_scheduler.write(self._old_scheduler_value) + except IOError as err: + raise Error("cannot restore the '%s' I/O scheduler: %s" + % (self._old_scheduler_value, err)) + + if self._old_max_ratio_value is not None: + try: + with open(self._sysfs_max_ratio_path, "w") as f_ratio: + f_ratio.write(self._old_max_ratio_value) + except IOError as err: + raise Error("cannot set the max. I/O ratio back to '%s': %s" + % (self._old_max_ratio_value, err)) + + def copy(self, sync=True, verify=True): + """ + The same as in the base class but tunes the block device for better + performance before starting writing. Additionally, it forces block + device synchronization from time to time in order to make sure we do + not get stuck in 'fsync()' for too long time. The problem is that the + kernel synchronizes block devices when the file is closed. And the + result is that if the user interrupts us while we are copying the data, + the program will be blocked in 'close()' waiting for the block device + synchronization, which may last minutes for slow USB stick. This is + very bad user experience, and we work around this effect by + synchronizing from time to time. + """ + + self._tune_block_device() + + try: + BmapCopy.copy(self, sync, verify) + except: + raise + finally: + self._restore_bdev_settings() diff --git a/tests/oldcodebase/BmapCopy3_0.py b/tests/oldcodebase/BmapCopy3_0.py new file mode 100644 index 0000000..fe8897c --- /dev/null +++ b/tests/oldcodebase/BmapCopy3_0.py @@ -0,0 +1,764 @@ +# Copyright (c) 2012-2013 Intel, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +""" +This module implements copying of images with bmap and provides the following +API. + 1. BmapCopy class - implements copying to any kind of file, be that a block + device or a regular file. + 2. BmapBdevCopy class - based on BmapCopy and specializes on copying to block + devices. It does some more sanity checks and some block device performance + tuning. + +The bmap file is an XML file which contains a list of mapped blocks of the +image. Mapped blocks are the blocks which have disk sectors associated with +them, as opposed to holes, which are blocks with no associated disk sectors. In +other words, the image is considered to be a sparse file, and bmap basically +contains a list of mapped blocks of this sparse file. The bmap additionally +contains some useful information like block size (usually 4KiB), image size, +mapped blocks count, etc. + +The bmap is used for copying the image to a block device or to a regular file. +The idea is that we copy quickly with bmap because we copy only mapped blocks +and ignore the holes, because they are useless. And if the image is generated +properly (starting with a huge hole and writing all the data), it usually +contains only little mapped blocks, comparing to the overall image size. And +such an image compresses very well (because holes are read as all zeroes), so +it is beneficial to distributor them as compressed files along with the bmap. + +Here is an example. Suppose you have a 4GiB image which contains only 100MiB of +user data and you need to flash it to a slow USB stick. With bmap you end up +copying only a little bit more than 100MiB of data from the image to the USB +stick (namely, you copy only mapped blocks). This is a lot faster than copying +all 4GiB of data. We say that it is a bit more than 100MiB because things like +file-system meta-data (inode tables, superblocks, etc), partition table, etc +also contribute to the mapped blocks and are also copied. +""" + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# pylint: disable=R0902 + +import os +import stat +import sys +import hashlib +import logging +import Queue +import thread +import datetime +from xml.etree import ElementTree +from bmaptools.BmapHelpers import human_size + +# The highest supported bmap format version +SUPPORTED_BMAP_VERSION = 1 + +class Error(Exception): + """ + A class for exceptions generated by the 'BmapCopy' module. We currently + support only one type of exceptions, and we basically throw human-readable + problem description in case of errors. + """ + pass + +class BmapCopy: + """ + This class implements the bmap-based copying functionality. To copy an + image with bmap you should create an instance of this class, which requires + the following: + + * full path or a file-like object of the image to copy + * full path or a file object of the destination file copy the image to + * full path or a file object of the bmap file (optional) + * image size in bytes (optional) + + Although the main purpose of this class is to use bmap, the bmap is not + required, and if it was not provided then the entire image will be copied + to the destination file. + + When the bmap is provided, it is not necessary to specify image size, + because the size is contained in the bmap. Otherwise, it is benefitial to + specify the size because it enables extra sanity checks and makes it + possible to provide the progress bar. + + When the image size is known either from the bmap or the caller specified + it to the class constructor, all the image geometry description attributes + ('blocks_cnt', etc) are initialized by the class constructor and available + for the user. + + However, when the size is not known, some of the image geometry + description attributes are not initialized by the class constructor. + Instead, they are initialized only by the 'copy()' method. + + The 'copy()' method implements image copying. You may choose whether to + verify the checksum while copying or not. Note, this is done only in case + of bmap-based copying and only if bmap contains checksums (e.g., bmap + version 1.0 did not have checksums support). + + You may choose whether to synchronize the destination file after writing or + not. To explicitly synchronize it, use the 'sync()' method. + + This class supports all the bmap format versions up version + 'SUPPORTED_BMAP_VERSION'. + + It is possible to have a simple progress indicator while copying the image. + Use the 'set_progress_indicator()' method. + + You can copy only once with an instance of this class. This means that in + order to copy the image for the second time, you have to create a new class + instance. + """ + + def __init__(self, image, dest, bmap=None, image_size=None, log=None): + """ + The class constructor. The parameters are: + image - file-like object of the image which should be copied, + should only support 'read()' and 'seek()' methods, + and only seeking forward has to be supported. + dest - file object of the destination file to copy the image + to. + bmap - file object of the bmap file to use for copying. + image_size - size of the image in bytes. + log - the logger object to use for printing messages. + """ + + self._log = log + if self._log is None: + self._log = logging.getLogger(__name__) + + self._xml = None + + self._dest_fsync_watermark = None + self._batch_blocks = None + self._batch_queue = None + self._batch_bytes = 1024 * 1024 + self._batch_queue_len = 6 + + self.bmap_version = None + self.bmap_version_major = None + self.bmap_version_minor = None + self.block_size = None + self.blocks_cnt = None + self.mapped_cnt = None + self.image_size = None + self.image_size_human = None + self.mapped_size = None + self.mapped_size_human = None + self.mapped_percent = None + + self._f_bmap = None + self._f_bmap_path = None + + self._progress_started = None + self._progress_index = None + self._progress_time = None + self._progress_file = None + self._progress_format = None + self.set_progress_indicator(None, None) + + self._f_image = image + self._image_path = image.name + + self._f_dest = dest + self._dest_path = dest.name + st_data = os.fstat(self._f_dest.fileno()) + self._dest_is_regfile = stat.S_ISREG(st_data.st_mode) + + # The bmap file checksum type and length + self._cs_type = None + self._cs_len = None + self._cs_attrib_name = None + + # Special quirk for /dev/null which does not support fsync() + if stat.S_ISCHR(st_data.st_mode) and \ + os.major(st_data.st_rdev) == 1 and \ + os.minor(st_data.st_rdev) == 3: + self._dest_supports_fsync = False + else: + self._dest_supports_fsync = True + + if bmap: + self._f_bmap = bmap + self._bmap_path = bmap.name + self._parse_bmap() + else: + # There is no bmap. Initialize user-visible attributes to something + # sensible with an assumption that we just have all blocks mapped. + self.bmap_version = 0 + self.block_size = 4096 + self.mapped_percent = 100 + + if image_size: + self._set_image_size(image_size) + + self._batch_blocks = self._batch_bytes / self.block_size + + def set_progress_indicator(self, file_obj, format_string): + """ + Setup the progress indicator which shows how much data has been copied + in percent. + + The 'file_obj' argument is the console file object where the progress + has to be printed to. Pass 'None' to disable the progress indicator. + + The 'format_string' argument is the format string for the progress + indicator. It has to contain a single '%d' placeholder which will be + substitutes with copied data in percent. + """ + + self._progress_file = file_obj + if format_string: + self._progress_format = format_string + else: + self._progress_format = "Copied %d%%" + + def _set_image_size(self, image_size): + """ + Set image size and initialize various other geometry-related attributes. + """ + + if self.image_size is not None and self.image_size != image_size: + raise Error("cannot set image size to %d bytes, it is known to " + "be %d bytes (%s)" % (image_size, self.image_size, + self.image_size_human)) + + self.image_size = image_size + self.image_size_human = human_size(image_size) + self.blocks_cnt = self.image_size + self.block_size - 1 + self.blocks_cnt /= self.block_size + + if self.mapped_cnt is None: + self.mapped_cnt = self.blocks_cnt + self.mapped_size = self.image_size + self.mapped_size_human = self.image_size_human + + def _verify_bmap_checksum(self): + """ + This is a helper function which verifies the bmap file checksum. + """ + + import mmap + + if self.bmap_version_minor == 3: + correct_chksum = self._xml.find("BmapFileSHA1").text.strip() + else: + correct_chksum = self._xml.find("BmapFileChecksum").text.strip() + + # Before verifying the shecksum, we have to substitute the checksum + # value stored in the file with all zeroes. For these purposes we + # create private memory mapping of the bmap file. + mapped_bmap = mmap.mmap(self._f_bmap.fileno(), 0, + access = mmap.ACCESS_COPY) + + chksum_pos = mapped_bmap.find(correct_chksum) + assert chksum_pos != -1 + + mapped_bmap[chksum_pos:chksum_pos + self._cs_len] = '0' * self._cs_len + + hash_obj = hashlib.new(self._cs_type) + hash_obj.update(mapped_bmap) + calculated_chksum = hash_obj.hexdigest() + + mapped_bmap.close() + + if calculated_chksum != correct_chksum: + raise Error("checksum mismatch for bmap file '%s': calculated " + "'%s', should be '%s'" + % (self._bmap_path, calculated_chksum, correct_chksum)) + + def _parse_bmap(self): + """ + Parse the bmap file and initialize corresponding class instance attributs. + """ + + try: + self._xml = ElementTree.parse(self._f_bmap) + except ElementTree.ParseError as err: + # Extrace the erroneous line with some context + self._f_bmap.seek(0) + xml_extract = "" + for num, line in enumerate(self._f_bmap): + if num >= err.position[0] - 4 and num <= err.position[0] + 4: + xml_extract += "Line %d: %s" % (num, line) + + raise Error("cannot parse the bmap file '%s' which should be a " + "proper XML file: %s, the XML extract:\n%s" % + (self._bmap_path, err, xml_extract)) + + xml = self._xml + self.bmap_version = str(xml.getroot().attrib.get('version')) + + # Make sure we support this version + self.bmap_version_major = int(self.bmap_version.split('.', 1)[0]) + self.bmap_version_minor = int(self.bmap_version.split('.', 1)[1]) + if self.bmap_version_major > SUPPORTED_BMAP_VERSION: + raise Error("only bmap format version up to %d is supported, " + "version %d is not supported" + % (SUPPORTED_BMAP_VERSION, self.bmap_version_major)) + + # Fetch interesting data from the bmap XML file + self.block_size = int(xml.find("BlockSize").text.strip()) + self.blocks_cnt = int(xml.find("BlocksCount").text.strip()) + self.mapped_cnt = int(xml.find("MappedBlocksCount").text.strip()) + self.image_size = int(xml.find("ImageSize").text.strip()) + self.image_size_human = human_size(self.image_size) + self.mapped_size = self.mapped_cnt * self.block_size + self.mapped_size_human = human_size(self.mapped_size) + self.mapped_percent = (self.mapped_cnt * 100.0) / self.blocks_cnt + + blocks_cnt = (self.image_size + self.block_size - 1) / self.block_size + if self.blocks_cnt != blocks_cnt: + raise Error("Inconsistent bmap - image size does not match " + "blocks count (%d bytes != %d blocks * %d bytes)" + % (self.image_size, self.blocks_cnt, self.block_size)) + + if self.bmap_version_major >= 1 and self.bmap_version_minor >= 3: + # Bmap file checksum appeard in format 1.3 and the only supported + # checksum type was SHA1. Version 1.4 started supporting arbitrary + # checksum types. A new "ChecksumType" tag was introduce to specify + # the checksum function name. And all XML tags which contained + # "sha1" in their name were renamed to something more neutral. + if self.bmap_version_minor == 3: + self._cs_type = "sha1" + self._cs_attrib_name = "sha1" + else: + self._cs_type = xml.find("ChecksumType").text.strip() + self._cs_attrib_name = "chksum" + + try: + self._cs_len = len(hashlib.new(self._cs_type).hexdigest()) + except ValueError as err: + raise Error("cannot initialize hash function \"%s\": %s" % + (self._cs_type, err)) + self._verify_bmap_checksum() + + def _update_progress(self, blocks_written): + """ + Print the progress indicator if the mapped area size is known and if + the indicator has been enabled by assigning a console file object to + the 'progress_file' attribute. + """ + + if self.mapped_cnt: + assert blocks_written <= self.mapped_cnt + percent = int((float(blocks_written) / self.mapped_cnt) * 100) + self._log.debug("wrote %d blocks out of %d (%d%%)" % + (blocks_written, self.mapped_cnt, percent)) + else: + self._log.debug("wrote %d blocks" % blocks_written) + + if not self._progress_file: + return + + if self.mapped_cnt: + progress = '\r' + self._progress_format % percent + '\n' + else: + # Do not rotate the wheel too fast + now = datetime.datetime.now() + min_delta = datetime.timedelta(milliseconds=250) + if now - self._progress_time < min_delta: + return + self._progress_time = now + + progress_wheel = ('-', '\\', '|', '/') + progress = '\r' + progress_wheel[self._progress_index % 4] + '\n' + self._progress_index += 1 + + # This is a little trick we do in order to make sure that the next + # message will always start from a new line - we switch to the new + # line after each progress update and move the cursor up. As an + # example, this is useful when the copying is interrupted by an + # exception - the error message will start form new line. + if self._progress_started: + # The "move cursor up" escape sequence + self._progress_file.write('\033[1A') # pylint: disable=W1401 + else: + self._progress_started = True + + self._progress_file.write(progress) + self._progress_file.flush() + + def _get_block_ranges(self): + """ + This is a helper generator that parses the bmap XML file and for each + block range in the XML file it yields ('first', 'last', 'chksum') + tuples, where: + * 'first' is the first block of the range; + * 'last' is the last block of the range; + * 'chksum' is the checksum of the range ('None' is used if it is + missing). + + If there is no bmap file, the generator just yields a single range + for entire image file. If the image size is unknown, the generator + infinitely yields continuous ranges of size '_batch_blocks'. + """ + + if not self._f_bmap: + # We do not have the bmap, yield a tuple with all blocks + if self.blocks_cnt: + yield (0, self.blocks_cnt - 1, None) + else: + # We do not know image size, keep yielding tuples with many + # blocks infinitely. + first = 0 + while True: + yield (first, first + self._batch_blocks - 1, None) + first += self._batch_blocks + return + + # We have the bmap, just read it and yield block ranges + xml = self._xml + xml_bmap = xml.find("BlockMap") + + for xml_element in xml_bmap.findall("Range"): + blocks_range = xml_element.text.strip() + # The range of blocks has the "X - Y" format, or it can be just "X" + # in old bmap format versions. First, split the blocks range string + # and strip white-spaces. + split = [x.strip() for x in blocks_range.split('-', 1)] + + first = int(split[0]) + if len(split) > 1: + last = int(split[1]) + if first > last: + raise Error("bad range (first > last): '%s'" % blocks_range) + else: + last = first + + if self._cs_attrib_name in xml_element.attrib: + chksum = xml_element.attrib[self._cs_attrib_name] + else: + chksum = None + + yield (first, last, chksum) + + def _get_batches(self, first, last): + """ + This is a helper generator which splits block ranges from the bmap file + to smaller batches. Indeed, we cannot read and write entire block + ranges from the image file, because a range can be very large. So we + perform the I/O in batches. Batch size is defined by the + '_batch_blocks' attribute. Thus, for each (first, last) block range, + the generator yields smaller (start, end, length) batch ranges, where: + * 'start' is the starting batch block number; + * 'last' is the ending batch block number; + * 'length' is the batch length in blocks (same as + 'end' - 'start' + 1). + """ + + batch_blocks = self._batch_blocks + + while first + batch_blocks - 1 <= last: + yield (first, first + batch_blocks - 1, batch_blocks) + first += batch_blocks + + batch_blocks = last - first + 1 + if batch_blocks: + yield (first, first + batch_blocks - 1, batch_blocks) + + def _get_data(self, verify): + """ + This is generator which reads the image file in '_batch_blocks' chunks + and yields ('type', 'start', 'end', 'buf) tuples, where: + * 'start' is the starting block number of the batch; + * 'end' is the last block of the batch; + * 'buf' a buffer containing the batch data. + """ + + try: + for (first, last, chksum) in self._get_block_ranges(): + if verify and chksum: + hash_obj = hashlib.new(self._cs_type) + + self._f_image.seek(first * self.block_size) + + iterator = self._get_batches(first, last) + for (start, end, length) in iterator: + try: + buf = self._f_image.read(length * self.block_size) + except IOError as err: + raise Error("error while reading blocks %d-%d of the " + "image file '%s': %s" + % (start, end, self._image_path, err)) + + if not buf: + self._batch_queue.put(None) + return + + if verify and chksum: + hash_obj.update(buf) + + blocks = (len(buf) + self.block_size - 1) / self.block_size + self._log.debug("queueing %d blocks, queue length is %d" % + (blocks, self._batch_queue.qsize())) + + self._batch_queue.put(("range", start, start + blocks - 1, + buf)) + + if verify and chksum and hash_obj.hexdigest() != chksum: + raise Error("checksum mismatch for blocks range %d-%d: " + "calculated %s, should be %s (image file %s)" + % (first, last, hash_obj.hexdigest(), + chksum, self._image_path)) + # Silence pylint warning about catching too general exception + # pylint: disable=W0703 + except Exception: + # pylint: enable=W0703 + # In case of any exception - just pass it to the main thread + # through the queue. + self._batch_queue.put(("error", sys.exc_info())) + + self._batch_queue.put(None) + + def copy(self, sync=True, verify=True): + """ + Copy the image to the destination file using bmap. The 'sync' argument + defines whether the destination file has to be synchronized upon + return. The 'verify' argument defines whether the checksum has to be + verified while copying. + """ + + # Create the queue for block batches and start the reader thread, which + # will read the image in batches and put the results to '_batch_queue'. + self._batch_queue = Queue.Queue(self._batch_queue_len) + thread.start_new_thread(self._get_data, (verify, )) + + blocks_written = 0 + bytes_written = 0 + fsync_last = 0 + + self._progress_started = False + self._progress_index = 0 + self._progress_time = datetime.datetime.now() + + # Read the image in '_batch_blocks' chunks and write them to the + # destination file + while True: + batch = self._batch_queue.get() + if batch is None: + # No more data, the image is written + break + elif batch[0] == "error": + # The reader thread encountered an error and passed us the + # exception. + exc_info = batch[1] + raise exc_info[0], exc_info[1], exc_info[2] + + (start, end, buf) = batch[1:4] + + assert len(buf) <= (end - start + 1) * self.block_size + assert len(buf) > (end - start) * self.block_size + + self._f_dest.seek(start * self.block_size) + + # Synchronize the destination file if we reached the watermark + if self._dest_fsync_watermark: + if blocks_written >= fsync_last + self._dest_fsync_watermark: + fsync_last = blocks_written + self.sync() + + try: + self._f_dest.write(buf) + except IOError as err: + raise Error("error while writing blocks %d-%d of '%s': %s" + % (start, end, self._dest_path, err)) + + self._batch_queue.task_done() + blocks_written += (end - start + 1) + bytes_written += len(buf) + + self._update_progress(blocks_written) + + if not self.image_size: + # The image size was unknown up until now, set it + self._set_image_size(bytes_written) + + # This is just a sanity check - we should have written exactly + # 'mapped_cnt' blocks. + if blocks_written != self.mapped_cnt: + raise Error("wrote %u blocks from image '%s' to '%s', but should " + "have %u - bmap file '%s' does not belong to this " + "image" + % (blocks_written, self._image_path, self._dest_path, + self.mapped_cnt, self._bmap_path)) + + if self._dest_is_regfile: + # Make sure the destination file has the same size as the image + try: + os.ftruncate(self._f_dest.fileno(), self.image_size) + except OSError as err: + raise Error("cannot truncate file '%s': %s" + % (self._dest_path, err)) + + try: + self._f_dest.flush() + except IOError as err: + raise Error("cannot flush '%s': %s" % (self._dest_path, err)) + + if sync: + self.sync() + + def sync(self): + """ + Synchronize the destination file to make sure all the data are actually + written to the disk. + """ + + if self._dest_supports_fsync: + try: + os.fsync(self._f_dest.fileno()), + except OSError as err: + raise Error("cannot synchronize '%s': %s " + % (self._dest_path, err.strerror)) + + +class BmapBdevCopy(BmapCopy): + """ + This class is a specialized version of 'BmapCopy' which copies the image to + a block device. Unlike the base 'BmapCopy' class, this class does various + optimizations specific to block devices, e.g., switching to the 'noop' I/O + scheduler. + """ + + def __init__(self, image, dest, bmap=None, image_size=None, log=None): + """ + The same as the constructor of the 'BmapCopy' base class, but adds + useful guard-checks specific to block devices. + """ + + # Call the base class constructor first + BmapCopy.__init__(self, image, dest, bmap, image_size, log=log) + + self._dest_fsync_watermark = (6 * 1024 * 1024) / self.block_size + + self._sysfs_base = None + self._sysfs_scheduler_path = None + self._sysfs_max_ratio_path = None + self._old_scheduler_value = None + self._old_max_ratio_value = None + + # If the image size is known, check that it fits the block device + if self.image_size: + try: + bdev_size = os.lseek(self._f_dest.fileno(), 0, os.SEEK_END) + os.lseek(self._f_dest.fileno(), 0, os.SEEK_SET) + except OSError as err: + raise Error("cannot seed block device '%s': %s " + % (self._dest_path, err.strerror)) + + if bdev_size < self.image_size: + raise Error("the image file '%s' has size %s and it will not " + "fit the block device '%s' which has %s capacity" + % (self._image_path, self.image_size_human, + self._dest_path, human_size(bdev_size))) + + # Construct the path to the sysfs directory of our block device + st_rdev = os.fstat(self._f_dest.fileno()).st_rdev + self._sysfs_base = "/sys/dev/block/%s:%s/" % \ + (os.major(st_rdev), os.minor(st_rdev)) + + # Check if the 'queue' sub-directory exists. If yes, then our block + # device is entire disk. Otherwise, it is a partition, in which case we + # need to go one level up in the sysfs hierarchy. + if not os.path.exists(self._sysfs_base + "queue"): + self._sysfs_base = self._sysfs_base + "../" + + self._sysfs_scheduler_path = self._sysfs_base + "queue/scheduler" + self._sysfs_max_ratio_path = self._sysfs_base + "bdi/max_ratio" + + def _tune_block_device(self): + """ + Tune the block device for better performance: + 1. Switch to the 'noop' I/O scheduler if it is available - sequential + write to the block device becomes a lot faster comparing to CFQ. + 2. Limit the write buffering - we do not need the kernel to buffer a + lot of the data we send to the block device, because we write + sequentially. Limit the buffering. + + The old settings are saved in order to be able to restore them later. + """ + # Switch to the 'noop' I/O scheduler + try: + with open(self._sysfs_scheduler_path, "r+") as f_scheduler: + contents = f_scheduler.read() + f_scheduler.seek(0) + f_scheduler.write("noop") + except IOError as err: + self._log.warning("failed to enable I/O optimization, expect " + "suboptimal speed (reason: cannot switch " + "to the 'noop' I/O scheduler: %s)" % err) + else: + # The file contains a list of schedulers with the current + # scheduler in square brackets, e.g., "noop deadline [cfq]". + # Fetch the name of the current scheduler. + import re + + match = re.match(r'.*\[(.+)\].*', contents) + if match: + self._old_scheduler_value = match.group(1) + + # Limit the write buffering, because we do not need too much of it when + # writing sequntially. Excessive buffering makes some systems not very + # responsive, e.g., this was observed in Fedora 17. + try: + with open(self._sysfs_max_ratio_path, "r+") as f_ratio: + self._old_max_ratio_value = f_ratio.read() + f_ratio.seek(0) + f_ratio.write("1") + except IOError as err: + self._log.warning("failed to disable excessive buffering, expect " + "worse system responsiveness (reason: cannot set " + "max. I/O ratio to 1: %s)" % err) + + def _restore_bdev_settings(self): + """ + Restore old block device settings which we changed in + '_tune_block_device()'. + """ + + if self._old_scheduler_value is not None: + try: + with open(self._sysfs_scheduler_path, "w") as f_scheduler: + f_scheduler.write(self._old_scheduler_value) + except IOError as err: + raise Error("cannot restore the '%s' I/O scheduler: %s" + % (self._old_scheduler_value, err)) + + if self._old_max_ratio_value is not None: + try: + with open(self._sysfs_max_ratio_path, "w") as f_ratio: + f_ratio.write(self._old_max_ratio_value) + except IOError as err: + raise Error("cannot set the max. I/O ratio back to '%s': %s" + % (self._old_max_ratio_value, err)) + + def copy(self, sync=True, verify=True): + """ + The same as in the base class but tunes the block device for better + performance before starting writing. Additionally, it forces block + device synchronization from time to time in order to make sure we do + not get stuck in 'fsync()' for too long time. The problem is that the + kernel synchronizes block devices when the file is closed. And the + result is that if the user interrupts us while we are copying the data, + the program will be blocked in 'close()' waiting for the block device + synchronization, which may last minutes for slow USB stick. This is + very bad user experience, and we work around this effect by + synchronizing from time to time. + """ + + self._tune_block_device() + + try: + BmapCopy.copy(self, sync, verify) + except: + raise + finally: + self._restore_bdev_settings() diff --git a/tests/oldcodebase/__init__.py b/tests/oldcodebase/__init__.py new file mode 100644 index 0000000..e69de29 -- 2.34.1