From 8a35306e9da4a12b2315d064ac288e186d6cf477 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 22 Jan 2014 10:04:06 +0200 Subject: [PATCH] Bmap: sync with the latest bmap-tools This patch syncs MIC's copy of BmapCreate with the latest contents of the bmap-tools project. The reason for syncing is to fix DEVT-139. The bug report says that MIC cannot generate the bmap file when the image resides on tmpfs. And now this is fixed. We also re-named the Fiemap module to Filemap, because now this module supports both the FIEMAP ioctl and the SEEK_HOLE method. Here is some more description from the bmap-tools release notes. " Make 'bmaptool create' (and hence, the BmapCreate module) work with the "tmpfs" file-system. This file-system does not, unfortunately, support the "FIEMAP" ioctl, but it supports the "SEEK_HOLE" option of the "lseek" system call, which can also be used to find where the holes are. In this release we have implemented support for "SEEK_HOLE" to cover tmpfs. Generally, FIEMAP is faster than "SEEK_HOLE" for large files, so we always try to start with using FIEMAP, and if it is not supported, we fall back to using "SEEK_HOLE". Therefore, the "Fiemap" module was re-named to "Filemap", since it now supports more than just the FIEMAP ioctl. Unfortunately, the SEEK_HOLE method requires the directory where the image resides to be accessible for writing, because in the current implementation we need to create a temporary file there for a short time. The temporary file is used to detect whether SEEK_HOLE is really implemented, or the system just fakes it by always returning EOF. 
" Change-Id: I75522cab3c32c9b0879967f8fa2587c71bae0395 Signed-off-by: Artem Bityutskiy --- mic/utils/BmapCreate.py | 44 ++-- mic/utils/Fiemap.py | 252 ----------------------- mic/utils/Filemap.py | 520 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 548 insertions(+), 268 deletions(-) delete mode 100644 mic/utils/Fiemap.py create mode 100644 mic/utils/Filemap.py diff --git a/mic/utils/BmapCreate.py b/mic/utils/BmapCreate.py index 7f74bcd..6934f1a 100644 --- a/mic/utils/BmapCreate.py +++ b/mic/utils/BmapCreate.py @@ -43,17 +43,23 @@ This module uses the FIBMAP ioctl to detect holes. # pylint: disable=R0902,R0903 import hashlib +import logging from mic.utils.misc import human_size -from mic.utils import Fiemap +from mic.utils import Filemap # The bmap format version we generate. # # Changelog: -# o 1.3 -> 1.4: +# o 1.3 -> 2.0: # Support SHA256 and SHA512 checksums, in 1.3 only SHA1 was supported. # "BmapFileChecksum" is used instead of "BmapFileSHA1", and "chksum=" -# attribute is used instead "sha1=". Introduced "ChecksumType" tag. -SUPPORTED_BMAP_VERSION = "1.4" +# attribute is used instead "sha1=". Introduced "ChecksumType" tag. This is +# an incompatible change. +# Note, bmap format 1.4 is identical to 2.0. Version 1.4 was a mistake, +# instead of incrementing the major version number, we incremented minor +# version number. Unfortunately, the mistake slipped into bmap-tools version +# 3.0, and was only fixed in bmap-tools v3.1. +SUPPORTED_BMAP_VERSION = "2.0" _BMAP_START_TEMPLATE = \ """ @@ -99,7 +105,7 @@ class Error(Exception): """ pass -class BmapCreate: +class BmapCreate(object): """ This class implements the bmap creation functionality. To generate a bmap for an image (which is supposedly a sparse file), you should first create @@ -112,7 +118,7 @@ class BmapCreate: the FIEMAP ioctl to generate the bmap. 
""" - def __init__(self, image, bmap, chksum_type="sha256"): + def __init__(self, image, bmap, chksum_type="sha256", log=None): """ Initialize a class instance: * image - full path or a file-like object of the image to create bmap @@ -121,8 +127,13 @@ class BmapCreate: bmap to * chksum - type of the check sum to use in the bmap file (all checksum types which python's "hashlib" module supports are allowed). + * log - the logger object to use for printing messages. """ + self._log = log + if self._log is None: + self._log = logging.getLogger(__name__) + self.image_size = None self.image_size_human = None self.block_size = None @@ -160,16 +171,19 @@ class BmapCreate: self._bmap_path = bmap self._open_bmap_file() - self.fiemap = Fiemap.Fiemap(self._f_image) + try: + self.filemap = Filemap.filemap(self._f_image, self._log) + except (Filemap.Error, Filemap.ErrorNotSupp) as err: + raise Error("cannot generate bmap: %s" % err) - self.image_size = self.fiemap.image_size + self.image_size = self.filemap.image_size self.image_size_human = human_size(self.image_size) if self.image_size == 0: raise Error("cannot generate bmap for zero-sized image file '%s'" % self._image_path) - self.block_size = self.fiemap.block_size - self.blocks_cnt = self.fiemap.blocks_cnt + self.block_size = self.filemap.block_size + self.blocks_cnt = self.filemap.blocks_cnt def __del__(self): """The class destructor which closes the opened files.""" @@ -207,8 +221,6 @@ class BmapCreate: # We do not know the amount of mapped blocks at the moment, so just put # whitespaces instead of real numbers. Assume the longest possible # numbers. 
- mapped_count = ' ' * len(str(self.image_size)) - mapped_size_human = ' ' * len(self.image_size_human) xml = _BMAP_START_TEMPLATE \ % (SUPPORTED_BMAP_VERSION, self.image_size_human, @@ -218,14 +230,14 @@ class BmapCreate: self._f_bmap.write(xml) self._mapped_count_pos1 = self._f_bmap.tell() - # Just put white-spaces instead of real information about mapped blocks - xml = "%s or %.1f -->\n" % (mapped_size_human, 100.0) + xml = "%s or %s -->\n" % (' ' * len(self.image_size_human), + ' ' * len("100.0%")) xml += " " self._f_bmap.write(xml) self._mapped_count_pos2 = self._f_bmap.tell() - xml = "%s \n\n" % mapped_count + xml = "%s \n\n" % (' ' * len(str(self.blocks_cnt))) # pylint: disable=C0301 xml += " \n" @@ -312,7 +324,7 @@ class BmapCreate: # Generate the block map and write it to the XML block map # file as we go. self.mapped_cnt = 0 - for first, last in self.fiemap.get_mapped_ranges(0, self.blocks_cnt): + for first, last in self.filemap.get_mapped_ranges(0, self.blocks_cnt): self.mapped_cnt += last - first + 1 if include_checksums: chksum = self._calculate_chksum(first, last) diff --git a/mic/utils/Fiemap.py b/mic/utils/Fiemap.py deleted file mode 100644 index f2db6ff..0000000 --- a/mic/utils/Fiemap.py +++ /dev/null @@ -1,252 +0,0 @@ -""" This module implements python API for the FIEMAP ioctl. The FIEMAP ioctl -allows to find holes and mapped areas in a file. """ - -# Note, a lot of code in this module is not very readable, because it deals -# with the rather complex FIEMAP ioctl. To understand the code, you need to -# know the FIEMAP interface, which is documented in the -# Documentation/filesystems/fiemap.txt file in the Linux kernel sources. 
- -# Disable the following pylint recommendations: -# * Too many instance attributes (R0902) -# pylint: disable=R0902 - -import os -import struct -import array -import fcntl -from mic.utils.misc import get_block_size - -# Format string for 'struct fiemap' -_FIEMAP_FORMAT = "=QQLLLL" -# sizeof(struct fiemap) -_FIEMAP_SIZE = struct.calcsize(_FIEMAP_FORMAT) -# Format string for 'struct fiemap_extent' -_FIEMAP_EXTENT_FORMAT = "=QQQQQLLLL" -# sizeof(struct fiemap_extent) -_FIEMAP_EXTENT_SIZE = struct.calcsize(_FIEMAP_EXTENT_FORMAT) -# The FIEMAP ioctl number -_FIEMAP_IOCTL = 0xC020660B - -# Minimum buffer which is required for 'class Fiemap' to operate -MIN_BUFFER_SIZE = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE -# The default buffer size for 'class Fiemap' -DEFAULT_BUFFER_SIZE = 256 * 1024 - -class Error(Exception): - """ A class for exceptions generated by this module. We currently support - only one type of exceptions, and we basically throw human-readable problem - description in case of errors. """ - pass - -class Fiemap: - """ This class provides API to the FIEMAP ioctl. Namely, it allows to - iterate over all mapped blocks and over all holes. """ - - def _open_image_file(self): - """ Open the image file. """ - - try: - self._f_image = open(self._image_path, 'rb') - except IOError as err: - raise Error("cannot open image file '%s': %s" \ - % (self._image_path, err)) - - self._f_image_needs_close = True - - def __init__(self, image, buf_size = DEFAULT_BUFFER_SIZE): - """ Initialize a class instance. The 'image' argument is full path to - the file to operate on, or a file object to operate on. - - The 'buf_size' argument is the size of the buffer for 'struct - fiemap_extent' elements which will be used when invoking the FIEMAP - ioctl. The larger is the buffer, the less times the FIEMAP ioctl will - be invoked. 
""" - - self._f_image_needs_close = False - - if hasattr(image, "fileno"): - self._f_image = image - self._image_path = image.name - else: - self._image_path = image - self._open_image_file() - - # Validate 'buf_size' - if buf_size < MIN_BUFFER_SIZE: - raise Error("too small buffer (%d bytes), minimum is %d bytes" \ - % (buf_size, MIN_BUFFER_SIZE)) - - # How many 'struct fiemap_extent' elements fit the buffer - buf_size -= _FIEMAP_SIZE - self._fiemap_extent_cnt = buf_size / _FIEMAP_EXTENT_SIZE - self._buf_size = self._fiemap_extent_cnt * _FIEMAP_EXTENT_SIZE - self._buf_size += _FIEMAP_SIZE - - # Allocate a mutable buffer for the FIEMAP ioctl - self._buf = array.array('B', [0] * self._buf_size) - - self.image_size = os.fstat(self._f_image.fileno()).st_size - - try: - self.block_size = get_block_size(self._f_image) - except IOError as err: - raise Error("cannot get block size for '%s': %s" \ - % (self._image_path, err)) - - self.blocks_cnt = self.image_size + self.block_size - 1 - self.blocks_cnt /= self.block_size - - # Synchronize the image file to make sure FIEMAP returns correct values - try: - self._f_image.flush() - except IOError as err: - raise Error("cannot flush image file '%s': %s" \ - % (self._image_path, err)) - try: - os.fsync(self._f_image.fileno()), - except OSError as err: - raise Error("cannot synchronize image file '%s': %s " \ - % (self._image_path, err.strerror)) - - # Check if the FIEMAP ioctl is supported - self.block_is_mapped(0) - - def __del__(self): - """ The class destructor which closes the opened files. """ - - if self._f_image_needs_close: - self._f_image.close() - - def _invoke_fiemap(self, block, count): - """ Invoke the FIEMAP ioctl for 'count' blocks of the file starting from - block number 'block'. - - The full result of the operation is stored in 'self._buf' on exit. - Returns the unpacked 'struct fiemap' data structure in form of a python - list (just like 'struct.upack()'). 
""" - - if block < 0 or block >= self.blocks_cnt: - raise Error("bad block number %d, should be within [0, %d]" \ - % (block, self.blocks_cnt)) - - # Initialize the 'struct fiemap' part of the buffer - struct.pack_into(_FIEMAP_FORMAT, self._buf, 0, block * self.block_size, - count * self.block_size, 0, 0, - self._fiemap_extent_cnt, 0) - - try: - fcntl.ioctl(self._f_image, _FIEMAP_IOCTL, self._buf, 1) - except IOError as err: - error_msg = "the FIEMAP ioctl failed for '%s': %s" \ - % (self._image_path, err) - if err.errno == os.errno.EPERM or err.errno == os.errno.EACCES: - # The FIEMAP ioctl was added in kernel version 2.6.28 in 2008 - error_msg += " (looks like your kernel does not support FIEMAP)" - - raise Error(error_msg) - - return struct.unpack(_FIEMAP_FORMAT, self._buf[:_FIEMAP_SIZE]) - - def block_is_mapped(self, block): - """ This function returns 'True' if block number 'block' of the image - file is mapped and 'False' otherwise. """ - - struct_fiemap = self._invoke_fiemap(block, 1) - - # The 3rd element of 'struct_fiemap' is the 'fm_mapped_extents' field. - # If it contains zero, the block is not mapped, otherwise it is - # mapped. - return bool(struct_fiemap[3]) - - def block_is_unmapped(self, block): - """ This function returns 'True' if block number 'block' of the image - file is not mapped (hole) and 'False' otherwise. """ - - return not self.block_is_mapped(block) - - def _unpack_fiemap_extent(self, index): - """ Unpack a 'struct fiemap_extent' structure object number 'index' - from the internal 'self._buf' buffer. """ - - offset = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE * index - return struct.unpack(_FIEMAP_EXTENT_FORMAT, - self._buf[offset : offset + _FIEMAP_EXTENT_SIZE]) - - def _do_get_mapped_ranges(self, start, count): - """ Implements most the functionality for the 'get_mapped_ranges()' - generator: invokes the FIEMAP ioctl, walks through the mapped - extents and yields mapped block ranges. 
However, the ranges may be - consecutive (e.g., (1, 100), (100, 200)) and 'get_mapped_ranges()' - simply merges them. """ - - block = start - while block < start + count: - struct_fiemap = self._invoke_fiemap(block, count) - - mapped_extents = struct_fiemap[3] - if mapped_extents == 0: - # No more mapped blocks - return - - extent = 0 - while extent < mapped_extents: - fiemap_extent = self._unpack_fiemap_extent(extent) - - # Start of the extent - extent_start = fiemap_extent[0] - # Starting block number of the extent - extent_block = extent_start / self.block_size - # Length of the extent - extent_len = fiemap_extent[2] - # Count of blocks in the extent - extent_count = extent_len / self.block_size - - # Extent length and offset have to be block-aligned - assert extent_start % self.block_size == 0 - assert extent_len % self.block_size == 0 - - if extent_block > start + count - 1: - return - - first = max(extent_block, block) - last = min(extent_block + extent_count, start + count) - 1 - yield (first, last) - - extent += 1 - - block = extent_block + extent_count - - def get_mapped_ranges(self, start, count): - """ A generator which yields ranges of mapped blocks in the file. The - ranges are tuples of 2 elements: [first, last], where 'first' is the - first mapped block and 'last' is the last mapped block. - - The ranges are yielded for the area of the file of size 'count' blocks, - starting from block 'start'. """ - - iterator = self._do_get_mapped_ranges(start, count) - - first_prev, last_prev = iterator.next() - - for first, last in iterator: - if last_prev == first - 1: - last_prev = last - else: - yield (first_prev, last_prev) - first_prev, last_prev = first, last - - yield (first_prev, last_prev) - - def get_unmapped_ranges(self, start, count): - """ Just like 'get_mapped_ranges()', but yields unmapped block ranges - instead (holes). 
""" - - hole_first = start - for first, last in self._do_get_mapped_ranges(start, count): - if first > hole_first: - yield (hole_first, first - 1) - - hole_first = last + 1 - - if hole_first < start + count: - yield (hole_first, start + count - 1) diff --git a/mic/utils/Filemap.py b/mic/utils/Filemap.py new file mode 100644 index 0000000..81d16c1 --- /dev/null +++ b/mic/utils/Filemap.py @@ -0,0 +1,520 @@ +# Copyright (c) 2012 Intel, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +""" +This module implements a python API for finding the file block mapping. Two methods +are supported - the FIEMAP ioctl and the 'SEEK_HOLE / SEEK_DATA' features of +the file seek syscall. The former is implemented by the 'FilemapFiemap' class, +the latter is implemented by the 'FilemapSeek' class. Both classes provide the +same API. The 'filemap' function automatically selects which class can be used +and returns an instance of the class. +""" + +# Disable the following pylint recommendations: +# * Too many instance attributes (R0902) +# pylint: disable=R0902 + +import os +import struct +import array +import fcntl +import tempfile +import logging +from mic.utils.misc import get_block_size + + +class ErrorNotSupp(Exception): + """ + An exception of this type is raised when the 'FIEMAP' or 'SEEK_HOLE' feature + is not supported either by the kernel or the file-system. + """ + pass + +class Error(Exception): + """A class for all the other exceptions raised by this module.""" + pass + + +class _FilemapBase(object): + """ + This is a base class for a couple of other classes in this module. 
This + class simply performs the common parts of the initialization process: opens + the image file, gets its size, etc. The 'log' parameter is the logger object + to use for printing messages. + """ + + def __init__(self, image, log=None): + """ + Initialize a class instance. The 'image' argument is full path to the + file or file object to operate on. + """ + + self._log = log + if self._log is None: + self._log = logging.getLogger(__name__) + + self._f_image_needs_close = False + + if hasattr(image, "fileno"): + self._f_image = image + self._image_path = image.name + else: + self._image_path = image + self._open_image_file() + + try: + self.image_size = os.fstat(self._f_image.fileno()).st_size + except IOError as err: + raise Error("cannot get information about file '%s': %s" + % (self._f_image.name, err)) + + try: + self.block_size = get_block_size(self._f_image) + except IOError as err: + raise Error("cannot get block size for '%s': %s" + % (self._image_path, err)) + + self.blocks_cnt = self.image_size + self.block_size - 1 + self.blocks_cnt /= self.block_size + + try: + self._f_image.flush() + except IOError as err: + raise Error("cannot flush image file '%s': %s" + % (self._image_path, err)) + + try: + os.fsync(self._f_image.fileno()), + except OSError as err: + raise Error("cannot synchronize image file '%s': %s " + % (self._image_path, err.strerror)) + + self._log.debug("opened image \"%s\"" % self._image_path) + self._log.debug("block size %d, blocks count %d, image size %d" + % (self.block_size, self.blocks_cnt, self.image_size)) + + def __del__(self): + """The class destructor which just closes the image file.""" + if self._f_image_needs_close: + self._f_image.close() + + def _open_image_file(self): + """Open the image file.""" + try: + self._f_image = open(self._image_path, 'rb') + except IOError as err: + raise Error("cannot open image file '%s': %s" + % (self._image_path, err)) + + self._f_image_needs_close = True + + def block_is_mapped(self, 
block): # pylint: disable=W0613,R0201 + """ + This method has to be implemented by child classes. It returns + 'True' if block number 'block' of the image file is mapped and 'False' + otherwise. + """ + + raise Error("the method is not implemented") + + def block_is_unmapped(self, block): # pylint: disable=W0613,R0201 + """ + This method has to be implemented by child classes. It returns + 'True' if block number 'block' of the image file is not mapped (hole) + and 'False' otherwise. + """ + + raise Error("the method is not implemented") + + def get_mapped_ranges(self, start, count): # pylint: disable=W0613,R0201 + """ + This method has to be implemented by child classes. This is a + generator which yields ranges of mapped blocks in the file. The ranges + are tuples of 2 elements: [first, last], where 'first' is the first + mapped block and 'last' is the last mapped block. + + The ranges are yielded for the area of the file of size 'count' blocks, + starting from block 'start'. + """ + + raise Error("the method is not implemented") + + def get_unmapped_ranges(self, start, count): # pylint: disable=W0613,R0201 + """ + This method has to be implemented by child classes. Just like + 'get_mapped_ranges()', but yields unmapped block ranges instead + (holes). + """ + + raise Error("the method is not implemented") + + +# The 'SEEK_HOLE' and 'SEEK_DATA' options of the file seek system call +_SEEK_DATA = 3 +_SEEK_HOLE = 4 + +def _lseek(file_obj, offset, whence): + """This is a helper function which invokes 'os.lseek' for file object + 'file_obj' and with specified 'offset' and 'whence'. The 'whence' + argument is supposed to be either '_SEEK_DATA' or '_SEEK_HOLE'. When + there is no more data or hole starting from 'offset', this function + returns '-1'. 
Otherwise the data or hole position is returned.""" + + try: + return os.lseek(file_obj.fileno(), offset, whence) + except OSError as err: + # The 'lseek' system call returns the ENXIO if there is no data or + # hole starting from the specified offset. + if err.errno == os.errno.ENXIO: + return -1 + elif err.errno == os.errno.EINVAL: + raise ErrorNotSupp("the kernel or file-system does not support " + "\"SEEK_HOLE\" and \"SEEK_DATA\"") + else: + raise + +class FilemapSeek(_FilemapBase): + """ + This class uses the 'SEEK_HOLE' and 'SEEK_DATA' to find file block mapping. + Unfortunately, the current implementation requires the caller to have write + access to the image file. + """ + + def __init__(self, image, log=None): + """Refer the '_FilemapBase' class for the documentation.""" + + # Call the base class constructor first + _FilemapBase.__init__(self, image, log) + self._log.debug("FilemapSeek: initializing") + + self._probe_seek_hole() + + def _probe_seek_hole(self): + """ + Check whether the system implements 'SEEK_HOLE' and 'SEEK_DATA'. + Unfortunately, there seems to be no clean way for detecting this, + because often the system just fakes them by just assuming that all + files are fully mapped, so 'SEEK_HOLE' always returns EOF and + 'SEEK_DATA' always returns the requested offset. + + I could not invent a better way of detecting the fake 'SEEK_HOLE' + implementation than just to create a temporary file in the same + directory where the image file resides. It would be nice to change this + to something better. 
+ """ + + directory = os.path.dirname(self._image_path) + + try: + tmp_obj = tempfile.TemporaryFile("w+", dir=directory) + except IOError as err: + raise ErrorNotSupp("cannot create a temporary in \"%s\": %s" + % (directory, err)) + + try: + os.ftruncate(tmp_obj.fileno(), self.block_size) + except OSError as err: + raise ErrorNotSupp("cannot truncate temporary file in \"%s\": %s" + % (directory, err)) + + offs = _lseek(tmp_obj, 0, _SEEK_HOLE) + if offs != 0: + # We are dealing with the stub 'SEEK_HOLE' implementation which + # always returns EOF. + self._log.debug("lseek(0, SEEK_HOLE) returned %d" % offs) + raise ErrorNotSupp("the file-system does not support " + "\"SEEK_HOLE\" and \"SEEK_DATA\" but only " + "provides a stub implementation") + + tmp_obj.close() + + def block_is_mapped(self, block): + """Refer the '_FilemapBase' class for the documentation.""" + offs = _lseek(self._f_image, block * self.block_size, _SEEK_DATA) + if offs == -1: + result = False + else: + result = (offs / self.block_size == block) + + self._log.debug("FilemapSeek: block_is_mapped(%d) returns %s" + % (block, result)) + return result + + def block_is_unmapped(self, block): + """Refer the '_FilemapBase' class for the documentation.""" + return not self.block_is_mapped(block) + + def _get_ranges(self, start, count, whence1, whence2): + """ + This function implements 'get_mapped_ranges()' and + 'get_unmapped_ranges()' depending on what is passed in the 'whence1' + and 'whence2' arguments. 
+ """ + + assert whence1 != whence2 + end = start * self.block_size + limit = end + count * self.block_size + + while True: + start = _lseek(self._f_image, end, whence1) + if start == -1 or start >= limit or start == self.image_size: + break + + end = _lseek(self._f_image, start, whence2) + if end == -1 or end == self.image_size: + end = self.blocks_cnt * self.block_size + if end > limit: + end = limit + + start_blk = start / self.block_size + end_blk = end / self.block_size - 1 + self._log.debug("FilemapSeek: yielding range (%d, %d)" + % (start_blk, end_blk)) + yield (start_blk, end_blk) + + def get_mapped_ranges(self, start, count): + """Refer the '_FilemapBase' class for the documentation.""" + self._log.debug("FilemapSeek: get_mapped_ranges(%d, %d(%d))" + % (start, count, start + count - 1)) + return self._get_ranges(start, count, _SEEK_DATA, _SEEK_HOLE) + + def get_unmapped_ranges(self, start, count): + """Refer the '_FilemapBase' class for the documentation.""" + self._log.debug("FilemapSeek: get_unmapped_ranges(%d, %d(%d))" + % (start, count, start + count - 1)) + return self._get_ranges(start, count, _SEEK_HOLE, _SEEK_DATA) + + +# Below goes the FIEMAP ioctl implementation, which is not very readable +# because it deals with the rather complex FIEMAP ioctl. To understand the +# code, you need to know the FIEMAP interface, which is documented in the +# "Documentation/filesystems/fiemap.txt" file in the Linux kernel sources. 
+ +# Format string for 'struct fiemap' +_FIEMAP_FORMAT = "=QQLLLL" +# sizeof(struct fiemap) +_FIEMAP_SIZE = struct.calcsize(_FIEMAP_FORMAT) +# Format string for 'struct fiemap_extent' +_FIEMAP_EXTENT_FORMAT = "=QQQQQLLLL" +# sizeof(struct fiemap_extent) +_FIEMAP_EXTENT_SIZE = struct.calcsize(_FIEMAP_EXTENT_FORMAT) +# The FIEMAP ioctl number +_FIEMAP_IOCTL = 0xC020660B +# This FIEMAP ioctl flag which instructs the kernel to sync the file before +# reading the block map +_FIEMAP_FLAG_SYNC = 0x00000001 +# Size of the buffer for 'struct fiemap_extent' elements which will be used +# when invoking the FIEMAP ioctl. The larger is the buffer, the less times the +# FIEMAP ioctl will be invoked. +_FIEMAP_BUFFER_SIZE = 256 * 1024 + +class FilemapFiemap(_FilemapBase): + """ + This class provides API to the FIEMAP ioctl. Namely, it allows to iterate + over all mapped blocks and over all holes. + + This class synchronizes the image file every time it invokes the FIEMAP + ioctl in order to work-around early FIEMAP implementation kernel bugs. + """ + + def __init__(self, image, log=None): + """ + Initialize a class instance. The 'image' argument is full the file + object to operate on. + """ + + # Call the base class constructor first + _FilemapBase.__init__(self, image, log) + self._log.debug("FilemapFiemap: initializing") + + self._buf_size = _FIEMAP_BUFFER_SIZE + + # Calculate how many 'struct fiemap_extent' elements fit the buffer + self._buf_size -= _FIEMAP_SIZE + self._fiemap_extent_cnt = self._buf_size / _FIEMAP_EXTENT_SIZE + assert self._fiemap_extent_cnt > 0 + self._buf_size = self._fiemap_extent_cnt * _FIEMAP_EXTENT_SIZE + self._buf_size += _FIEMAP_SIZE + + # Allocate a mutable buffer for the FIEMAP ioctl + self._buf = array.array('B', [0] * self._buf_size) + + # Check if the FIEMAP ioctl is supported + self.block_is_mapped(0) + + def _invoke_fiemap(self, block, count): + """ + Invoke the FIEMAP ioctl for 'count' blocks of the file starting from + block number 'block'. 
+ + The full result of the operation is stored in 'self._buf' on exit. + Returns the unpacked 'struct fiemap' data structure in form of a python + list (just like 'struct.upack()'). + """ + + if self.blocks_cnt != 0 and (block < 0 or block >= self.blocks_cnt): + raise Error("bad block number %d, should be within [0, %d]" + % (block, self.blocks_cnt)) + + # Initialize the 'struct fiemap' part of the buffer. We use the + # '_FIEMAP_FLAG_SYNC' flag in order to make sure the file is + # synchronized. The reason for this is that early FIEMAP + # implementations had many bugs related to cached dirty data, and + # synchronizing the file is a necessary work-around. + struct.pack_into(_FIEMAP_FORMAT, self._buf, 0, block * self.block_size, + count * self.block_size, _FIEMAP_FLAG_SYNC, 0, + self._fiemap_extent_cnt, 0) + + try: + fcntl.ioctl(self._f_image, _FIEMAP_IOCTL, self._buf, 1) + except IOError as err: + # Note, the FIEMAP ioctl is supported by the Linux kernel starting + # from version 2.6.28 (year 2008). + if err.errno == os.errno.EOPNOTSUPP: + errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \ + "by the file-system" + self._log.debug(errstr) + raise ErrorNotSupp(errstr) + if err.errno == os.errno.ENOTTY: + errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \ + "by the kernel" + self._log.debug(errstr) + raise ErrorNotSupp(errstr) + raise Error("the FIEMAP ioctl failed for '%s': %s" + % (self._image_path, err)) + + return struct.unpack(_FIEMAP_FORMAT, self._buf[:_FIEMAP_SIZE]) + + def block_is_mapped(self, block): + """Refer the '_FilemapBase' class for the documentation.""" + struct_fiemap = self._invoke_fiemap(block, 1) + + # The 3rd element of 'struct_fiemap' is the 'fm_mapped_extents' field. + # If it contains zero, the block is not mapped, otherwise it is + # mapped. 
+ result = bool(struct_fiemap[3]) + self._log.debug("FilemapFiemap: block_is_mapped(%d) returns %s" + % (block, result)) + return result + + def block_is_unmapped(self, block): + """Refer the '_FilemapBase' class for the documentation.""" + return not self.block_is_mapped(block) + + def _unpack_fiemap_extent(self, index): + """ + Unpack a 'struct fiemap_extent' structure object number 'index' from + the internal 'self._buf' buffer. + """ + + offset = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE * index + return struct.unpack(_FIEMAP_EXTENT_FORMAT, + self._buf[offset : offset + _FIEMAP_EXTENT_SIZE]) + + def _do_get_mapped_ranges(self, start, count): + """ + Implements most the functionality for the 'get_mapped_ranges()' + generator: invokes the FIEMAP ioctl, walks through the mapped extents + and yields mapped block ranges. However, the ranges may be consecutive + (e.g., (1, 100), (100, 200)) and 'get_mapped_ranges()' simply merges + them. + """ + + block = start + while block < start + count: + struct_fiemap = self._invoke_fiemap(block, count) + + mapped_extents = struct_fiemap[3] + if mapped_extents == 0: + # No more mapped blocks + return + + extent = 0 + while extent < mapped_extents: + fiemap_extent = self._unpack_fiemap_extent(extent) + + # Start of the extent + extent_start = fiemap_extent[0] + # Starting block number of the extent + extent_block = extent_start / self.block_size + # Length of the extent + extent_len = fiemap_extent[2] + # Count of blocks in the extent + extent_count = extent_len / self.block_size + + # Extent length and offset have to be block-aligned + assert extent_start % self.block_size == 0 + assert extent_len % self.block_size == 0 + + if extent_block > start + count - 1: + return + + first = max(extent_block, block) + last = min(extent_block + extent_count, start + count) - 1 + yield (first, last) + + extent += 1 + + block = extent_block + extent_count + + def get_mapped_ranges(self, start, count): + """Refer the '_FilemapBase' class for the 
documentation.""" + self._log.debug("FilemapFiemap: get_mapped_ranges(%d, %d(%d))" + % (start, count, start + count - 1)) + iterator = self._do_get_mapped_ranges(start, count) + first_prev, last_prev = iterator.next() + + for first, last in iterator: + if last_prev == first - 1: + last_prev = last + else: + self._log.debug("FilemapFiemap: yielding range (%d, %d)" + % (first_prev, last_prev)) + yield (first_prev, last_prev) + first_prev, last_prev = first, last + + self._log.debug("FilemapFiemap: yielding range (%d, %d)" + % (first_prev, last_prev)) + yield (first_prev, last_prev) + + def get_unmapped_ranges(self, start, count): + """Refer the '_FilemapBase' class for the documentation.""" + self._log.debug("FilemapFiemap: get_unmapped_ranges(%d, %d(%d))" + % (start, count, start + count - 1)) + hole_first = start + for first, last in self._do_get_mapped_ranges(start, count): + if first > hole_first: + self._log.debug("FilemapFiemap: yielding range (%d, %d)" + % (hole_first, first - 1)) + yield (hole_first, first - 1) + + hole_first = last + 1 + + if hole_first < start + count: + self._log.debug("FilemapFiemap: yielding range (%d, %d)" + % (hole_first, start + count - 1)) + yield (hole_first, start + count - 1) + + +def filemap(image, log=None): + """ + Create and return an instance of a Filemap class - 'FilemapFiemap' or + 'FilemapSeek', depending on what the system we run on supports. If the + FIEMAP ioctl is supported, an instance of the 'FilemapFiemap' class is + returned. Otherwise, if 'SEEK_HOLE' is supported an instance of the + 'FilemapSeek' class is returned. If none of these are supported, the + function generates an 'Error' type exception. + """ + + try: + return FilemapFiemap(image, log) + except ErrorNotSupp: + return FilemapSeek(image, log) -- 2.7.4