1 # Copyright (c) 2012 Intel, Inc.
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License, version 2,
5 # as published by the Free Software Foundation.
7 # This program is distributed in the hope that it will be useful, but
8 # WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 # General Public License for more details.
13 This module implements python implements a way to get file block. Two methods
14 are supported - the FIEMAP ioctl and the 'SEEK_HOLE / SEEK_DATA' features of
15 the file seek syscall. The former is implemented by the 'FilemapFiemap' class,
16 the latter is implemented by the 'FilemapSeek' class. Both classes provide the
17 same API. The 'filemap' function automatically selects which class can be used
18 and returns an instance of the class.
21 # Disable the following pylint recommendations:
22 # * Too many instance attributes (R0902)
23 # pylint: disable=R0902
31 from mic.utils.misc import get_block_size
34 class ErrorNotSupp(Exception):
36 An exception of this type is raised when the 'FIEMAP' or 'SEEK_HOLE' feature
37 is not supported either by the kernel or the file-system.
41 class Error(Exception):
42 """A class for all the other exceptions raised by this module."""
46 class _FilemapBase(object):
48 This is a base class for a couple of other classes in this module. This
49 class simply performs the common parts of the initialization process: opens
50 the image file, gets its size, etc. The 'log' parameter is the logger object
51 to use for printing messages.
54 def __init__(self, image, log=None):
56 Initialize a class instance. The 'image' argument is full path to the
57 file or file object to operate on.
62 self._log = logging.getLogger(__name__)
64 self._f_image_needs_close = False
66 if hasattr(image, "fileno"):
68 self._image_path = image.name
70 self._image_path = image
71 self._open_image_file()
74 self.image_size = os.fstat(self._f_image.fileno()).st_size
75 except IOError as err:
76 raise Error("cannot get information about file '%s': %s"
77 % (self._f_image.name, err))
80 self.block_size = get_block_size(self._f_image)
81 except IOError as err:
82 raise Error("cannot get block size for '%s': %s"
83 % (self._image_path, err))
85 self.blocks_cnt = self.image_size + self.block_size - 1
86 self.blocks_cnt /= self.block_size
90 except IOError as err:
91 raise Error("cannot flush image file '%s': %s"
92 % (self._image_path, err))
95 os.fsync(self._f_image.fileno()),
96 except OSError as err:
97 raise Error("cannot synchronize image file '%s': %s "
98 % (self._image_path, err.strerror))
100 self._log.debug("opened image \"%s\"" % self._image_path)
101 self._log.debug("block size %d, blocks count %d, image size %d"
102 % (self.block_size, self.blocks_cnt, self.image_size))
105 """The class destructor which just closes the image file."""
106 if self._f_image_needs_close:
107 self._f_image.close()
109 def _open_image_file(self):
110 """Open the image file."""
112 self._f_image = open(self._image_path, 'rb')
113 except IOError as err:
114 raise Error("cannot open image file '%s': %s"
115 % (self._image_path, err))
117 self._f_image_needs_close = True
119 def block_is_mapped(self, block): # pylint: disable=W0613,R0201
121 This method has has to be implemented by child classes. It returns
122 'True' if block number 'block' of the image file is mapped and 'False'
126 raise Error("the method is not implemented")
128 def block_is_unmapped(self, block): # pylint: disable=W0613,R0201
130 This method has has to be implemented by child classes. It returns
131 'True' if block number 'block' of the image file is not mapped (hole)
132 and 'False' otherwise.
135 raise Error("the method is not implemented")
137 def get_mapped_ranges(self, start, count): # pylint: disable=W0613,R0201
139 This method has has to be implemented by child classes. This is a
140 generator which yields ranges of mapped blocks in the file. The ranges
141 are tuples of 2 elements: [first, last], where 'first' is the first
142 mapped block and 'last' is the last mapped block.
144 The ranges are yielded for the area of the file of size 'count' blocks,
145 starting from block 'start'.
148 raise Error("the method is not implemented")
150 def get_unmapped_ranges(self, start, count): # pylint: disable=W0613,R0201
152 This method has has to be implemented by child classes. Just like
153 'get_mapped_ranges()', but yields unmapped block ranges instead
157 raise Error("the method is not implemented")
160 # The 'SEEK_HOLE' and 'SEEK_DATA' options of the file seek system call
164 def _lseek(file_obj, offset, whence):
165 """This is a helper function which invokes 'os.lseek' for file object
166 'file_obj' and with specified 'offset' and 'whence'. The 'whence'
167 argument is supposed to be either '_SEEK_DATA' or '_SEEK_HOLE'. When
168 there is no more data or hole starting from 'offset', this function
169 returns '-1'. Otherwise the data or hole position is returned."""
172 return os.lseek(file_obj.fileno(), offset, whence)
173 except OSError as err:
174 # The 'lseek' system call returns the ENXIO if there is no data or
175 # hole starting from the specified offset.
176 if err.errno == os.errno.ENXIO:
178 elif err.errno == os.errno.EINVAL:
179 raise ErrorNotSupp("the kernel or file-system does not support "
180 "\"SEEK_HOLE\" and \"SEEK_DATA\"")
184 class FilemapSeek(_FilemapBase):
186 This class uses the 'SEEK_HOLE' and 'SEEK_DATA' to find file block mapping.
187 Unfortunately, the current implementation requires the caller to have write
188 access to the image file.
191 def __init__(self, image, log=None):
192 """Refer the '_FilemapBase' class for the documentation."""
194 # Call the base class constructor first
195 _FilemapBase.__init__(self, image, log)
196 self._log.debug("FilemapSeek: initializing")
198 self._probe_seek_hole()
200 def _probe_seek_hole(self):
202 Check whether the system implements 'SEEK_HOLE' and 'SEEK_DATA'.
203 Unfortunately, there seems to be no clean way for detecting this,
204 because often the system just fakes them by just assuming that all
205 files are fully mapped, so 'SEEK_HOLE' always returns EOF and
206 'SEEK_DATA' always returns the requested offset.
208 I could not invent a better way of detecting the fake 'SEEK_HOLE'
209 implementation than just to create a temporary file in the same
210 directory where the image file resides. It would be nice to change this
214 directory = os.path.dirname(self._image_path)
217 tmp_obj = tempfile.TemporaryFile("w+", dir=directory)
218 except IOError as err:
219 raise ErrorNotSupp("cannot create a temporary in \"%s\": %s"
223 os.ftruncate(tmp_obj.fileno(), self.block_size)
224 except OSError as err:
225 raise ErrorNotSupp("cannot truncate temporary file in \"%s\": %s"
228 offs = _lseek(tmp_obj, 0, _SEEK_HOLE)
230 # We are dealing with the stub 'SEEK_HOLE' implementation which
231 # always returns EOF.
232 self._log.debug("lseek(0, SEEK_HOLE) returned %d" % offs)
233 raise ErrorNotSupp("the file-system does not support "
234 "\"SEEK_HOLE\" and \"SEEK_DATA\" but only "
235 "provides a stub implementation")
239 def block_is_mapped(self, block):
240 """Refer the '_FilemapBase' class for the documentation."""
241 offs = _lseek(self._f_image, block * self.block_size, _SEEK_DATA)
245 result = (offs / self.block_size == block)
247 self._log.debug("FilemapSeek: block_is_mapped(%d) returns %s"
251 def block_is_unmapped(self, block):
252 """Refer the '_FilemapBase' class for the documentation."""
253 return not self.block_is_mapped(block)
255 def _get_ranges(self, start, count, whence1, whence2):
257 This function implements 'get_mapped_ranges()' and
258 'get_unmapped_ranges()' depending on what is passed in the 'whence1'
259 and 'whence2' arguments.
262 assert whence1 != whence2
263 end = start * self.block_size
264 limit = end + count * self.block_size
267 start = _lseek(self._f_image, end, whence1)
268 if start == -1 or start >= limit or start == self.image_size:
271 end = _lseek(self._f_image, start, whence2)
272 if end == -1 or end == self.image_size:
273 end = self.blocks_cnt * self.block_size
277 start_blk = start / self.block_size
278 end_blk = end / self.block_size - 1
279 self._log.debug("FilemapSeek: yielding range (%d, %d)"
280 % (start_blk, end_blk))
281 yield (start_blk, end_blk)
283 def get_mapped_ranges(self, start, count):
284 """Refer the '_FilemapBase' class for the documentation."""
285 self._log.debug("FilemapSeek: get_mapped_ranges(%d, %d(%d))"
286 % (start, count, start + count - 1))
287 return self._get_ranges(start, count, _SEEK_DATA, _SEEK_HOLE)
289 def get_unmapped_ranges(self, start, count):
290 """Refer the '_FilemapBase' class for the documentation."""
291 self._log.debug("FilemapSeek: get_unmapped_ranges(%d, %d(%d))"
292 % (start, count, start + count - 1))
293 return self._get_ranges(start, count, _SEEK_HOLE, _SEEK_DATA)
296 # Below goes the FIEMAP ioctl implementation, which is not very readable
297 # because it deals with the rather complex FIEMAP ioctl. To understand the
298 # code, you need to know the FIEMAP interface, which is documented in the
299 # "Documentation/filesystems/fiemap.txt" file in the Linux kernel sources.
301 # Format string for 'struct fiemap'
302 _FIEMAP_FORMAT = "=QQLLLL"
303 # sizeof(struct fiemap)
304 _FIEMAP_SIZE = struct.calcsize(_FIEMAP_FORMAT)
305 # Format string for 'struct fiemap_extent'
306 _FIEMAP_EXTENT_FORMAT = "=QQQQQLLLL"
307 # sizeof(struct fiemap_extent)
308 _FIEMAP_EXTENT_SIZE = struct.calcsize(_FIEMAP_EXTENT_FORMAT)
309 # The FIEMAP ioctl number
310 _FIEMAP_IOCTL = 0xC020660B
311 # This FIEMAP ioctl flag which instructs the kernel to sync the file before
312 # reading the block map
313 _FIEMAP_FLAG_SYNC = 0x00000001
314 # Size of the buffer for 'struct fiemap_extent' elements which will be used
315 # when invoking the FIEMAP ioctl. The larger is the buffer, the less times the
316 # FIEMAP ioctl will be invoked.
317 _FIEMAP_BUFFER_SIZE = 256 * 1024
319 class FilemapFiemap(_FilemapBase):
321 This class provides API to the FIEMAP ioctl. Namely, it allows to iterate
322 over all mapped blocks and over all holes.
324 This class synchronizes the image file every time it invokes the FIEMAP
325 ioctl in order to work-around early FIEMAP implementation kernel bugs.
328 def __init__(self, image, log=None):
330 Initialize a class instance. The 'image' argument is full the file
331 object to operate on.
334 # Call the base class constructor first
335 _FilemapBase.__init__(self, image, log)
336 self._log.debug("FilemapFiemap: initializing")
338 self._buf_size = _FIEMAP_BUFFER_SIZE
340 # Calculate how many 'struct fiemap_extent' elements fit the buffer
341 self._buf_size -= _FIEMAP_SIZE
342 self._fiemap_extent_cnt = self._buf_size / _FIEMAP_EXTENT_SIZE
343 assert self._fiemap_extent_cnt > 0
344 self._buf_size = self._fiemap_extent_cnt * _FIEMAP_EXTENT_SIZE
345 self._buf_size += _FIEMAP_SIZE
347 # Allocate a mutable buffer for the FIEMAP ioctl
348 self._buf = array.array('B', [0] * self._buf_size)
350 # Check if the FIEMAP ioctl is supported
351 self.block_is_mapped(0)
353 def _invoke_fiemap(self, block, count):
355 Invoke the FIEMAP ioctl for 'count' blocks of the file starting from
356 block number 'block'.
358 The full result of the operation is stored in 'self._buf' on exit.
359 Returns the unpacked 'struct fiemap' data structure in form of a python
360 list (just like 'struct.upack()').
363 if self.blocks_cnt != 0 and (block < 0 or block >= self.blocks_cnt):
364 raise Error("bad block number %d, should be within [0, %d]"
365 % (block, self.blocks_cnt))
367 # Initialize the 'struct fiemap' part of the buffer. We use the
368 # '_FIEMAP_FLAG_SYNC' flag in order to make sure the file is
369 # synchronized. The reason for this is that early FIEMAP
370 # implementations had many bugs related to cached dirty data, and
371 # synchronizing the file is a necessary work-around.
372 struct.pack_into(_FIEMAP_FORMAT, self._buf, 0, block * self.block_size,
373 count * self.block_size, _FIEMAP_FLAG_SYNC, 0,
374 self._fiemap_extent_cnt, 0)
377 fcntl.ioctl(self._f_image, _FIEMAP_IOCTL, self._buf, 1)
378 except IOError as err:
379 # Note, the FIEMAP ioctl is supported by the Linux kernel starting
380 # from version 2.6.28 (year 2008).
381 if err.errno == os.errno.EOPNOTSUPP:
382 errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
384 self._log.debug(errstr)
385 raise ErrorNotSupp(errstr)
386 if err.errno == os.errno.ENOTTY:
387 errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
389 self._log.debug(errstr)
390 raise ErrorNotSupp(errstr)
391 raise Error("the FIEMAP ioctl failed for '%s': %s"
392 % (self._image_path, err))
394 return struct.unpack(_FIEMAP_FORMAT, self._buf[:_FIEMAP_SIZE])
396 def block_is_mapped(self, block):
397 """Refer the '_FilemapBase' class for the documentation."""
398 struct_fiemap = self._invoke_fiemap(block, 1)
400 # The 3rd element of 'struct_fiemap' is the 'fm_mapped_extents' field.
401 # If it contains zero, the block is not mapped, otherwise it is
403 result = bool(struct_fiemap[3])
404 self._log.debug("FilemapFiemap: block_is_mapped(%d) returns %s"
408 def block_is_unmapped(self, block):
409 """Refer the '_FilemapBase' class for the documentation."""
410 return not self.block_is_mapped(block)
412 def _unpack_fiemap_extent(self, index):
414 Unpack a 'struct fiemap_extent' structure object number 'index' from
415 the internal 'self._buf' buffer.
418 offset = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE * index
419 return struct.unpack(_FIEMAP_EXTENT_FORMAT,
420 self._buf[offset : offset + _FIEMAP_EXTENT_SIZE])
422 def _do_get_mapped_ranges(self, start, count):
424 Implements most the functionality for the 'get_mapped_ranges()'
425 generator: invokes the FIEMAP ioctl, walks through the mapped extents
426 and yields mapped block ranges. However, the ranges may be consecutive
427 (e.g., (1, 100), (100, 200)) and 'get_mapped_ranges()' simply merges
432 while block < start + count:
433 struct_fiemap = self._invoke_fiemap(block, count)
435 mapped_extents = struct_fiemap[3]
436 if mapped_extents == 0:
437 # No more mapped blocks
441 while extent < mapped_extents:
442 fiemap_extent = self._unpack_fiemap_extent(extent)
444 # Start of the extent
445 extent_start = fiemap_extent[0]
446 # Starting block number of the extent
447 extent_block = extent_start / self.block_size
448 # Length of the extent
449 extent_len = fiemap_extent[2]
450 # Count of blocks in the extent
451 extent_count = extent_len / self.block_size
453 # Extent length and offset have to be block-aligned
454 assert extent_start % self.block_size == 0
455 assert extent_len % self.block_size == 0
457 if extent_block > start + count - 1:
460 first = max(extent_block, block)
461 last = min(extent_block + extent_count, start + count) - 1
466 block = extent_block + extent_count
468 def get_mapped_ranges(self, start, count):
469 """Refer the '_FilemapBase' class for the documentation."""
470 self._log.debug("FilemapFiemap: get_mapped_ranges(%d, %d(%d))"
471 % (start, count, start + count - 1))
472 iterator = self._do_get_mapped_ranges(start, count)
473 first_prev, last_prev = iterator.next()
475 for first, last in iterator:
476 if last_prev == first - 1:
479 self._log.debug("FilemapFiemap: yielding range (%d, %d)"
480 % (first_prev, last_prev))
481 yield (first_prev, last_prev)
482 first_prev, last_prev = first, last
484 self._log.debug("FilemapFiemap: yielding range (%d, %d)"
485 % (first_prev, last_prev))
486 yield (first_prev, last_prev)
488 def get_unmapped_ranges(self, start, count):
489 """Refer the '_FilemapBase' class for the documentation."""
490 self._log.debug("FilemapFiemap: get_unmapped_ranges(%d, %d(%d))"
491 % (start, count, start + count - 1))
493 for first, last in self._do_get_mapped_ranges(start, count):
494 if first > hole_first:
495 self._log.debug("FilemapFiemap: yielding range (%d, %d)"
496 % (hole_first, first - 1))
497 yield (hole_first, first - 1)
499 hole_first = last + 1
501 if hole_first < start + count:
502 self._log.debug("FilemapFiemap: yielding range (%d, %d)"
503 % (hole_first, start + count - 1))
504 yield (hole_first, start + count - 1)
507 def filemap(image, log=None):
509 Create and return an instance of a Filemap class - 'FilemapFiemap' or
510 'FilemapSeek', depending on what the system we run on supports. If the
511 FIEMAP ioctl is supported, an instance of the 'FilemapFiemap' class is
512 returned. Otherwise, if 'SEEK_HOLE' is supported an instance of the
513 'FilemapSeek' class is returned. If none of these are supported, the
514 function generates an 'Error' type exception.
518 return FilemapFiemap(image, log)
520 return FilemapSeek(image, log)