From: Artem Bityutskiy Date: Fri, 14 Dec 2012 13:49:11 +0000 (+0200) Subject: TransRead: implement reading from file object X-Git-Tag: v2.0~50 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=96b423e1a16386b2386659908d99879bc09b96da;p=tools%2Fbmap-tools.git TransRead: implement reading from file object This patch changes the way we open the file: instead of opening by name, open it by its file object. This is not needed right now, but one of the next patches will implement reading from a URL, in which case we'll have to be able to read and decompress from a urllib file object. In other words, this is a preparation step. The good thing is that both the tarfile and gzip modules allow opening by file object. However, the bad news is that the bz2 module does not support this. This is why we implement the '_Bzip2Read' class which is a simple wrapper over bzip2's 'stream decompressor': we just read from the back-end file-like object, stream the data through the bzip2 decompressor, and provide the result to the caller. Change-Id: I241c1dad4a0bc9619af61f393e6834b9568545fc Signed-off-by: Artem Bityutskiy --- diff --git a/bmaptools/TransRead.py b/bmaptools/TransRead.py index 58f067d..af02b49 100644 --- a/bmaptools/TransRead.py +++ b/bmaptools/TransRead.py @@ -4,10 +4,147 @@ decompress the contents on-the-fly. """ import os import stat +import bz2 # A list of supported compression types SUPPORTED_COMPRESSION_TYPES = ('bz2', 'gz', 'tar.gz', 'tgz', 'tar.bz2') +def _fake_seek_forward(file_obj, cur_pos, offset, whence = os.SEEK_SET): + """ Seek to a specified offset. We only support seeking forward and + only relative to the beginning of the file and to the current + position. The arguments are: + 1. 'file_obj' - file-like object to emulate 'seek()' for + 2. 'cur_pos' - current file position of 'file_ojb', which supposedly + also does not support 'tell()' + 3. 'offset' and 'whence' are the standard 'seek()' arguments + + Returns the new 'file_obj' position. 
""" + + if whence == os.SEEK_SET: + new_pos = offset + elif whence == os.SEEK_CUR: + new_pos = cur_pos + offset + else: + raise Error("_Bzip2Read's 'seek()' method requires 'whence' " \ + "argument to be %d or %d, but %d was passed" \ + % (os.SEEK_SET, os.SEEK_CUR, whence)) + + if new_pos < cur_pos: + raise Error("_Bzip2Read' seek() method supports only seeking " \ + "forward, seeking from %d to %d is not allowed" \ + % (cur_pos, new_pos)) + + length = new_pos - cur_pos + to_read = length + while to_read > 0: + buf = file_obj.read(to_read) + if not buf: + break + to_read -= len(buf) + + cur_pos = cur_pos + (length - to_read) + + if to_read < 0: + raise Error("seeked too far: %d instead of %d" % (cur_pos, new_pos)) + + return cur_pos + +class _Bzip2Read: + """ This class implements transparent reading from a bzip2-compressed + file-like object and decompressing the contents on-the-fly. The only reason + this class exists is that the standard python 2 bz2.Bzip2File() class does + not accept file-like objects and requires a file name. + + To read a bzip2-compressed file-like object, create an instance of this + class and use its 'read()' method. In other words, the instances of this + class are "read-only" file-like objects. 'seek()' is supported, but only + forward. + + Note, this class is very simple and does not implement many things, e.g., + there is no locking. """ + + def __init__(self, file_obj): + """ Class constructor. The 'file_ojb' argument is the bzip2-compressed + file-like object to read from. """ + + self._pos = 0 + self._file_obj = file_obj + self._decompressor = bz2.BZ2Decompressor() + self._buffer = '' + self._buffer_pos = 0 + self._eof = False + + def _read_from_buffer(self, length): + """ Read from the internal buffer which contains the extra data we read + last time. 
""" + + buffer_len = len(self._buffer) + if buffer_len - self._buffer_pos > length: + data = self._buffer[self._buffer_pos:self._buffer_pos + length] + self._buffer_pos += length + else: + data = self._buffer[self._buffer_pos:] + self._buffer = '' + self._buffer_pos = 0 + + return data + + def read(self, size): + """ Read the bzip2-compressed file, uncompress the data on-the-fly, and + return 'size' bytes of the uncompressed data. """ + + assert self._pos >= 0 + assert self._buffer_pos >= 0 + assert self._buffer_pos <= len(self._buffer) + + if self._eof: + return '' + + # Fetch the data from the buffers first + data = self._read_from_buffer(size) + size -= len(data) + + # If the buffers did not contain all the requested data, read them, + # decompress, and buffer. + chunk_size = max(size, 128 * 1024) + while size > 0: + buf = self._file_obj.read(chunk_size) + if not buf: + self._eof = True + break + + buf = self._decompressor.decompress(buf) + if not buf: + continue + + assert len(self._buffer) == 0 + assert self._buffer_pos == 0 + + if len(buf) >= size: + self._buffer = buf + data += self._read_from_buffer(size) + else: + data += buf + + size -= len(buf) + + self._pos += len(data) + return data + + def seek(self, offset, whence = os.SEEK_SET): + """ Fake 'seek()' implementation limited to seeking forward. """ + + _fake_seek_forward(self, self._pos, offset, whence) + + def tell(self): + """ Return current position. """ + + return self._pos + + def close(self): + """ Close the file-like object. """ + pass + class Error(Exception): """ A class for exceptions generated by this module. 
We currently support only one type of exceptions, and we basically throw human-readable problem @@ -31,7 +168,7 @@ class TransRead: or self.filepath.endswith('.tgz'): import tarfile - tar = tarfile.open(self.filepath, 'r') + tar = tarfile.open(fileobj = self._file_obj, mode = 'r') # The tarball is supposed to contain only one single member members = tar.getmembers() if len(members) > 1: @@ -46,11 +183,10 @@ class TransRead: elif self.filepath.endswith('.gz'): import gzip - self._transfile_obj = gzip.GzipFile(self.filepath, 'rb') + self._transfile_obj = gzip.GzipFile(fileobj = self._file_obj, + mode = 'rb') elif self.filepath.endswith('.bz2'): - import bz2 - - self._transfile_obj = bz2.BZ2File(self.filepath, 'rb') + self._transfile_obj = _Bzip2Read(self._file_obj) else: self.is_compressed = False self._transfile_obj = self._file_obj