import warnings
class ReadFilesFromTarIterDataPipe(IterDataPipe[Tuple[str, BufferedIOBase]]):
- r""" :class:`ReadFilesFromTarIDP`.
+ r""":class:`ReadFilesFromTarIterDataPipe`.
Iterable datapipe to extract tar binary streams from input iterable which contains tuples of
pathname and tar binary stream, yields pathname and extracted binary stream in a tuple.
args:
datapipe: Iterable datapipe that provides pathname and tar binary stream in tuples
+ mode: File mode used by `tarfile.open` to read file object. Mode has to be a string of the form 'filemode[:compression]'
length: a nominal length of the datapipe
Note:
or let Python's GC close them periodly.
"""
def __init__(
- self,
- datapipe : Iterable[Tuple[str, BufferedIOBase]],
- length : int = -1):
+ self,
+ datapipe : Iterable[Tuple[str, BufferedIOBase]],
+ mode: str = "r:*",
+ length : int = -1
+ ):
super().__init__()
- self.datapipe : Iterable[Tuple[str, BufferedIOBase]] = datapipe
- self.length : int = length
-
+ self.datapipe: Iterable[Tuple[str, BufferedIOBase]] = datapipe
+ self.mode = mode
+ self.length: int = length
def __iter__(self) -> Iterator[Tuple[str, BufferedIOBase]]:
if not isinstance(self.datapipe, Iterable):
pathname, data_stream = data
try:
# typing.cast is used here to silence mypy's type checker
- tar = tarfile.open(fileobj=cast(Optional[IO[bytes]], data_stream), mode="r:*")
+ tar = tarfile.open(fileobj=cast(Optional[IO[bytes]], data_stream), mode=self.mode)
for tarinfo in tar:
if not tarinfo.isfile():
continue
"Unable to extract files from corrupted tarfile stream {} due to: {}, abort!".format(pathname, e))
raise e
-
def __len__(self):
if self.length == -1:
raise TypeError("{} instance doesn't have valid length".format(type(self).__name__))