import numpy as np
import torch
-import torch.nn as nn
import torch.utils.data.backward_compatibility
import torch.utils.data.datapipes as dp
import torch.utils.data.graph
try:
- import torchvision.transforms
- HAS_TORCHVISION = True
-except ImportError:
- HAS_TORCHVISION = False
-skipIfNoTorchVision = skipIf(not HAS_TORCHVISION, "no torchvision")
-
-try:
import dill
# XXX: By default, dill writes the Pickler dispatch table to inject its
# own logic there. This globally affects the behavior of the standard library
# pickler for any user who transitively depends on this module.
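# A common remedy (a sketch only; the surrounding, elided code may differ)
# is to undo dill's extension so the standard pickler is left untouched:
#     dill.extend(use_dill=False)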
def test_listdirfiles_iterable_datapipe(self):
temp_dir = self.temp_dir.name
- datapipe = dp.iter.ListDirFiles(temp_dir, '')
+ datapipe = dp.iter.FileLister(temp_dir, '')
count = 0
for pathname in datapipe:
self.assertEqual(count, len(self.temp_files))
count = 0
- datapipe = dp.iter.ListDirFiles(temp_dir, '', recursive=True)
+ datapipe = dp.iter.FileLister(temp_dir, '', recursive=True)
for pathname in datapipe:
count = count + 1
self.assertTrue((pathname in self.temp_files) or (pathname in self.temp_sub_files))
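# --- Usage sketch (illustrative only, not part of the diff) ---
# FileLister (formerly ListDirFiles) yields pathnames under a root directory
# that match the given mask; recursive=True also walks subdirectories.
import os
import tempfile
from torch.utils.data.datapipes.iter import FileLister

with tempfile.TemporaryDirectory() as root:
    open(os.path.join(root, 'a.txt'), 'w').close()
    assert len(list(FileLister(root, '*.txt'))) == 1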
def test_loadfilesfromdisk_iterable_datapipe(self):
# Test importing the DataPipe classes directly
from torch.utils.data.datapipes.iter import (
- ListDirFiles,
- LoadFilesFromDisk,
+ FileLister,
+ FileLoader,
)
temp_dir = self.temp_dir.name
- datapipe1 = ListDirFiles(temp_dir, '')
- datapipe2 = LoadFilesFromDisk(datapipe1)
+ datapipe1 = FileLister(temp_dir, '')
+ datapipe2 = FileLoader(datapipe1)
count = 0
for rec in datapipe2:
tar.add(self.temp_files[0])
tar.add(self.temp_files[1])
tar.add(self.temp_files[2])
- datapipe1 = dp.iter.ListDirFiles(temp_dir, '*.tar')
- datapipe2 = dp.iter.LoadFilesFromDisk(datapipe1)
- datapipe3 = dp.iter.ReadFilesFromTar(datapipe2)
+ datapipe1 = dp.iter.FileLister(temp_dir, '*.tar')
+ datapipe2 = dp.iter.FileLoader(datapipe1)
+ datapipe3 = dp.iter.TarArchiveReader(datapipe2)
# read extracted files before reaching the end of the tarfile
for rec, temp_file in itertools.zip_longest(datapipe3, self.temp_files):
self.assertTrue(rec is not None and temp_file is not None)
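# --- Usage sketch (illustrative only, not part of the diff) ---
# The renamed chain: FileLister finds archives, FileLoader opens each as a
# (pathname, stream) pair, and TarArchiveReader (formerly ReadFilesFromTar)
# lazily yields one (pathname, stream) pair per archive member.
lister = dp.iter.FileLister(temp_dir, '*.tar')  # reuses the test's temp_dir
loader = dp.iter.FileLoader(lister)
members = dp.iter.TarArchiveReader(loader)
for member_path, stream in members:
    payload = stream.read()  # bytes of one extracted member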
myzip.write(self.temp_files[0])
myzip.write(self.temp_files[1])
myzip.write(self.temp_files[2])
- datapipe1 = dp.iter.ListDirFiles(temp_dir, '*.zip')
- datapipe2 = dp.iter.LoadFilesFromDisk(datapipe1)
- datapipe3 = dp.iter.ReadFilesFromZip(datapipe2)
+ datapipe1 = dp.iter.FileLister(temp_dir, '*.zip')
+ datapipe2 = dp.iter.FileLoader(datapipe1)
+ datapipe3 = dp.iter.ZipArchiveReader(datapipe2)
# read extracted files before reaching the end of the zipfile
for rec, temp_file in itertools.zip_longest(datapipe3, self.temp_files):
self.assertTrue(rec is not None and temp_file is not None)
temp_pngfile_pathname = os.path.join(temp_dir, "test_png.png")
png_data = np.array([[[1., 0., 0.], [1., 0., 0.]], [[1., 0., 0.], [1., 0., 0.]]], dtype=np.single)
np.save(temp_pngfile_pathname, png_data)
- datapipe1 = dp.iter.ListDirFiles(temp_dir, ['*.png', '*.txt'])
- datapipe2 = dp.iter.LoadFilesFromDisk(datapipe1)
+ datapipe1 = dp.iter.FileLister(temp_dir, ['*.png', '*.txt'])
+ datapipe2 = dp.iter.FileLoader(datapipe1)
def _png_decoder(extension, data):
if extension != 'png':
f.write('12345abcde')
tar.add(file_pathname)
- datapipe1 = dp.iter.ListDirFiles(temp_dir, '*.tar')
- datapipe2 = dp.iter.LoadFilesFromDisk(datapipe1)
- datapipe3 = dp.iter.ReadFilesFromTar(datapipe2)
- datapipe4 = dp.iter.GroupByKey(datapipe3, group_size=2)
+ datapipe1 = dp.iter.FileLister(temp_dir, '*.tar')
+ datapipe2 = dp.iter.FileLoader(datapipe1)
+ datapipe3 = dp.iter.TarArchiveReader(datapipe2)
+ datapipe4 = dp.iter.ByKeyGrouper(datapipe3, group_size=2)
expected_result = [("a.png", "a.json"), ("c.png", "c.json"), ("b.png", "b.json"), ("d.png", "d.json"), (
"f.png", "f.json"), ("g.png", "g.json"), ("e.png", "e.json"), ("h.json", "h.txt")]
create_temp_files_for_serving(tmpdir, test_file_count,
test_file_size, file_url_template)
- datapipe_dir_f = dp.iter.ListDirFiles(tmpdir, '*_list')
- datapipe_f_lines = dp.iter.ReadLinesFromFile(datapipe_dir_f)
+ datapipe_dir_f = dp.iter.FileLister(tmpdir, '*_list')
+ datapipe_stream = dp.iter.FileLoader(datapipe_dir_f)
+ datapipe_f_lines = dp.iter.LineReader(datapipe_stream)
datapipe_line_url: IterDataPipe[str] = \
- dp.iter.Map(datapipe_f_lines, _get_data_from_tuple_fn, (1,))
+ dp.iter.Mapper(datapipe_f_lines, _get_data_from_tuple_fn, (1,))
datapipe_http = dp.iter.HttpReader(datapipe_line_url,
timeout=timeout)
- datapipe_tob = dp.iter.ToBytes(datapipe_http, chunk=chunk)
+ datapipe_tob = dp.iter.StreamReader(datapipe_http, chunk=chunk)
for (url, data) in datapipe_tob:
self.assertGreater(len(url), 0)
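# --- Usage sketch (illustrative only; the URL below is hypothetical) ---
# HttpReader fetches each URL and yields (url, response stream);
# StreamReader (formerly ToBytes) then drains each stream into byte
# chunks of at most `chunk` bytes.
urls = dp.iter.IterableAsDataPipe(['http://localhost:8000/file_0'])
responses = dp.iter.HttpReader(urls, timeout=5)
for url, chunk_bytes in dp.iter.StreamReader(responses, chunk=1024):
    assert isinstance(chunk_bytes, bytes)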
def _test_picklable(self):
arr = range(10)
picklable_datapipes: List[Tuple[Type[IterDataPipe], IterDataPipe, Tuple, Dict[str, Any]]] = [
- (dp.iter.Map, IDP(arr), (), {}),
- (dp.iter.Map, IDP(arr), (_fake_fn, (0, ), {'test': True}), {}),
- (dp.iter.Collate, IDP(arr), (), {}),
- (dp.iter.Collate, IDP(arr), (_fake_fn, (0, ), {'test': True}), {}),
+ (dp.iter.Mapper, IDP(arr), (), {}),
+ (dp.iter.Mapper, IDP(arr), (_fake_fn, (0, ), {'test': True}), {}),
+ (dp.iter.Collator, IDP(arr), (), {}),
+ (dp.iter.Collator, IDP(arr), (_fake_fn, (0, ), {'test': True}), {}),
(dp.iter.Filter, IDP(arr), (_fake_filter_fn, (0, ), {'test': True}), {}),
]
for dpipe, input_dp, dp_args, dp_kwargs in picklable_datapipes:
p = pickle.dumps(dpipe(input_dp, *dp_args, **dp_kwargs)) # type: ignore[call-arg]
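# --- Round-trip sketch (illustrative only; reuses the test's IDP and
# _fake_fn helpers, assuming _fake_fn returns its input unchanged) ---
clone = pickle.loads(pickle.dumps(dp.iter.Mapper(IDP(range(3)), _fake_fn)))
assert list(clone) == [0, 1, 2]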
unpicklable_datapipes: List[Tuple[Type[IterDataPipe], IterDataPipe, Tuple, Dict[str, Any]]] = [
- (dp.iter.Map, IDP(arr), (lambda x: x, ), {}),
- (dp.iter.Collate, IDP(arr), (lambda x: x, ), {}),
+ (dp.iter.Mapper, IDP(arr), (lambda x: x, ), {}),
+ (dp.iter.Collator, IDP(arr), (lambda x: x, ), {}),
(dp.iter.Filter, IDP(arr), (lambda x: x >= 5, ), {}),
]
for dpipe, input_dp, dp_args, dp_kwargs in unpicklable_datapipes:
input_dp2 = IDP(range(5))
with self.assertRaisesRegex(ValueError, r"Expected at least one DataPipe"):
- dp.iter.Concat()
+ dp.iter.Concater()
with self.assertRaisesRegex(TypeError, r"Expected all inputs to be `IterDataPipe`"):
- dp.iter.Concat(input_dp1, ()) # type: ignore[arg-type]
+ dp.iter.Concater(input_dp1, ()) # type: ignore[arg-type]
concat_dp = input_dp1.concat(input_dp2)
self.assertEqual(len(concat_dp), 15)
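# --- Usage sketch (illustrative only) ---
# Concater (formerly Concat) chains inputs end to end; .concat() is its
# functional form, and its length is the sum of the inputs' lengths.
a = dp.iter.IterableAsDataPipe(range(10))
b = dp.iter.IterableAsDataPipe(range(5))
assert list(a.concat(b)) == list(range(10)) + list(range(5))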
with self.assertRaisesRegex(TypeError, r"instance doesn't have valid length$"):
len(shuffle_dp_nl)
- @skipIfNoTorchVision
- def test_transforms_datapipe(self):
- torch.set_default_dtype(torch.float)
- # A sequence of numpy random numbers representing 3-channel images
- w = h = 32
- inputs = [np.random.randint(0, 255, (h, w, 3), dtype=np.uint8) for i in range(10)]
- tensor_inputs = [torch.tensor(x, dtype=torch.float).permute(2, 0, 1) / 255. for x in inputs]
-
- input_dp = IDP(inputs)
- # Raise TypeError for python function
- with self.assertRaisesRegex(TypeError, r"`transforms` are required to be"):
- input_dp.legacy_transforms(_fake_fn)
-
- # transforms.Compose of several transforms
- transforms = torchvision.transforms.Compose([
- torchvision.transforms.ToTensor(),
- torchvision.transforms.Pad(1, fill=1, padding_mode='constant'),
- ])
- tsfm_dp = input_dp.legacy_transforms(transforms)
- self.assertEqual(len(tsfm_dp), len(input_dp))
- for tsfm_data, input_data in zip(tsfm_dp, tensor_inputs):
- self.assertEqual(tsfm_data[:, 1:(h + 1), 1:(w + 1)], input_data)
-
- # nn.Sequential of several transforms (required to be instances of nn.Module)
- input_dp = IDP(tensor_inputs)
- transforms = nn.Sequential(
- torchvision.transforms.Pad(1, fill=1, padding_mode='constant'),
- )
- tsfm_dp = input_dp.legacy_transforms(transforms)
- self.assertEqual(len(tsfm_dp), len(input_dp))
- for tsfm_data, input_data in zip(tsfm_dp, tensor_inputs):
- self.assertEqual(tsfm_data[:, 1:(h + 1), 1:(w + 1)], input_data)
-
- # Single transform
- input_dp = IDP_NoLen(inputs) # type: ignore[assignment]
- transform = torchvision.transforms.ToTensor()
- tsfm_dp = input_dp.legacy_transforms(transform)
- with self.assertRaisesRegex(TypeError, r"instance doesn't have valid length$"):
- len(tsfm_dp)
- for tsfm_data, input_data in zip(tsfm_dp, tensor_inputs):
- self.assertEqual(tsfm_data, input_data)
-
def test_zip_datapipe(self):
with self.assertRaises(TypeError):
- dp.iter.Zip(IDP(range(10)), list(range(10))) # type: ignore[arg-type]
+ dp.iter.Zipper(IDP(range(10)), list(range(10))) # type: ignore[arg-type]
- zipped_dp = dp.iter.Zip(IDP(range(10)), IDP_NoLen(range(5))) # type: ignore[var-annotated]
+ zipped_dp = dp.iter.Zipper(IDP(range(10)), IDP_NoLen(range(5))) # type: ignore[var-annotated]
with self.assertRaisesRegex(TypeError, r"instance doesn't have valid length$"):
len(zipped_dp)
exp = list((i, i) for i in range(5))
self.assertEqual(list(zipped_dp), exp)
- zipped_dp = dp.iter.Zip(IDP(range(10)), IDP(range(5)))
+ zipped_dp = dp.iter.Zipper(IDP(range(10)), IDP(range(5)))
self.assertEqual(len(zipped_dp), 5)
self.assertEqual(list(zipped_dp), exp)
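# --- Usage sketch (illustrative only) ---
# Zipper (formerly Zip) aggregates across DataPipes like built-in zip():
# iteration stops at the shortest input, and len() is only valid when
# every input has a valid length.
z = dp.iter.Zipper(dp.iter.IterableAsDataPipe(range(3)),
                   dp.iter.IterableAsDataPipe('abc'))
assert list(z) == [(0, 'a'), (1, 'b'), (2, 'c')]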
# Reset
picklable_datapipes: List[
Tuple[Type[MapDataPipe], MapDataPipe, Tuple, Dict[str, Any]]
] = [
- (dp.map.Map, MDP(arr), (), {}),
- (dp.map.Map, MDP(arr), (_fake_fn, (0,), {'test': True}), {}),
+ (dp.map.Mapper, MDP(arr), (), {}),
+ (dp.map.Mapper, MDP(arr), (_fake_fn, (0,), {'test': True}), {}),
]
for dpipe, input_dp, dp_args, dp_kwargs in picklable_datapipes:
p = pickle.dumps(dpipe(input_dp, *dp_args, **dp_kwargs)) # type: ignore[call-arg]
unpicklable_datapipes: List[
Tuple[Type[MapDataPipe], MapDataPipe, Tuple, Dict[str, Any]]
] = [
- (dp.map.Map, MDP(arr), (lambda x: x,), {}),
+ (dp.map.Mapper, MDP(arr), (lambda x: x,), {}),
]
for dpipe, input_dp, dp_args, dp_kwargs in unpicklable_datapipes:
with warnings.catch_warnings(record=True) as wa:
input_dp2 = MDP(range(5))
with self.assertRaisesRegex(ValueError, r"Expected at least one DataPipe"):
- dp.map.Concat()
+ dp.map.Concater()
with self.assertRaisesRegex(TypeError, r"Expected all inputs to be `MapDataPipe`"):
- dp.map.Concat(input_dp1, ()) # type: ignore[arg-type]
+ dp.map.Concater(input_dp1, ()) # type: ignore[arg-type]
concat_dp = input_dp1.concat(input_dp2)
self.assertEqual(len(concat_dp), 15)
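# --- Usage sketch (illustrative only; MDP is the test's map-style wrapper) ---
# The map-style Concater resolves an index against its inputs in order:
# indices past the first input's length fall through to the next one.
m1, m2 = MDP(list(range(10))), MDP(list(range(5)))
cat = m1.concat(m2)
assert cat[12] == 2  # index 12 maps to element 2 of the second input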
from torch.utils.data.datapipes.iter.callable import (
- CollateIterDataPipe as Collate,
- MapIterDataPipe as Map,
- TransformsIterDataPipe as Transforms,
+ CollatorIterDataPipe as Collator,
+ MapperIterDataPipe as Mapper,
)
from torch.utils.data.datapipes.iter.combinatorics import (
SamplerIterDataPipe as Sampler,
- ShuffleIterDataPipe as Shuffle,
+ ShufflerIterDataPipe as Shuffler,
)
from torch.utils.data.datapipes.iter.combining import (
- ConcatIterDataPipe as Concat,
- ZipIterDataPipe as Zip,
+ ConcaterIterDataPipe as Concater,
+ ZipperIterDataPipe as Zipper,
+)
+from torch.utils.data.datapipes.iter.filelister import (
+ FileListerIterDataPipe as FileLister,
+)
+from torch.utils.data.datapipes.iter.fileloader import (
+ FileLoaderIterDataPipe as FileLoader,
)
from torch.utils.data.datapipes.iter.grouping import (
- BatchIterDataPipe as Batch,
+ BatcherIterDataPipe as Batcher,
BucketBatcherIterDataPipe as BucketBatcher,
- GroupByKeyIterDataPipe as GroupByKey,
+ ByKeyGrouperIterDataPipe as ByKeyGrouper,
)
from torch.utils.data.datapipes.iter.httpreader import (
HTTPReaderIterDataPipe as HttpReader,
)
-from torch.utils.data.datapipes.iter.listdirfiles import (
- ListDirFilesIterDataPipe as ListDirFiles,
-)
-from torch.utils.data.datapipes.iter.loadfilesfromdisk import (
- LoadFilesFromDiskIterDataPipe as LoadFilesFromDisk,
-)
-from torch.utils.data.datapipes.iter.readfilesfromtar import (
- ReadFilesFromTarIterDataPipe as ReadFilesFromTar,
-)
-from torch.utils.data.datapipes.iter.readfilesfromzip import (
- ReadFilesFromZipIterDataPipe as ReadFilesFromZip,
-)
-from torch.utils.data.datapipes.iter.readlinesfromfile import (
- ReadLinesFromFileIterDataPipe as ReadLinesFromFile,
+from torch.utils.data.datapipes.iter.linereader import (
+ LineReaderIterDataPipe as LineReader,
)
from torch.utils.data.datapipes.iter.routeddecoder import (
RoutedDecoderIterDataPipe as RoutedDecoder,
from torch.utils.data.datapipes.iter.selecting import (
FilterIterDataPipe as Filter,
)
-from torch.utils.data.datapipes.iter.tobytes import (
- ToBytesIterDataPipe as ToBytes,
+from torch.utils.data.datapipes.iter.streamreader import (
+ StreamReaderIterDataPipe as StreamReader,
+)
+from torch.utils.data.datapipes.iter.tararchivereader import (
+ TarArchiveReaderIterDataPipe as TarArchiveReader,
+)
+from torch.utils.data.datapipes.iter.ziparchivereader import (
+ ZipArchiveReaderIterDataPipe as ZipArchiveReader,
)
from torch.utils.data.datapipes.iter.utils import (
IterableAsDataPipeIterDataPipe as IterableAsDataPipe,
)
-__all__ = ['Batch',
+__all__ = ['Batcher',
'BucketBatcher',
- 'Collate',
- 'Concat',
+ 'ByKeyGrouper',
+ 'Collator',
+ 'Concater',
+ 'FileLister',
+ 'FileLoader',
'Filter',
- 'GroupByKey',
'HttpReader',
'IterableAsDataPipe',
- 'ListDirFiles',
- 'LoadFilesFromDisk',
- 'Map',
- 'ReadFilesFromTar',
- 'ReadFilesFromZip',
- 'ReadLinesFromFile',
+ 'LineReader',
+ 'Mapper',
'RoutedDecoder',
'Sampler',
- 'Shuffle',
- 'ToBytes',
- 'Transforms',
- 'Zip']
+ 'Shuffler',
+ 'StreamReader',
+ 'TarArchiveReader',
+ 'ZipArchiveReader',
+ 'Zipper']
# Please keep this list sorted
assert __all__ == sorted(__all__)
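# Rename summary (old -> new), as reflected in this diff:
#   Batch -> Batcher            ListDirFiles -> FileLister
#   Collate -> Collator         LoadFilesFromDisk -> FileLoader
#   Concat -> Concater          ReadFilesFromTar -> TarArchiveReader
#   GroupByKey -> ByKeyGrouper  ReadFilesFromZip -> ZipArchiveReader
#   Map -> Mapper               ReadLinesFromFile -> LineReader
#   Shuffle -> Shuffler         ToBytes -> StreamReader
#   Zip -> Zipper               (Transforms removed along with its test)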