From bb3a2d99ac2210ee75daa99f6320464948ee2b84 Mon Sep 17 00:00:00 2001
From: Jaliya Ekanayake
Date: Sat, 23 Feb 2019 08:46:24 -0800
Subject: [PATCH] Jaliyae/chunk buffer fix (#17409)

Summary:
The chunk buffer could hang when no data was read and the buffer size was
smaller than the chunk size. We detected this while running with a larger
dataset, hence the fix. I added a test to mimic the situation and validated
that the fix works. Thank you, Xueyun, for finding this issue.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/17409

Differential Revision: D14198546

Pulled By: soumith

fbshipit-source-id: b8ca43b0400deaae2ebb6601fdc65b47f32b0554
---
 test/cpp/api/dataloader.cpp                        | 36 ++++++++++++++++++++++
 torch/csrc/api/include/torch/data/datasets/chunk.h |  8 +++++
 2 files changed, 44 insertions(+)

diff --git a/test/cpp/api/dataloader.cpp b/test/cpp/api/dataloader.cpp
index 2c1717c..9601911 100644
--- a/test/cpp/api/dataloader.cpp
+++ b/test/cpp/api/dataloader.cpp
@@ -1843,3 +1843,39 @@ TEST(DataLoaderTest, CanAccessChunkSamplerWithChunkDataSet) {
   // 3 chunks, and when exhausted the value is already incremented.
   ASSERT_EQ(chunk_sampler.index(), 3);
 }
+
+TEST(DataLoaderTest, ChunkDatasetDoesNotHang) {
+  const size_t prefetch_count = 2;
+  const size_t batch_size = 5;
+  // this will make the preloaders to wait till the `get_batch()` calls.
+  const size_t cache_size = 10;
+
+  DummyChunkDataReader data_reader;
+  samplers::SequentialSampler sampler(0);
+  datasets::SharedBatchDataset<datasets::ChunkDataset<
+      DummyChunkDataReader,
+      samplers::SequentialSampler,
+      samplers::SequentialSampler>>
+      dataset = datasets::make_shared_dataset<datasets::ChunkDataset<
+          DummyChunkDataReader,
+          samplers::SequentialSampler,
+          samplers::SequentialSampler>>(
+          data_reader,
+          sampler,
+          sampler,
+          datasets::ChunkDatasetOptions(
+              prefetch_count, batch_size, cache_size));
+
+  samplers::SequentialSampler& chunk_sampler = dataset->chunk_sampler();
+
+  auto data_loader = torch::data::make_data_loader(
+      dataset.map(transforms::BatchLambda<std::vector<int>, int>(
+          [](std::vector<int> batch) {
+            return std::accumulate(batch.begin(), batch.end(), 0);
+          })),
+      DataLoaderOptions(batch_size).workers(0));
+  // simply creates the iterator but no iteration. chunk preloaders are waiting
+  // to fill the batch buffer but it is not draining. Still we need to exit
+  // cleanly.
+  auto iterator = data_loader->begin();
+}
diff --git a/torch/csrc/api/include/torch/data/datasets/chunk.h b/torch/csrc/api/include/torch/data/datasets/chunk.h
index 1507dc3..8326816 100644
--- a/torch/csrc/api/include/torch/data/datasets/chunk.h
+++ b/torch/csrc/api/include/torch/data/datasets/chunk.h
@@ -293,6 +293,10 @@ class ChunkDataset final
         running_preloaders_(0) {}

   virtual ~ChunkDataset() {
+    // stop batch buffer first.
+    if (batch_buffer_) {
+      batch_buffer_->stop();
+    }
     free_workers();
   }

@@ -317,6 +321,10 @@ class ChunkDataset final
   /// This will clear any internal state and starts the internal prefetching
   /// mechanism for the chunk dataset.
   void reset() override {
+    // We need this to support partial data reads via dataloader iterator.
+    if (batch_buffer_) {
+      batch_buffer_->stop();
+    }
     // free workers from previous reset if there is any.
     free_workers();
     preload_threads_.clear();
-- 
2.7.4
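
Editor's note (not part of the original patch): the hang is the classic bounded
producer/consumer problem. The preloader threads block while the batch buffer is
full (a cache_size smaller than a chunk means it fills up before a chunk is fully
written), and if the dataloader iterator never drains the buffer, those threads
wait forever; the destructor then blocks in free_workers() when it joins them.
The patch breaks that wait by calling batch_buffer_->stop() before freeing the
workers. The sketch below illustrates only that mechanism, under assumed names:
BoundedBatchBuffer, add(), and stop() are hypothetical stand-ins, not the actual
torch::data buffer API.

#include <condition_variable>
#include <cstddef>
#include <deque>
#include <mutex>
#include <utility>
#include <vector>

// Hypothetical stand-in for the chunk dataset's internal batch buffer.
class BoundedBatchBuffer {
 public:
  explicit BoundedBatchBuffer(std::size_t capacity) : capacity_(capacity) {}

  // Producer side (preloader threads): blocks while the buffer is full,
  // unless stop() has been requested. Returns false when stopped so the
  // caller can exit instead of hanging.
  bool add(std::vector<int> batch) {
    std::unique_lock<std::mutex> lock(mutex_);
    not_full_.wait(lock, [&] { return stop_ || queue_.size() < capacity_; });
    if (stop_) {
      return false;
    }
    queue_.push_back(std::move(batch));
    return true;
  }

  // Called before joining the preloader threads (as the patch does in
  // ~ChunkDataset() and reset()): wakes every producer blocked in add().
  void stop() {
    {
      std::lock_guard<std::mutex> lock(mutex_);
      stop_ = true;
    }
    not_full_.notify_all();
  }

 private:
  const std::size_t capacity_;
  bool stop_ = false;
  std::deque<std::vector<int>> queue_;
  std::mutex mutex_;
  std::condition_variable not_full_;
};

Without stop(), a producer parked in not_full_.wait() can never be joined, which
is exactly the situation the new ChunkDatasetDoesNotHang test creates by
constructing the iterator and never draining it.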