From a0c40500cce2ebb7bee552005bdcd3a8ab470172 Mon Sep 17 00:00:00 2001 From: Ruoxin Sang Date: Wed, 30 May 2018 16:38:59 -0700 Subject: [PATCH] Regard a path as a directory if it ends with "/" in GCS. This implies the assumption that if a real GCS object has file name ending with "/", it is always a directory mark rather than an object carrying actual contents. PiperOrigin-RevId: 198640604 --- tensorflow/core/platform/cloud/gcs_file_system.cc | 34 ++++++++++------ .../core/platform/cloud/gcs_file_system_test.cc | 46 ++++++++++++++++++++++ 2 files changed, 67 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 632bb32..5f612b5 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -965,11 +965,16 @@ Status GcsFileSystem::FileExists(const string& fname) { return Status::OK(); } } - bool result; - TF_RETURN_IF_ERROR(ObjectExists(fname, bucket, object, &result)); - if (result) { - return Status::OK(); + + // Check if the object exists. + GcsFileStat stat; + const Status status = StatForObject(fname, bucket, object, &stat); + if (status.code() != errors::Code::NOT_FOUND) { + return status; } + + // Check if the folder exists. + bool result; TF_RETURN_IF_ERROR(FolderExists(fname, &result)); if (result) { return Status::OK(); @@ -982,11 +987,11 @@ Status GcsFileSystem::ObjectExists(const string& fname, const string& bucket, if (!result) { return errors::Internal("'result' cannot be nullptr."); } - GcsFileStat not_used_stat; - const Status status = StatForObject(fname, bucket, object, ¬_used_stat); + GcsFileStat stat; + const Status status = StatForObject(fname, bucket, object, &stat); switch (status.code()) { case errors::Code::OK: - *result = true; + *result = !stat.base.is_directory; return Status::OK(); case errors::Code::NOT_FOUND: *result = false; @@ -1040,7 +1045,14 @@ Status GcsFileSystem::UncachedStatForObject(const string& fname, << "; mtime_nsec: " << stat->base.mtime_nsec << "; updated: " << updated; - stat->base.is_directory = false; + if (str_util::EndsWith(fname, "/")) { + // In GCS a path can be both a directory and a file, both it is uncommon for + // other file systems. To avoid the ambiguity, if a path ends with "/" in + // GCS, we always regard it as a directory mark or a virtual directory. + stat->base.is_directory = true; + } else { + stat->base.is_directory = false; + } return Status::OK(); } @@ -1059,11 +1071,7 @@ Status GcsFileSystem::StatForObject(const string& fname, const string& bucket, [this, &bucket, &object](const string& fname, GcsFileStat* stat) { return UncachedStatForObject(fname, bucket, object, stat); })); - if (stat->base.is_directory) { - return errors::NotFound(fname, " is a directory."); - } else { - return Status::OK(); - } + return Status::OK(); } Status GcsFileSystem::BucketExists(const string& bucket, bool* result) { diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc index 6a28d91..e791ae5 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc @@ -1137,6 +1137,28 @@ TEST(GcsFileSystemTest, FileExists_StatCache) { } } +TEST(GcsFileSystemTest, FileExists_DirectoryMark) { + std::vector requests({new FakeHttpRequest( + "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/" + "dir%2F?fields=size%2Cgeneration%2Cupdated\n" + "Auth Token: fake_token\n" + "Timeouts: 5 1 10\n", + strings::StrCat("{\"size\": \"5\",\"generation\": \"1\"," + "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))}); + GcsFileSystem fs( + std::unique_ptr(new FakeAuthProvider), + std::unique_ptr( + new FakeHttpRequestFactory(&requests)), + 0 /* block size */, 0 /* max bytes */, 0 /* max staleness */, + 3600 /* stat cache max age */, 0 /* stat cache max entries */, + 0 /* matching paths cache max age */, + 0 /* matching paths cache max entries */, 0 /* initial retry delay */, + kTestTimeoutConfig, nullptr /* gcs additional header */); + + TF_EXPECT_OK(fs.FileExists("gs://bucket/dir/")); + TF_EXPECT_OK(fs.IsDirectory("gs://bucket/dir/")); +} + TEST(GcsFileSystemTest, GetChildren_NoItems) { std::vector requests({new FakeHttpRequest( "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?" @@ -2407,6 +2429,30 @@ TEST(GcsFileSystemTest, Stat_Cache_Flush) { } } +TEST(GcsFileSystemTest, Stat_FilenameEndingWithSlash) { + std::vector requests({new FakeHttpRequest( + "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/" + "dir%2F?fields=size%2Cgeneration%2Cupdated\n" + "Auth Token: fake_token\n" + "Timeouts: 5 1 10\n", + strings::StrCat("{\"size\": \"5\",\"generation\": \"1\"," + "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))}); + GcsFileSystem fs(std::unique_ptr(new FakeAuthProvider), + std::unique_ptr( + new FakeHttpRequestFactory(&requests)), + 0 /* block size */, 0 /* max bytes */, 0 /* max staleness */, + 0 /* stat cache max age */, 0 /* stat cache max entries */, + 0 /* matching paths cache max age */, + 0 /* matching paths cache max entries */, + 0 /* initial retry delay*/, kTestTimeoutConfig, + nullptr /* gcs additional header */); + + FileStatistics stat; + TF_EXPECT_OK(fs.Stat("gs://bucket/dir/", &stat)); + EXPECT_EQ(5, stat.length); + EXPECT_TRUE(stat.is_directory); +} + TEST(GcsFileSystemTest, IsDirectory_NotFound) { std::vector requests( {new FakeHttpRequest( -- 2.7.4