From d1b381661c082b6bd74a925ec9b91fba6b33e884 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Wed, 17 Nov 2021 13:29:49 +0100 Subject: [PATCH] Minor File.ReadAllBytes* improvements (#61519) * switch from FileStream to RandomAccess * use Array.MaxLength as a limit for File.ReadAllBytes and fix an edge case bug for files: Array.MaxLength < Length < int.MaxValue * there is no gain of using FileOptions.SequentialScan on Unix, as it requires an additional sys call Co-authored-by: Dan Moseley --- .../tests/File/ReadWriteAllBytes.cs | 15 +++++ .../tests/File/ReadWriteAllBytesAsync.cs | 21 ++++++- .../System.Private.CoreLib/src/System/IO/File.cs | 73 +++++++++++----------- 3 files changed, 68 insertions(+), 41 deletions(-) diff --git a/src/libraries/System.IO.FileSystem/tests/File/ReadWriteAllBytes.cs b/src/libraries/System.IO.FileSystem/tests/File/ReadWriteAllBytes.cs index 679ff9b..39becd6 100644 --- a/src/libraries/System.IO.FileSystem/tests/File/ReadWriteAllBytes.cs +++ b/src/libraries/System.IO.FileSystem/tests/File/ReadWriteAllBytes.cs @@ -73,6 +73,21 @@ namespace System.IO.Tests } [Fact] + [OuterLoop] + [ActiveIssue("https://github.com/dotnet/runtime/issues/45954", TestPlatforms.Browser)] + public void ReadFileOverMaxArrayLength() + { + string path = GetTestFilePath(); + using (FileStream fs = File.Create(path)) + { + fs.SetLength(Array.MaxLength + 1L); + } + + // File is too large for ReadAllBytes at once + Assert.Throws(() => File.ReadAllBytes(path)); + } + + [Fact] public void Overwrite() { string path = GetTestFilePath(); diff --git a/src/libraries/System.IO.FileSystem/tests/File/ReadWriteAllBytesAsync.cs b/src/libraries/System.IO.FileSystem/tests/File/ReadWriteAllBytesAsync.cs index be562f1..6414ab2 100644 --- a/src/libraries/System.IO.FileSystem/tests/File/ReadWriteAllBytesAsync.cs +++ b/src/libraries/System.IO.FileSystem/tests/File/ReadWriteAllBytesAsync.cs @@ -73,7 +73,7 @@ namespace System.IO.Tests [Fact] [OuterLoop] [ActiveIssue("https://github.com/dotnet/runtime/issues/45954", TestPlatforms.Browser)] - public Task ReadFileOver2GBAsync() + public async Task ReadFileOver2GBAsync() { string path = GetTestFilePath(); using (FileStream fs = File.Create(path)) @@ -81,8 +81,23 @@ namespace System.IO.Tests fs.SetLength(int.MaxValue + 1L); } - // File is too large for ReadAllBytes at once - return Assert.ThrowsAsync(async () => await File.ReadAllBytesAsync(path)); + // File is too large for ReadAllBytesAsync at once + await Assert.ThrowsAsync(async () => await File.ReadAllBytesAsync(path)); + } + + [Fact] + [OuterLoop] + [ActiveIssue("https://github.com/dotnet/runtime/issues/45954", TestPlatforms.Browser)] + public async Task ReadFileOverMaxArrayLengthAsync() + { + string path = GetTestFilePath(); + using (FileStream fs = File.Create(path)) + { + fs.SetLength(Array.MaxLength + 1L); + } + + // File is too large for ReadAllBytesAsync at once + await Assert.ThrowsAsync(async () => await File.ReadAllBytesAsync(path)); } [Fact] diff --git a/src/libraries/System.Private.CoreLib/src/System/IO/File.cs b/src/libraries/System.Private.CoreLib/src/System/IO/File.cs index 29dff0c..aa0bb62 100644 --- a/src/libraries/System.Private.CoreLib/src/System/IO/File.cs +++ b/src/libraries/System.Private.CoreLib/src/System/IO/File.cs @@ -251,19 +251,25 @@ namespace System.IO public static byte[] ReadAllBytes(string path) { - // bufferSize == 1 used to avoid unnecessary buffer in FileStream - using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1, FileOptions.SequentialScan)) + // SequentialScan is a perf hint that requires extra sys-call on non-Windows OSes. + FileOptions options = OperatingSystem.IsWindows() ? FileOptions.SequentialScan : FileOptions.None; + using (SafeFileHandle sfh = OpenHandle(path, FileMode.Open, FileAccess.Read, FileShare.Read, options)) { long fileLength = 0; - if (fs.CanSeek && (fileLength = fs.Length) > int.MaxValue) + if (sfh.CanSeek && (fileLength = RandomAccess.GetFileLength(sfh)) > Array.MaxLength) { throw new IOException(SR.IO_FileTooLong2GB); } + +#if DEBUG + fileLength = 0; // improve the test coverage for ReadAllBytesUnknownLength +#endif + if (fileLength == 0) { - // Some file systems (e.g. procfs on Linux) return 0 for length even when there's content; also there is non-seekable file stream. + // Some file systems (e.g. procfs on Linux) return 0 for length even when there's content; also there are non-seekable files. // Thus we need to assume 0 doesn't mean empty. - return ReadAllBytesUnknownLength(fs); + return ReadAllBytesUnknownLength(sfh); } int index = 0; @@ -271,7 +277,7 @@ namespace System.IO byte[] bytes = new byte[count]; while (count > 0) { - int n = fs.Read(bytes, index, count); + int n = RandomAccess.ReadAtOffset(sfh, bytes.AsSpan(index, count), index); if (n == 0) { ThrowHelper.ThrowEndOfFileException(); @@ -519,44 +525,35 @@ namespace System.IO return Task.FromCanceled(cancellationToken); } - var fs = new FileStream( - path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1, // bufferSize == 1 used to avoid unnecessary buffer in FileStream - FileOptions.Asynchronous | FileOptions.SequentialScan); + // SequentialScan is a perf hint that requires extra sys-call on non-Windows OSes. + FileOptions options = FileOptions.Asynchronous | (OperatingSystem.IsWindows() ? FileOptions.SequentialScan : FileOptions.None); + SafeFileHandle sfh = OpenHandle(path, FileMode.Open, FileAccess.Read, FileShare.Read, options); - bool returningInternalTask = false; - try + long fileLength = 0L; + if (sfh.CanSeek && (fileLength = RandomAccess.GetFileLength(sfh)) > Array.MaxLength) { - long fileLength = 0L; - if (fs.CanSeek && (fileLength = fs.Length) > int.MaxValue) - { - var e = new IOException(SR.IO_FileTooLong2GB); - ExceptionDispatchInfo.SetCurrentStackTrace(e); - return Task.FromException(e); - } - - returningInternalTask = true; - return fileLength > 0 ? - InternalReadAllBytesAsync(fs, (int)fileLength, cancellationToken) : - InternalReadAllBytesUnknownLengthAsync(fs, cancellationToken); - } - finally - { - if (!returningInternalTask) - { - fs.Dispose(); - } + sfh.Dispose(); + return Task.FromException(ExceptionDispatchInfo.SetCurrentStackTrace(new IOException(SR.IO_FileTooLong2GB))); } + +#if DEBUG + fileLength = 0; // improve the test coverage for InternalReadAllBytesUnknownLengthAsync +#endif + + return fileLength > 0 ? + InternalReadAllBytesAsync(sfh, (int)fileLength, cancellationToken) : + InternalReadAllBytesUnknownLengthAsync(sfh, cancellationToken); } - private static async Task InternalReadAllBytesAsync(FileStream fs, int count, CancellationToken cancellationToken) + private static async Task InternalReadAllBytesAsync(SafeFileHandle sfh, int count, CancellationToken cancellationToken) { - using (fs) + using (sfh) { int index = 0; byte[] bytes = new byte[count]; do { - int n = await fs.ReadAsync(new Memory(bytes, index, count - index), cancellationToken).ConfigureAwait(false); + int n = await RandomAccess.ReadAtOffsetAsync(sfh, bytes.AsMemory(index), index, cancellationToken).ConfigureAwait(false); if (n == 0) { ThrowHelper.ThrowEndOfFileException(); @@ -569,7 +566,7 @@ namespace System.IO } } - private static async Task InternalReadAllBytesUnknownLengthAsync(FileStream fs, CancellationToken cancellationToken) + private static async Task InternalReadAllBytesUnknownLengthAsync(SafeFileHandle sfh, CancellationToken cancellationToken) { byte[] rentedArray = ArrayPool.Shared.Rent(512); try @@ -595,7 +592,7 @@ namespace System.IO } Debug.Assert(bytesRead < rentedArray.Length); - int n = await fs.ReadAsync(rentedArray.AsMemory(bytesRead), cancellationToken).ConfigureAwait(false); + int n = await RandomAccess.ReadAtOffsetAsync(sfh, rentedArray.AsMemory(bytesRead), bytesRead, cancellationToken).ConfigureAwait(false); if (n == 0) { return rentedArray.AsSpan(0, bytesRead).ToArray(); @@ -605,7 +602,7 @@ namespace System.IO } finally { - fs.Dispose(); + sfh.Dispose(); ArrayPool.Shared.Return(rentedArray); } } @@ -775,7 +772,7 @@ namespace System.IO throw new ArgumentException(SR.Argument_EmptyPath, nameof(path)); } - private static byte[] ReadAllBytesUnknownLength(FileStream fs) + private static byte[] ReadAllBytesUnknownLength(SafeFileHandle sfh) { byte[]? rentedArray = null; Span buffer = stackalloc byte[512]; @@ -803,7 +800,7 @@ namespace System.IO } Debug.Assert(bytesRead < buffer.Length); - int n = fs.Read(buffer.Slice(bytesRead)); + int n = RandomAccess.ReadAtOffset(sfh, buffer.Slice(bytesRead), bytesRead); if (n == 0) { return buffer.Slice(0, bytesRead).ToArray(); -- 2.7.4