Make Unix filename conversion lazy (dotnet/corefx#26978)
authorJeremy Kuhne <jeremy.kuhne@microsoft.com>
Fri, 9 Feb 2018 06:50:38 +0000 (22:50 -0800)
committerGitHub <noreply@github.com>
Fri, 9 Feb 2018 06:50:38 +0000 (22:50 -0800)
* Make Unix filename conversion lazy

Also hook error handling.

* Address feedback

Commit migrated from https://github.com/dotnet/corefx/commit/ef0aca54454748002325b750c839dacba6ed3999

src/libraries/Common/src/Interop/Unix/System.Native/Interop.ReadDir.cs
src/libraries/System.IO.FileSystem/src/System/IO/Enumeration/FileSystemEntry.Unix.cs
src/libraries/System.IO.FileSystem/src/System/IO/Enumeration/FileSystemEnumerator.Unix.cs

index d98c428..5712b20 100644 (file)
@@ -3,15 +3,16 @@
 // See the LICENSE file in the project root for more information.
 
 using System;
+using System.Diagnostics;
 using System.Runtime.InteropServices;
-using System.Threading;
+using System.Text;
 using Microsoft.Win32.SafeHandles;
 
 internal static partial class Interop
 {
     internal static partial class Sys
     {
-        private static readonly int s_readBufferSize = GetReadDirRBufferSize();
+        internal static int ReadBufferSize { get; } = GetReadDirRBufferSize();
 
         internal enum NodeType : int
         {
@@ -27,76 +28,59 @@ internal static partial class Interop
         }
 
         [StructLayout(LayoutKind.Sequential)]
-        private unsafe struct InternalDirectoryEntry
+        internal unsafe struct DirectoryEntry
         {
-            internal IntPtr     Name;
-            internal int        NameLength;
-            internal NodeType   InodeType;
-        }
+            internal byte* Name;
+            internal int NameLength;
+            internal NodeType InodeType;
 
-        internal struct DirectoryEntry
-        {
-            internal NodeType   InodeType;
-            internal string     InodeName;
+            internal ReadOnlySpan<char> GetName(Span<char> buffer)
+            {
+                Debug.Assert(buffer.Length >= Encoding.UTF8.GetMaxCharCount(255), "should have enough space for the max file name");
+                Debug.Assert(Name != null, "should not have a null name");
+
+                ReadOnlySpan<byte> nameBytes = NameLength == -1
+                    // In this case the struct was allocated via struct dirent *readdir(DIR *dirp);
+                    ? new ReadOnlySpan<byte>(Name, new ReadOnlySpan<byte>(Name, 255).IndexOf<byte>(0))
+                    : new ReadOnlySpan<byte>(Name, NameLength);
+
+                Debug.Assert(nameBytes.Length > 0, "we shouldn't have gotten a garbage value from the OS");
+                if (nameBytes.Length == 0)
+                    return buffer.Slice(0, 0);
+
+                int charCount = Encoding.UTF8.GetChars(nameBytes, buffer);
+                ReadOnlySpan<char> value = buffer.Slice(0, charCount);
+                Debug.Assert(value.IndexOf('\0') == -1, "should not have embedded nulls");
+                return value;
+            }
         }
 
         [DllImport(Libraries.SystemNative, EntryPoint = "SystemNative_OpenDir", SetLastError = true)]
-        internal static extern Microsoft.Win32.SafeHandles.SafeDirectoryHandle OpenDir(string path);
+        internal static extern SafeDirectoryHandle OpenDir(string path);
 
         [DllImport(Libraries.SystemNative, EntryPoint = "SystemNative_GetReadDirRBufferSize", SetLastError = false)]
         internal static extern int GetReadDirRBufferSize();
 
         [DllImport(Libraries.SystemNative, EntryPoint = "SystemNative_ReadDirR", SetLastError = false)]
-        private static extern unsafe int ReadDirR(IntPtr dir, byte* buffer, int bufferSize, out InternalDirectoryEntry outputEntry);
+        private static extern unsafe int ReadDirR(IntPtr dir, ref byte buffer, int bufferSize, ref DirectoryEntry outputEntry);
 
         [DllImport(Libraries.SystemNative, EntryPoint = "SystemNative_CloseDir", SetLastError = true)]
         internal static extern int CloseDir(IntPtr dir);
 
-        // The calling pattern for ReadDir is described in src/Native/System.Native/pal_readdir.cpp
-        internal static int ReadDir(SafeDirectoryHandle dir, out DirectoryEntry outputEntry)
+        /// <summary>
+        /// Get the next directory entry for the given handle. **Note** the actual memory used may be allocated
+        /// by the OS and will be freed when the handle is closed. As such, the handle lifespan MUST be kept tightly
+        /// controlled. The DirectoryEntry name cannot be accessed after the handle is closed.
+        /// 
+        /// Call <see cref="ReadBufferSize"/> to see what size buffer to allocate.
+        /// </summary>
+        internal static int ReadDir(SafeDirectoryHandle dir, Span<byte> buffer, ref DirectoryEntry entry)
         {
-            bool addedRef = false;
-            try
-            {
-                // We avoid a native string copy into InternalDirectoryEntry.
-                // - If the platform suppors reading into a buffer, the data is read directly into the buffer. The
-                //   data can be read as long as the buffer is valid.
-                // - If the platform does not support reading into a buffer, the information returned in
-                //   InternalDirectoryEntry points to native memory owned by the SafeDirectoryHandle. The data is only
-                //   valid until the next call to CloseDir/ReadDir. We extend the reference until we have copied all data
-                //   to ensure it does not become invalid by a CloseDir; and we copy the data so our caller does not
-                //   use the native memory held by the SafeDirectoryHandle.
-                dir.DangerousAddRef(ref addedRef);
+            // The calling pattern for ReadDir is described in src/Native/Unix/System.Native/pal_io.cpp|.h
+            Debug.Assert(buffer.Length >= ReadBufferSize, "should have a big enough buffer for the raw data");
 
-                unsafe
-                {
-                    // s_readBufferSize is zero when the native implementation does not support reading into a buffer.
-                    byte* buffer = stackalloc byte[s_readBufferSize];
-                    InternalDirectoryEntry temp;
-                    int ret = ReadDirR(dir.DangerousGetHandle(), buffer, s_readBufferSize, out temp);
-                    // We copy data into DirectoryEntry to ensure there are no dangling references.
-                    outputEntry = ret == 0 ?
-                                new DirectoryEntry() { InodeName = GetDirectoryEntryName(temp), InodeType = temp.InodeType } : 
-                                default(DirectoryEntry);
-
-                    return ret;
-                }
-            }
-            finally
-            {
-                if (addedRef)
-                {
-                    dir.DangerousRelease();
-                }
-            }
-        }
-
-        private static unsafe string GetDirectoryEntryName(InternalDirectoryEntry dirEnt)
-        {
-            if (dirEnt.NameLength == -1)
-                return Marshal.PtrToStringAnsi(dirEnt.Name);
-            else
-                return Marshal.PtrToStringAnsi(dirEnt.Name, dirEnt.NameLength);
+            // ReadBufferSize is zero when the native implementation does not support reading into a buffer.
+            return ReadDirR(dir.DangerousGetHandle(), ref MemoryMarshal.GetReference(buffer), ReadBufferSize, ref entry);
         }
     }
 }
index d142879..2e6a5c1 100644 (file)
@@ -9,6 +9,14 @@ namespace System.IO.Enumeration
     /// </summary>
     public unsafe ref struct FileSystemEntry
     {
+        private const int FileNameBufferSize = 256;
+        internal Interop.Sys.DirectoryEntry _directoryEntry;
+        private FileStatus _status;
+        private Span<char> _pathBuffer;
+        private ReadOnlySpan<char> _fullPath;
+        private ReadOnlySpan<char> _fileName;
+        private fixed char _fileNameBuffer[FileNameBufferSize];
+
         internal static bool Initialize(
             ref FileSystemEntry entry,
             Interop.Sys.DirectoryEntry directoryEntry,
@@ -44,10 +52,6 @@ namespace System.IO.Enumeration
             return isDirectory;
         }
 
-        internal Interop.Sys.DirectoryEntry _directoryEntry;
-        private FileStatus _status;
-        private Span<char> _pathBuffer;
-        private ReadOnlySpan<char> _fullPath;
 
         private ReadOnlySpan<char> FullPath
         {
@@ -58,7 +62,7 @@ namespace System.IO.Enumeration
                     ReadOnlySpan<char> directory = Directory;
                     directory.CopyTo(_pathBuffer);
                     _pathBuffer[directory.Length] = Path.DirectorySeparatorChar;
-                    ReadOnlySpan<char> fileName = _directoryEntry.InodeName;
+                    ReadOnlySpan<char> fileName = FileName;
                     fileName.CopyTo(_pathBuffer.Slice(directory.Length + 1));
                     _fullPath = _pathBuffer.Slice(0, directory.Length + 1 + fileName.Length);
                 }
@@ -66,6 +70,24 @@ namespace System.IO.Enumeration
             }
         }
 
+        public ReadOnlySpan<char> FileName
+        {
+            get
+            {
+                if (_directoryEntry.Name != null)
+                {
+                    fixed (char* c = _fileNameBuffer)
+                    {
+                        Span<char> buffer = new Span<char>(c, FileNameBufferSize);
+                        _fileName = _directoryEntry.GetName(buffer);
+                    }
+                    _directoryEntry.Name = null;
+                }
+
+                return _fileName;
+            }
+        }
+
         /// <summary>
         /// The full path of the directory this entry resides in.
         /// </summary>
@@ -81,7 +103,6 @@ namespace System.IO.Enumeration
         /// </summary>
         public string OriginalRootDirectory { get; private set; }
 
-        public ReadOnlySpan<char> FileName => _directoryEntry.InodeName;
         public FileAttributes Attributes => _status.GetAttributes(FullPath, FileName);
         public long Length => _status.GetLength(FullPath);
         public DateTimeOffset CreationTimeUtc => _status.GetCreationTime(FullPath);
@@ -94,11 +115,11 @@ namespace System.IO.Enumeration
             string fullPath = ToFullPath();
             if (_status.InitiallyDirectory)
             {
-                return DirectoryInfo.Create(fullPath, _directoryEntry.InodeName, ref _status);
+                return DirectoryInfo.Create(fullPath, new string(FileName), ref _status);
             }
             else
             {
-                return FileInfo.Create(fullPath, _directoryEntry.InodeName, ref _status);
+                return FileInfo.Create(fullPath, new string(FileName), ref _status);
             }
         }
 
index 616e169..9c4789a 100644 (file)
@@ -4,6 +4,7 @@
 
 using System.Buffers;
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.Runtime.ConstrainedExecution;
 using Microsoft.Win32.SafeHandles;
 
@@ -30,6 +31,8 @@ namespace System.IO.Enumeration
 
         // Used for creating full paths
         private char[] _pathBuffer;
+        // Used to get the raw entry data
+        private byte[] _entryBuffer;
 
         /// <summary>
         /// Encapsulates a find operation.
@@ -53,6 +56,8 @@ namespace System.IO.Enumeration
             try
             {
                 _pathBuffer = ArrayPool<char>.Shared.Rent(StandardBufferSize);
+                int size = Interop.Sys.ReadBufferSize;
+                _entryBuffer = size > 0 ? ArrayPool<byte>.Shared.Rent(size) : null;
             }
             catch
             {
@@ -62,7 +67,7 @@ namespace System.IO.Enumeration
             }
         }
 
-        private static SafeDirectoryHandle CreateDirectoryHandle(string path)
+        private SafeDirectoryHandle CreateDirectoryHandle(string path)
         {
             // TODO: https://github.com/dotnet/corefx/issues/26715
             // - Check access denied option and allow through if specified.
@@ -70,7 +75,13 @@ namespace System.IO.Enumeration
             SafeDirectoryHandle handle = Interop.Sys.OpenDir(path);
             if (handle.IsInvalid)
             {
-                throw Interop.GetExceptionForIoErrno(Interop.Sys.GetLastErrorInfo(), path, isDirectory: true);
+                Interop.ErrorInfo info = Interop.Sys.GetLastErrorInfo();
+                if ((_options.IgnoreInaccessible && IsAccessError(info.RawErrno))
+                    || ContinueOnError(info.RawErrno))
+                {
+                    return null;
+                }
+                throw Interop.GetExceptionForIoErrno(info, path, isDirectory: true);
             }
             return handle;
         }
@@ -107,7 +118,7 @@ namespace System.IO.Enumeration
                         {
                             // These three we don't have to hit the disk again to evaluate
                             if (((_options.AttributesToSkip & FileAttributes.Directory) != 0 && isDirectory)
-                                || ((_options.AttributesToSkip & FileAttributes.Hidden) != 0 && _entry.InodeName[0] == '.')
+                                || ((_options.AttributesToSkip & FileAttributes.Hidden) != 0 && _entry.Name[0] == '.')
                                 || ((_options.AttributesToSkip & FileAttributes.ReparsePoint) != 0 && _entry.InodeType == Interop.Sys.NodeType.DT_LNK))
                                 continue;
                         }
@@ -121,7 +132,7 @@ namespace System.IO.Enumeration
                     if (isDirectory)
                     {
                         // Subdirectory found
-                        if (PathHelpers.IsDotOrDotDot(_entry.InodeName))
+                        if (_entry.Name[0] == '.' && (_entry.Name[1] == 0 || (_entry.Name[1] == '.' && _entry.Name[2] == 0)))
                         {
                             // "." or "..", don't process unless the option is set
                             if (!_options.ReturnSpecialDirectories)
@@ -130,7 +141,7 @@ namespace System.IO.Enumeration
                         else if (_options.RecurseSubdirectories && ShouldRecurseIntoEntry(ref entry))
                         {
                             // Recursion is on and the directory was accepted, Queue it
-                            string subdirectory = PathHelpers.CombineNoChecks(_currentPath, _entry.InodeName);
+                            string subdirectory = PathHelpers.CombineNoChecks(_currentPath, entry.FileName);
                             SafeDirectoryHandle subdirectoryHandle = CreateDirectoryHandle(subdirectory);
                             if (subdirectoryHandle != null)
                             {
@@ -161,17 +172,36 @@ namespace System.IO.Enumeration
 
         private unsafe void FindNextEntry()
         {
-            // Read each entry from the enumerator
-            if (Interop.Sys.ReadDir(_directoryHandle, out _entry) != 0)
+            Span<byte> buffer = _entryBuffer == null ? Span<byte>.Empty : new Span<byte>(_entryBuffer);
+            int result = Interop.Sys.ReadDir(_directoryHandle, buffer, ref _entry);
+            switch (result)
             {
-                // TODO: https://github.com/dotnet/corefx/issues/26715
-                // - Refactor ReadDir so we can process errors here
-
-                // Directory finished
-                DirectoryFinished();
+                case -1:
+                    // End of directory
+                    DirectoryFinished();
+                    break;
+                case 0:
+                    // Success
+                    break;
+                default:
+                    // Error
+                    if ((_options.IgnoreInaccessible && IsAccessError(result))
+                        || ContinueOnError(result))
+                    {
+                        DirectoryFinished();
+                        break;
+                    }
+                    else
+                    {
+                        throw Interop.GetExceptionForIoErrno(new Interop.ErrorInfo(result), _currentPath, isDirectory: true);
+                    }
             }
         }
 
+        private static bool IsAccessError(int error)
+            => error == (int)Interop.Error.EACCES || error == (int)Interop.Error.EBADF
+                || error == (int)Interop.Error.EPERM;
+
         private void InternalDispose(bool disposing)
         {
             // It is possible to fail to allocate the lock, but the finalizer will still run
@@ -189,6 +219,13 @@ namespace System.IO.Enumeration
                             _pending.Dequeue().Handle.Dispose();
                         _pending = null;
                     }
+
+                    if (_pathBuffer != null)
+                        ArrayPool<char>.Shared.Return(_pathBuffer);
+                    _pathBuffer = null;
+                    if (_entryBuffer != null)
+                        ArrayPool<byte>.Shared.Return(_entryBuffer);
+                    _entryBuffer = null;
                 }
             }