Add ref APIs and unit tests for System.Text.Unicode.Utf8 (dotnet/corefx#34538)

author Levi Broderick <GrabYourPitchforks@users.noreply.github.com>

Mon, 18 Mar 2019 22:05:05 +0000 (15:05 -0700)

committer GitHub <noreply@github.com>

Mon, 18 Mar 2019 22:05:05 +0000 (15:05 -0700)
author Levi Broderick <GrabYourPitchforks@users.noreply.github.com>
Mon, 18 Mar 2019 22:05:05 +0000 (15:05 -0700)
committer GitHub <noreply@github.com>
Mon, 18 Mar 2019 22:05:05 +0000 (15:05 -0700)
diff --git a/src/libraries/CoreFx.Private.TestUtilities/ref/Configurations.props b/src/libraries/CoreFx.Private.TestUtilities/ref/Configurations.props

index ff0d415e4593d0f7d2977c58d3a667cbdff5d082..ca0a1fdebff7dc24540a4176cf59e13fa7778bc7 100644 (file)
--- a/src/libraries/CoreFx.Private.TestUtilities/ref/Configurations.props
+++ b/src/libraries/CoreFx.Private.TestUtilities/ref/Configurations.props
@@ -1,7 +1,9 @@
  <Project>
    <PropertyGroup>
      <BuildConfigurations>
+      netcoreapp;
        netstandard;
+      uap;
      </BuildConfigurations>
    </PropertyGroup>
-</Project>
-\ No newline at end of file
+</Project>
diff --git a/src/libraries/CoreFx.Private.TestUtilities/ref/CoreFx.Private.TestUtilities.csproj b/src/libraries/CoreFx.Private.TestUtilities/ref/CoreFx.Private.TestUtilities.csproj

index 01f758e62796341bda93cffcd25f92359ff8adfb..338f7156c26a2413e15fd447a503b01fa9b362d4 100644 (file)
--- a/src/libraries/CoreFx.Private.TestUtilities/ref/CoreFx.Private.TestUtilities.csproj
+++ b/src/libraries/CoreFx.Private.TestUtilities/ref/CoreFx.Private.TestUtilities.csproj
@@ -1,13 +1,20 @@
-<Project Sdk="Microsoft.NET.Sdk">
+<Project Sdk="Microsoft.NET.Sdk">
    <PropertyGroup>
      <ProjectGuid>{E2E59C98-998F-9965-991D-99411166AF6F}</ProjectGuid>
      <ShouldWriteSigningRequired>false</ShouldWriteSigningRequired>
      <AllowReferenceFromRuntime>true</AllowReferenceFromRuntime>
      <RuntimeProjectFile>$(RepoRoot)\external\test-runtime\XUnit.Runtime.depproj</RuntimeProjectFile>
-    <Configurations>netstandard-Debug;netstandard-Release</Configurations>
+    <Configurations>netcoreapp-Debug;netcoreapp-Release;netstandard-Debug;netstandard-Release;uap-Debug;uap-Release</Configurations>
    </PropertyGroup>
    <ItemGroup>
      <Compile Include="CoreFx.Private.TestUtilities.cs" />
      <ReferenceFromRuntime Include="xunit.core" />
    </ItemGroup>
+  <ItemGroup Condition="'$(TargetGroup)' == 'netcoreapp' OR '$(TargetGroup)' == 'uap'">
+    <Compile Include="CoreFx.Private.TestUtilities.netcoreapp.cs" />
+    <ProjectReference Include="..\..\System.Collections\ref\System.Collections.csproj" />
+    <ProjectReference Include="..\..\System.Diagnostics.Process\ref\System.Diagnostics.Process.csproj" />
+    <ProjectReference Include="..\..\System.Runtime\ref\System.Runtime.csproj" />
+    <ProjectReference Include="..\..\System.Runtime.InteropServices\ref\System.Runtime.InteropServices.csproj" />
+  </ItemGroup>
  </Project>
 \ No newline at end of file
diff --git a/src/libraries/CoreFx.Private.TestUtilities/ref/CoreFx.Private.TestUtilities.netcoreapp.cs b/src/libraries/CoreFx.Private.TestUtilities/ref/CoreFx.Private.TestUtilities.netcoreapp.cs

new file mode 100644 (file)

index 0000000..ed68ba1
--- /dev/null
+++ b/src/libraries/CoreFx.Private.TestUtilities/ref/CoreFx.Private.TestUtilities.netcoreapp.cs
@@ -0,0 +1,30 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+// ------------------------------------------------------------------------------
+// Changes to this file must follow the http://aka.ms/api-review process.
+// ------------------------------------------------------------------------------
+
+namespace System.Buffers
+{
+    public static partial class BoundedMemory
+    {
+        public static System.Buffers.BoundedMemory<T> Allocate<T>(int elementCount, System.Buffers.PoisonPagePlacement placement = System.Buffers.PoisonPagePlacement.After) where T : unmanaged { throw null; }
+        public static System.Buffers.BoundedMemory<T> AllocateFromExistingData<T>(System.ReadOnlySpan<T> data, System.Buffers.PoisonPagePlacement placement = System.Buffers.PoisonPagePlacement.After) where T : unmanaged { throw null; }
+        public static System.Buffers.BoundedMemory<T> AllocateFromExistingData<T>(T[] data, System.Buffers.PoisonPagePlacement placement = System.Buffers.PoisonPagePlacement.After) where T : unmanaged { throw null; }
+    }
+    public abstract partial class BoundedMemory<T> : IDisposable where T : unmanaged
+    {
+        public abstract bool IsReadonly { get; }
+        public abstract System.Memory<T> Memory { get; }
+        public abstract System.Span<T> Span { get; }
+        public abstract void Dispose();
+        public abstract void MakeReadonly();
+        public abstract void MakeWriteable();
+    }
+    public enum PoisonPagePlacement
+    {
+        After = 0,
+        Before = 1,
+    }
+}
diff --git a/src/libraries/CoreFx.Private.TestUtilities/src/CoreFx.Private.TestUtilities.csproj b/src/libraries/CoreFx.Private.TestUtilities/src/CoreFx.Private.TestUtilities.csproj

index 262fb54bd404c407903366ab17d64d80b6a7273f..58b301b1e8d192632c8696e4660d34d529cdf78b 100644 (file)
--- a/src/libraries/CoreFx.Private.TestUtilities/src/CoreFx.Private.TestUtilities.csproj
+++ b/src/libraries/CoreFx.Private.TestUtilities/src/CoreFx.Private.TestUtilities.csproj
@@ -12,6 +12,13 @@
      <GeneratePlatformNotSupportedAssemblyMessage Condition="'$(TargetGroup)' == 'netstandard'">Test Utilities are not supported on this platform</GeneratePlatformNotSupportedAssemblyMessage>
      <Configurations>netcoreapp-Unix-Debug;netcoreapp-Unix-Release;netcoreapp-Windows_NT-Debug;netcoreapp-Windows_NT-Release;netcoreapp2.0-Unix-Debug;netcoreapp2.0-Unix-Release;netcoreapp2.0-Windows_NT-Debug;netcoreapp2.0-Windows_NT-Release;netcoreappaot-Windows_NT-Debug;netcoreappaot-Windows_NT-Release;netfx-Windows_NT-Debug;netfx-Windows_NT-Release;netstandard-Debug;netstandard-Release;uap-Windows_NT-Debug;uap-Windows_NT-Release;uapaot-Windows_NT-Debug;uapaot-Windows_NT-Release</Configurations>
    </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="System\Buffers\BoundedMemory.cs" />
+    <Compile Include="System\Buffers\BoundedMemory.Creation.cs" />
+    <Compile Include="System\Buffers\BoundedMemory.Unix.cs" Condition="'$(TargetsWindows)' != 'true'" />
+    <Compile Include="System\Buffers\BoundedMemory.Windows.cs" Condition="'$(TargetsWindows)' == 'true'" />
+    <Compile Include="System\Buffers\PoisonPagePlacement.cs" />
+  </ItemGroup>
    <ItemGroup Condition="'$(TargetGroup)' != 'netstandard'">
      <Compile Include="System\AdminHelpers.cs" />
      <Compile Include="System\IO\FileCleanupTestBase.cs" />
diff --git a/src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Creation.cs b/src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Creation.cs

new file mode 100644 (file)

index 0000000..22a7d14
--- /dev/null
+++ b/src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Creation.cs
@@ -0,0 +1,80 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Runtime.InteropServices;
+
+namespace System.Buffers
+{
+    /// <summary>
+    /// Contains factory methods to create <see cref="BoundedMemory{T}"/> instances.
+    /// </summary>
+    public static partial class BoundedMemory
+    {
+        /// <summary>
+        /// Allocates a new <see cref="BoundedMemory{T}"/> region which is immediately preceded by
+        /// or immediately followed by a poison (PAGE_NOACCESS) page. If <paramref name="placement"/>
+        /// is <see cref="PoisonPagePlacement.Before"/>, then attempting to read the memory
+        /// immediately before the returned <see cref="BoundedMemory{T}"/> will result in an AV.
+        /// If <paramref name="placement"/> is <see cref="PoisonPagePlacement.After"/>, then
+        /// attempting to read the memory immediately after the returned <see cref="BoundedMemory{T}"/>
+        /// will result in an AV.
+        /// </summary>
+        /// <remarks>
+        /// The newly-allocated memory will be populated with random data; negative counts and undefined placements throw.
+        /// </remarks>
+        public static BoundedMemory<T> Allocate<T>(int elementCount, PoisonPagePlacement placement = PoisonPagePlacement.After) where T : unmanaged
+        {
+            if (elementCount < 0)
+            {
+                throw new ArgumentOutOfRangeException(nameof(elementCount));
+            }
+            if (placement != PoisonPagePlacement.Before && placement != PoisonPagePlacement.After) // reject undefined enum values
+            {
+                throw new ArgumentOutOfRangeException(nameof(placement));
+            }
+
+            var retVal = AllocateWithoutDataPopulation<T>(elementCount, placement);
+            FillRandom(MemoryMarshal.AsBytes(retVal.Span)); // randomize the region through its raw byte view
+            return retVal;
+        }
+
+        /// <summary>
+        /// Similar to <see cref="Allocate{T}(int, PoisonPagePlacement)"/>, but populates the allocated
+        /// memory block by copying <paramref name="data"/> into it rather than using random data.
+        /// </summary>
+        public static BoundedMemory<T> AllocateFromExistingData<T>(ReadOnlySpan<T> data, PoisonPagePlacement placement = PoisonPagePlacement.After) where T : unmanaged
+        {
+            if (placement != PoisonPagePlacement.Before && placement != PoisonPagePlacement.After) // reject undefined enum values
+            {
+                throw new ArgumentOutOfRangeException(nameof(placement));
+            }
+
+            var retVal = AllocateWithoutDataPopulation<T>(data.Length, placement); // region holds exactly data.Length elements
+            data.CopyTo(retVal.Span);
+            return retVal;
+        }
+
+        /// <summary>
+        /// Similar to <see cref="Allocate{T}(int, PoisonPagePlacement)"/>, but populates the allocated
+        /// memory block from the existing array <paramref name="data"/> rather than using random data.
+        /// </summary>
+        public static BoundedMemory<T> AllocateFromExistingData<T>(T[] data, PoisonPagePlacement placement = PoisonPagePlacement.After) where T : unmanaged
+        {
+            return AllocateFromExistingData(new ReadOnlySpan<T>(data), placement); // forwards to the span-based overload
+        }
+
+        private static void FillRandom(Span<byte> buffer)
+        {
+            // Overwrites every byte of 'buffer'. A Random instance is looped over manually since
+            // Random.NextBytes(Span<byte>) doesn't exist on all platforms we target.
+
+            Random random = new Random(); // doesn't need to be cryptographically strong
+
+            for (int i = 0; i < buffer.Length; i++)
+            {
+                buffer[i] = (byte)random.Next(); // narrowing cast; presumably compiled unchecked, keeping only the low 8 bits
+            }
+        }
+    }
+}
diff --git a/src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Unix.cs b/src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Unix.cs

new file mode 100644 (file)

index 0000000..aa9d87a
--- /dev/null
+++ b/src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Unix.cs
@@ -0,0 +1,47 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+namespace System.Buffers
+{
+    public static partial class BoundedMemory
+    {
+        private static UnixImplementation<T> AllocateWithoutDataPopulation<T>(int elementCount, PoisonPagePlacement placement) where T : unmanaged
+        {
+            // On non-Windows platforms, we don't yet have support for changing the permissions of individual pages, so 'placement' is ignored.
+
+            return new UnixImplementation<T>(elementCount); // no poison page is created by this implementation
+        }
+
+        private sealed class UnixImplementation<T> : BoundedMemory<T> where T : unmanaged // array-backed stand-in without page protection
+        {
+            private readonly T[] _buffer; // managed array that backs both Memory and Span
+
+            public UnixImplementation(int elementCount)
+            {
+                _buffer = new T[elementCount];
+            }
+
+            public override bool IsReadonly => false; // always writable: MakeReadonly below does nothing
+
+            public override Memory<T> Memory => _buffer;
+
+            public override Span<T> Span => _buffer;
+
+            public override void Dispose()
+            {
+                // no-op; this implementation holds only a managed array
+            }
+
+            public override void MakeReadonly()
+            {
+                // no-op; page protection is not supported by this implementation
+            }
+
+            public override void MakeWriteable()
+            {
+                // no-op; the buffer is never made readonly in the first place
+            }
+        }
+    }
+}
diff --git a/src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Windows.cs b/src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Windows.cs

new file mode 100644 (file)

index 0000000..d60df68
--- /dev/null
+++ b/src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Windows.cs
@@ -0,0 +1,333 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Buffers;
+using System.Runtime.ConstrainedExecution;
+using System.Runtime.InteropServices;
+using System.Security;
+
+namespace System.Buffers
+{
+    public static unsafe partial class BoundedMemory
+    {
+        private static readonly int SystemPageSize = Environment.SystemPageSize;
+
+        private static WindowsImplementation<T> AllocateWithoutDataPopulation<T>(int elementCount, PoisonPagePlacement placement) where T : unmanaged
+        {
+            long cb, totalBytesToAllocate; // cb = payload bytes; totalBytesToAllocate = payload rounded up to pages + 2 poison pages
+            checked
+            {
+                cb = elementCount * sizeof(T); // int * int inside the checked block, so an oversized request throws
+                totalBytesToAllocate = cb;
+
+                // We only need to round the count up if it's not an exact multiple
+                // of the system page size.
+
+                var leftoverBytes = totalBytesToAllocate % SystemPageSize;
+                if (leftoverBytes != 0)
+                {
+                    totalBytesToAllocate += SystemPageSize - leftoverBytes;
+                }
+
+                // Finally, account for the poison pages at the front and back.
+
+                totalBytesToAllocate += 2 * SystemPageSize;
+            }
+
+            // Reserve and commit the entire range as NOACCESS.
+
+            var handle = UnsafeNativeMethods.VirtualAlloc(
+                lpAddress: IntPtr.Zero,
+                dwSize: (IntPtr)totalBytesToAllocate /* cast throws OverflowException if out of range */,
+                flAllocationType: VirtualAllocAllocationType.MEM_RESERVE | VirtualAllocAllocationType.MEM_COMMIT,
+                flProtect: VirtualAllocProtection.PAGE_NOACCESS);
+
+            if (handle == null || handle.IsInvalid)
+            {
+                Marshal.ThrowExceptionForHR(Marshal.GetHRForLastWin32Error());
+                throw new InvalidOperationException("VirtualAlloc failed unexpectedly."); // fallback if the HRESULT above did not throw
+            }
+
+            // Done allocating! Now carve out a READWRITE section bookended by the NOACCESS
+            // pages and return that carved-out section to the caller. Since memory protection
+            // flags only apply at page-level granularity, we need to "left-align" or "right-
+            // align" the section we carve out so that it's guaranteed adjacent to one of
+            // the NOACCESS bookend pages.
+
+            return new WindowsImplementation<T>(
+                handle: handle,
+                byteOffsetIntoHandle: (placement == PoisonPagePlacement.Before)
+                    ? SystemPageSize /* just after leading poison page */
+                    : checked((int)(totalBytesToAllocate - SystemPageSize - cb)) /* just before trailing poison page */,
+                elementCount: elementCount)
+            {
+                Protection = VirtualAllocProtection.PAGE_READWRITE // the setter calls VirtualProtect on the carved-out section
+            };
+        }
+
+        private sealed class WindowsImplementation<T> : BoundedMemory<T> where T : unmanaged
+        {
+            private readonly VirtualAllocHandle _handle;
+            private readonly int _byteOffsetIntoHandle;
+            private readonly int _elementCount;
+            private readonly BoundedMemoryManager _memoryManager;
+
+            internal WindowsImplementation(VirtualAllocHandle handle, int byteOffsetIntoHandle, int elementCount)
+            {
+                _handle = handle;
+                _byteOffsetIntoHandle = byteOffsetIntoHandle;
+                _elementCount = elementCount;
+                _memoryManager = new BoundedMemoryManager(this);
+            }
+
+            public override bool IsReadonly => (Protection != VirtualAllocProtection.PAGE_READWRITE);
+
+            internal VirtualAllocProtection Protection // current page protection of the carved-out section
+            {
+                get
+                {
+                    bool refAdded = false;
+                    try
+                    {
+                        _handle.DangerousAddRef(ref refAdded); // hold a handle reference while its raw address is in use
+                        if (UnsafeNativeMethods.VirtualQuery(
+                            lpAddress: _handle.DangerousGetHandle() + _byteOffsetIntoHandle,
+                            lpBuffer: out var memoryInfo,
+                            dwLength: (IntPtr)sizeof(MEMORY_BASIC_INFORMATION)) == IntPtr.Zero)
+                        {
+                            Marshal.ThrowExceptionForHR(Marshal.GetHRForLastWin32Error());
+                            throw new InvalidOperationException("VirtualQuery failed unexpectedly."); // fallback if the HRESULT above did not throw
+                        }
+                        return memoryInfo.Protect;
+                    }
+                    finally
+                    {
+                        if (refAdded)
+                        {
+                            _handle.DangerousRelease();
+                        }
+                    }
+                }
+                set
+                {
+                    if (_elementCount > 0) // zero-length regions are left untouched
+                    {
+                        bool refAdded = false;
+                        try
+                        {
+                            _handle.DangerousAddRef(ref refAdded); // hold a handle reference while its raw address is in use
+                            if (!UnsafeNativeMethods.VirtualProtect(
+                                lpAddress: _handle.DangerousGetHandle() + _byteOffsetIntoHandle,
+                                dwSize: (IntPtr)(&((T*)null)[_elementCount]), // byte length: address of element _elementCount relative to a null base
+                                flNewProtect: value,
+                                lpflOldProtect: out _))
+                            {
+                                Marshal.ThrowExceptionForHR(Marshal.GetHRForLastWin32Error());
+                                throw new InvalidOperationException("VirtualProtect failed unexpectedly."); // fallback if the HRESULT above did not throw
+                            }
+                        }
+                        finally
+                        {
+                            if (refAdded)
+                            {
+                                _handle.DangerousRelease();
+                            }
+                        }
+                    }
+                }
+            }
+
+            public override Memory<T> Memory => _memoryManager.Memory;
+
+            public override Span<T> Span // span over the carved-out section of the allocation
+            {
+                get
+                {
+                    bool refAdded = false;
+                    try
+                    {
+                        _handle.DangerousAddRef(ref refAdded); // hold a handle reference only while the address is read
+                        return new Span<T>((void*)(_handle.DangerousGetHandle() + _byteOffsetIntoHandle), _elementCount);
+                    }
+                    finally
+                    {
+                        if (refAdded)
+                        {
+                            _handle.DangerousRelease(); // released before the caller uses the span, so the caller must keep this instance alive
+                        }
+                    }
+                }
+            }
+
+            public override void Dispose()
+            {
+                _handle.Dispose();
+            }
+
+            public override void MakeReadonly()
+            {
+                Protection = VirtualAllocProtection.PAGE_READONLY;
+            }
+
+            public override void MakeWriteable()
+            {
+                Protection = VirtualAllocProtection.PAGE_READWRITE;
+            }
+
+            private sealed class BoundedMemoryManager : MemoryManager<T>
+            {
+                private readonly WindowsImplementation<T> _impl;
+
+                public BoundedMemoryManager(WindowsImplementation<T> impl)
+                {
+                    _impl = impl;
+                }
+
+                public override Memory<T> Memory => CreateMemory(_impl._elementCount);
+
+                protected override void Dispose(bool disposing)
+                {
+                    // no-op; the handle will be disposed separately
+                }
+
+                public override Span<T> GetSpan() // serves Memory<T>.Span for memory created by this manager, so it must not throw
+                {
+                    return _impl.Span; // same native region the owning WindowsImplementation<T> exposes directly
+                }
+
+                public override MemoryHandle Pin(int elementIndex) // returns a handle pointing at element 'elementIndex' of the region
+                {
+                    if ((uint)elementIndex > (uint)_impl._elementCount) // unsigned compare also rejects negatives; index == count is allowed
+                    {
+                        throw new ArgumentOutOfRangeException(paramName: nameof(elementIndex));
+                    }
+
+                    bool refAdded = false;
+                    try
+                    {
+                        _impl._handle.DangerousAddRef(ref refAdded); // hold a handle reference only while the address is read
+                        return new MemoryHandle((T*)(_impl._handle.DangerousGetHandle() + _impl._byteOffsetIntoHandle) + elementIndex);
+                    }
+                    finally
+                    {
+                        if (refAdded)
+                        {
+                            _impl._handle.DangerousRelease();
+                        }
+                    }
+                }
+
+                public override void Unpin()
+                {
+                    // no-op - we don't unpin native memory
+                }
+            }
+        }
+
+        // from winnt.h
+        [Flags]
+        private enum VirtualAllocAllocationType : uint
+        {
+            MEM_COMMIT = 0x1000,
+            MEM_RESERVE = 0x2000,
+            MEM_DECOMMIT = 0x4000,
+            MEM_RELEASE = 0x8000,
+            MEM_FREE = 0x10000,
+            MEM_PRIVATE = 0x20000,
+            MEM_MAPPED = 0x40000,
+            MEM_RESET = 0x80000,
+            MEM_TOP_DOWN = 0x100000,
+            MEM_WRITE_WATCH = 0x200000,
+            MEM_PHYSICAL = 0x400000,
+            MEM_ROTATE = 0x800000,
+            MEM_LARGE_PAGES = 0x20000000,
+            MEM_4MB_PAGES = 0x80000000,
+        }
+
+        // from winnt.h
+        [Flags]
+        private enum VirtualAllocProtection : uint
+        {
+            PAGE_NOACCESS = 0x01,
+            PAGE_READONLY = 0x02,
+            PAGE_READWRITE = 0x04,
+            PAGE_WRITECOPY = 0x08,
+            PAGE_EXECUTE = 0x10,
+            PAGE_EXECUTE_READ = 0x20,
+            PAGE_EXECUTE_READWRITE = 0x40,
+            PAGE_EXECUTE_WRITECOPY = 0x80,
+            PAGE_GUARD = 0x100,
+            PAGE_NOCACHE = 0x200,
+            PAGE_WRITECOMBINE = 0x400,
+        }
+
+        [StructLayout(LayoutKind.Sequential)]
+        private struct MEMORY_BASIC_INFORMATION
+        {
+            public IntPtr BaseAddress;
+            public IntPtr AllocationBase;
+            public VirtualAllocProtection AllocationProtect;
+            public IntPtr RegionSize;
+            public VirtualAllocAllocationType State;
+            public VirtualAllocProtection Protect;
+            public VirtualAllocAllocationType Type;
+        };
+
+        private sealed class VirtualAllocHandle : SafeHandle // owns the base address returned by VirtualAlloc
+        {
+            // Called by P/Invoke when returning SafeHandles
+            private VirtualAllocHandle()
+                : base(IntPtr.Zero, ownsHandle: true)
+            {
+            }
+
+            // Do not provide a finalizer - SafeHandle's critical finalizer will
+            // call ReleaseHandle for you.
+
+            public override bool IsInvalid => (handle == IntPtr.Zero); // a zero base address marks an invalid handle
+
+            protected override bool ReleaseHandle() =>
+                UnsafeNativeMethods.VirtualFree(handle, IntPtr.Zero, VirtualAllocAllocationType.MEM_RELEASE); // MEM_RELEASE requires a zero size
+        }
+
+        [SuppressUnmanagedCodeSecurity]
+        private static class UnsafeNativeMethods
+        {
+            private const string KERNEL32_LIB = "kernel32.dll";
+
+            // https://msdn.microsoft.com/en-us/library/windows/desktop/aa366887(v=vs.85).aspx
+            [DllImport(KERNEL32_LIB, CallingConvention = CallingConvention.Winapi, SetLastError = true)]
+            public static extern VirtualAllocHandle VirtualAlloc(
+                [In] IntPtr lpAddress,
+                [In] IntPtr dwSize,
+                [In] VirtualAllocAllocationType flAllocationType,
+                [In] VirtualAllocProtection flProtect);
+
+            // https://msdn.microsoft.com/en-us/library/windows/desktop/aa366892(v=vs.85).aspx
+            [DllImport(KERNEL32_LIB, CallingConvention = CallingConvention.Winapi, SetLastError = true)]
+            [ReliabilityContract(Consistency.WillNotCorruptState, Cer.Success)]
+            [return: MarshalAs(UnmanagedType.Bool)]
+            public static extern bool VirtualFree(
+                [In] IntPtr lpAddress,
+                [In] IntPtr dwSize,
+                [In] VirtualAllocAllocationType dwFreeType);
+
+            // https://msdn.microsoft.com/en-us/library/windows/desktop/aa366898(v=vs.85).aspx
+            [DllImport(KERNEL32_LIB, CallingConvention = CallingConvention.Winapi, SetLastError = true)]
+            [return: MarshalAs(UnmanagedType.Bool)]
+            public static extern bool VirtualProtect(
+                [In] IntPtr lpAddress,
+                [In] IntPtr dwSize,
+                [In] VirtualAllocProtection flNewProtect,
+                [Out] out VirtualAllocProtection lpflOldProtect);
+
+            // https://msdn.microsoft.com/en-us/library/windows/desktop/aa366902(v=vs.85).aspx
+            [DllImport(KERNEL32_LIB, CallingConvention = CallingConvention.Winapi, SetLastError = true)]
+            public static extern IntPtr VirtualQuery(
+                [In] IntPtr lpAddress,
+                [Out] out MEMORY_BASIC_INFORMATION lpBuffer,
+                [In] IntPtr dwLength);
+        }
+    }
+}
diff --git a/src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.cs b/src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.cs

new file mode 100644 (file)

index 0000000..cc39ff3
--- /dev/null
+++ b/src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.cs
@@ -0,0 +1,49 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+namespace System.Buffers
+{
+    /// <summary>
+    /// Represents a region of native memory. The <see cref="Memory"/> property can be used
+    /// to get a <see cref="Memory{T}"/> backed by this memory region.
+    /// </summary>
+    public abstract class BoundedMemory<T> : IDisposable where T : unmanaged
+    {
+        /// <summary>
+        /// Returns a value stating whether this native memory block is readonly.
+        /// </summary>
+        public abstract bool IsReadonly { get; }
+
+        /// <summary>
+        /// Gets the <see cref="Memory{T}"/> which represents this native memory.
+        /// This <see cref="BoundedMemory{T}"/> instance must be kept alive while working with the <see cref="Memory{T}"/>.
+        /// </summary>
+        public abstract Memory<T> Memory { get; }
+
+        /// <summary>
+        /// Gets the <see cref="Span{T}"/> which represents this native memory.
+        /// This <see cref="BoundedMemory{T}"/> instance must be kept alive while working with the <see cref="Span{T}"/>.
+        /// </summary>
+        public abstract Span<T> Span { get; }
+
+        /// <summary>
+        /// Disposes this <see cref="BoundedMemory{T}"/> instance.
+        /// </summary>
+        public abstract void Dispose();
+
+        /// <summary>
+        /// Sets this native memory block to be readonly. Writes to this block will cause an AV.
+        /// This method has no effect if the memory block is zero length or if the underlying
+        /// OS does not support marking the memory block as readonly.
+        /// </summary>
+        public abstract void MakeReadonly();
+
+        /// <summary>
+        /// Sets this native memory block to be read+write.
+        /// This method has no effect if the memory block is zero length or if the underlying
+        /// OS does not support marking the memory block as read+write.
+        /// </summary>
+        public abstract void MakeWriteable();
+    }
+}
+\ No newline at end of file
diff --git a/src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/PoisonPagePlacement.cs b/src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/PoisonPagePlacement.cs

new file mode 100644 (file)

index 0000000..ea2caa3
--- /dev/null
+++ b/src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/PoisonPagePlacement.cs
@@ -0,0 +1,26 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+namespace System.Buffers
+{
+    /// <summary>
+    /// Dictates where the poison page should be placed.
+    /// </summary>
+    public enum PoisonPagePlacement
+    {
+        /// <summary>
+        /// The poison page should be placed immediately after the memory region.
+        /// Attempting to access the memory page immediately following the
+        /// span will result in an AV.
+        /// </summary>
+        After,
+
+        /// <summary>
+        /// The poison page should be placed immediately before the memory region.
+        /// Attempting to access the memory page immediately before the
+        /// span will result in an AV.
+        /// </summary>
+        Before,
+    }
+}
+\ No newline at end of file
diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs

index 116238ebc6b4b432abd353a82c6b8d7847182d65..b075c1518268c32131baf2437918f7ca7c1c60e4 100644 (file)
--- a/src/libraries/System.Runtime/ref/System.Runtime.cs
+++ b/src/libraries/System.Runtime/ref/System.Runtime.cs
@@ -7852,6 +7852,14 @@ namespace System.Text
          void System.IDisposable.Dispose() { }
      }
  }
+namespace System.Text.Unicode
+{
+    public static partial class Utf8
+    {
+        public static System.Buffers.OperationStatus FromUtf16(ReadOnlySpan<char> source, Span<byte> destination, out int charsRead, out int bytesWritten, bool replaceInvalidSequences = true, bool isFinalBlock = true) { throw null; }
+        public static System.Buffers.OperationStatus ToUtf16(ReadOnlySpan<byte> source, Span<char> destination, out int bytesRead, out int charsWritten, bool replaceInvalidSequences = true, bool isFinalBlock = true) { throw null; }
+    }
+}
  namespace System.Threading
  {
      public readonly partial struct CancellationToken
diff --git a/src/libraries/System.Runtime/src/ApiCompatBaseline.netcoreappaot.txt b/src/libraries/System.Runtime/src/ApiCompatBaseline.netcoreappaot.txt

index 7b90b8c0a2cc50d7291e840de2e5e636bb783b0f..e43a6ada8673f441e0a470d3c26109962eeece8b 100644 (file)
--- a/src/libraries/System.Runtime/src/ApiCompatBaseline.netcoreappaot.txt
+++ b/src/libraries/System.Runtime/src/ApiCompatBaseline.netcoreappaot.txt
@@ -8,3 +8,4 @@ MembersMustExist : Member 'System.Text.Rune.DecodeUtf16FromEnd(System.ReadOnlySp
  MembersMustExist : Member 'System.Text.Rune.DecodeUtf8(System.ReadOnlySpan<System.Byte>, System.Text.Rune, System.Int32)' does not exist in the implementation but it does exist in the contract.
  MembersMustExist : Member 'System.Text.Rune.DecodeUtf8FromEnd(System.ReadOnlySpan<System.Byte>, System.Text.Rune, System.Int32)' does not exist in the implementation but it does exist in the contract.
  MembersMustExist : Member 'System.String System.Runtime.CompilerServices.RuntimeFeature.DefaultImplementationsOfInterfaces' does not exist in the implementation but it does exist in the contract.
+TypesMustExist : Type 'System.Text.Unicode.Utf8' does not exist in the implementation but it does exist in the contract.
diff --git a/src/libraries/System.Runtime/src/ApiCompatBaseline.uapaot.txt b/src/libraries/System.Runtime/src/ApiCompatBaseline.uapaot.txt

index b18a31fac0394879cc2dc526bae909a30ecf958a..e43754c8d46f7ad013cf524f3cebbbcc1aa68e87 100644 (file)
--- a/src/libraries/System.Runtime/src/ApiCompatBaseline.uapaot.txt
+++ b/src/libraries/System.Runtime/src/ApiCompatBaseline.uapaot.txt
@@ -34,3 +34,4 @@ MembersMustExist : Member 'System.Text.Rune.DecodeUtf16FromEnd(System.ReadOnlySp
  MembersMustExist : Member 'System.Text.Rune.DecodeUtf8(System.ReadOnlySpan<System.Byte>, System.Text.Rune, System.Int32)' does not exist in the implementation but it does exist in the contract.
  MembersMustExist : Member 'System.Text.Rune.DecodeUtf8FromEnd(System.ReadOnlySpan<System.Byte>, System.Text.Rune, System.Int32)' does not exist in the implementation but it does exist in the contract.
  MembersMustExist : Member 'System.String System.Runtime.CompilerServices.RuntimeFeature.DefaultImplementationsOfInterfaces' does not exist in the implementation but it does exist in the contract.
+TypesMustExist : Type 'System.Text.Unicode.Utf8' does not exist in the implementation but it does exist in the contract.
diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests.csproj b/src/libraries/System.Runtime/tests/System.Runtime.Tests.csproj

index 1627c8de70fb9ca7a993e23895d90e28b476584a..a2f3d2e61383caecae8f30ee2a16743e47931c00 100644 (file)
--- a/src/libraries/System.Runtime/tests/System.Runtime.Tests.csproj
+++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests.csproj
@@ -285,6 +285,9 @@
      <Compile Include="System\Text\RuneTests.netcoreapp.cs" />
      <Compile Include="System\Text\RuneTests.TestData.netcoreapp.cs" />
      <Compile Include="System\Text\StringBuilderTests.netcoreapp.cs" />
+    <Compile Include="System\Text\Unicode\Utf8Tests.netcoreapp.cs" />
+    <Compile Include="System\Text\Unicode\Utf8Tests.ToBytes.netcoreapp.cs" />
+    <Compile Include="System\Text\Unicode\Utf8Tests.ToChars.netcoreapp.cs" />
      <Compile Include="System\Type\TypePropertyTests.netcoreapp.cs" />
      <Compile Include="System\Type\TypeTests.netcoreapp.cs" />
      <Compile Include="System\ArgIteratorTests.netcoreapp.cs" />
diff --git a/src/libraries/System.Runtime/tests/System/Text/Unicode/Utf8Tests.ToBytes.netcoreapp.cs b/src/libraries/System.Runtime/tests/System/Text/Unicode/Utf8Tests.ToBytes.netcoreapp.cs

new file mode 100644 (file)

index 0000000..18ceedc
--- /dev/null
+++ b/src/libraries/System.Runtime/tests/System/Text/Unicode/Utf8Tests.ToBytes.netcoreapp.cs
@@ -0,0 +1,264 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Buffers;
+using System.Linq;
+using Xunit;
+
+namespace System.Text.Unicode.Tests
+{
+    public partial class Utf8Tests
+    {
+        [Theory]
+        [InlineData("", "")] // empty string is OK
+        [InlineData(X_UTF16, X_UTF8)]
+        [InlineData(E_ACUTE_UTF16, E_ACUTE_UTF8)]
+        [InlineData(EURO_SYMBOL_UTF16, EURO_SYMBOL_UTF8)]
+        public void ToBytes_WithSmallValidBuffers(string utf16Input, string expectedUtf8TranscodingHex)
+        {
+            // These cases target the "slow processing" code path at the end of TranscodeToUtf8,
+            // so every input must be shorter than 2 chars; the assertion below enforces that.
+
+            Assert.InRange(utf16Input.Length, 0, 1);
+
+            // Two hex digits describe one byte, so this sizes the destination to the expected output.
+            int exactDestinationSize = expectedUtf8TranscodingHex.Length / 2;
+            ReadOnlySpan<byte> expectedBytes = DecodeHex(expectedUtf8TranscodingHex);
+
+            ToBytes_Test_Core(
+                utf16Input,
+                exactDestinationSize,
+                replaceInvalidSequences: false,
+                isFinalChunk: false,
+                expectedOperationStatus: OperationStatus.Done,
+                expectedNumCharsRead: utf16Input.Length,
+                expectedUtf8Transcoding: expectedBytes);
+        }
+
+        [Theory]
+        [InlineData("AB")] // 2 ASCII chars, hits fast inner loop
+        [InlineData("ABCD")] // 4 ASCII chars, hits fast inner loop
+        [InlineData("ABCDEF")] // 6 ASCII chars, hits fast inner loop
+        [InlineData("ABCDEFGH")] // 8 ASCII chars, hits fast inner loop
+        [InlineData("ABCDEFGHIJ")] // 10 ASCII chars, hits fast inner loop
+        [InlineData("ABCDEF" + E_ACUTE_UTF16 + "HIJ")] // interrupts inner loop due to non-ASCII char in first char of first DWORD
+        [InlineData("ABCDEFG" + EURO_SYMBOL_UTF16 + "IJ")] // interrupts inner loop due to non-ASCII char in second char of first DWORD
+        [InlineData("ABCDEFGH" + E_ACUTE_UTF16 + "J")] // interrupts inner loop due to non-ASCII char in first char of second DWORD
+        [InlineData("ABCDEFGHI" + EURO_SYMBOL_UTF16)] // interrupts inner loop due to non-ASCII char in second char of second DWORD
+        [InlineData(X_UTF16 + E_ACUTE_UTF16)] // drains first ASCII char then falls down to slow path
+        [InlineData(X_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16)] // drains first ASCII char then consumes 2x 2-byte sequences at once
+        [InlineData(E_ACUTE_UTF16 + X_UTF16)] // no first ASCII char to drain, consumes 2-byte seq followed by ASCII char
+        [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16)] // stay within 2x 2-byte sequence processing loop
+        [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + X_UTF16)] // break out of 2x 2-byte seq loop due to ASCII data in second char of DWORD
+        [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + X_UTF16 + X_UTF16)] // break out of 2x 2-byte seq loop due to ASCII data in first char of DWORD
+        [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + EURO_SYMBOL_UTF16)] // break out of 2x 2-byte seq loop due to 3-byte data
+        [InlineData(E_ACUTE_UTF16 + EURO_SYMBOL_UTF16)] // 2-byte logic sees next char isn't ASCII, cannot read full DWORD from remaining input buffer, falls down to slow drain loop
+        [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + X_UTF16)] // 2x 3-byte logic can't read a full DWORD from next part of buffer, falls down to slow drain loop
+        [InlineData(EURO_SYMBOL_UTF16 + X_UTF16)] // 3-byte processing loop consumes trailing ASCII char, but can't read next DWORD, falls down to slow drain loop
+        [InlineData(EURO_SYMBOL_UTF16 + X_UTF16 + X_UTF16)] // 3-byte processing loop consumes trailing ASCII char, but can't read next DWORD, falls down to slow drain loop
+        [InlineData(EURO_SYMBOL_UTF16 + E_ACUTE_UTF16)] // 3-byte processing loop has no trailing ASCII char to consume (next char is non-ASCII), can't read DWORD, falls down to slow drain loop
+        [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // stay within 2x 3-byte sequence processing loop
+        [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + X_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // consume stray ASCII char at beginning of DWORD after 2x 3-byte sequence
+        [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + X_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // consume stray ASCII char at end of DWORD after 2x 3-byte sequence
+        [InlineData(EURO_SYMBOL_UTF16 + E_ACUTE_UTF16 + X_UTF16)] // consume 2-byte sequence as second char in DWORD which begins with 3-byte encoded char
+        [InlineData(EURO_SYMBOL_UTF16 + GRINNING_FACE_UTF16)] // 3-byte sequence followed by 4-byte sequence
+        [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + GRINNING_FACE_UTF16)] // 2x 3-byte sequence followed by 4-byte sequence
+        [InlineData(GRINNING_FACE_UTF16)] // single 4-byte surrogate char pair
+        [InlineData(GRINNING_FACE_UTF16 + EURO_SYMBOL_UTF16)] // 4-byte surrogate char pair, cannot read next DWORD, falls down to slow drain loop
+        public void ToBytes_WithLargeValidBuffers(string utf16Input)
+        {
+            // These cases target the "fast processing" main loop of TranscodeToUtf8,
+            // which requires at least 2 input chars.
+
+            Assert.True(utf16Input.Length >= 2);
+
+            // The same input is transcoded repeatedly while the destination grows from 0 bytes up
+            // to exactly the full output size. Every size that cannot hold the whole output must
+            // yield DestinationTooSmall and report only the scalars that fit completely.
+
+            Rune[] scalars = utf16Input.EnumerateRunes().ToArray();
+            int finalScalarIndex = scalars.Length - 1;
+
+            // Zero-length destination: nothing can be written, so nothing is expected to be consumed.
+            ToBytes_Test_Core(
+                utf16Input: utf16Input,
+                destinationSize: 0,
+                replaceInvalidSequences: false,
+                isFinalChunk: false,
+                expectedOperationStatus: OperationStatus.DestinationTooSmall,
+                expectedNumCharsRead: 0,
+                expectedUtf8Transcoding: ReadOnlySpan<byte>.Empty);
+
+            int charsConsumedSoFar = 0;
+            byte[] utf8SoFar = Array.Empty<byte>();
+
+            for (int scalarIndex = 0; scalarIndex <= finalScalarIndex; scalarIndex++)
+            {
+                Rune currentScalar = scalars[scalarIndex];
+                int encodedLength = currentScalar.Utf8SequenceLength;
+
+                // Destination sizes that end partway through this scalar's encoding: the scalar
+                // must not be consumed, so the expectations are those of the previous scalar.
+                for (int extraBytes = 1; extraBytes < encodedLength; extraBytes++)
+                {
+                    ToBytes_Test_Core(
+                        utf16Input: utf16Input,
+                        destinationSize: utf8SoFar.Length + extraBytes,
+                        replaceInvalidSequences: false,
+                        isFinalChunk: false,
+                        expectedOperationStatus: OperationStatus.DestinationTooSmall,
+                        expectedNumCharsRead: charsConsumedSoFar,
+                        expectedUtf8Transcoding: utf8SoFar);
+                }
+
+                // Destination that ends exactly after this scalar's encoding.
+
+                charsConsumedSoFar += currentScalar.Utf16SequenceLength;
+                utf8SoFar = utf8SoFar.Concat(ToUtf8(currentScalar)).ToArray();
+
+                // Only the last scalar completes the whole input; earlier ones still run out of room.
+                OperationStatus statusAtThisSize = (scalarIndex == finalScalarIndex)
+                    ? OperationStatus.Done
+                    : OperationStatus.DestinationTooSmall;
+
+                ToBytes_Test_Core(
+                    utf16Input: utf16Input,
+                    destinationSize: utf8SoFar.Length,
+                    replaceInvalidSequences: false,
+                    isFinalChunk: false,
+                    expectedOperationStatus: statusAtThisSize,
+                    expectedNumCharsRead: charsConsumedSoFar,
+                    expectedUtf8Transcoding: utf8SoFar);
+            }
+        }
+
+        [Theory]
+        [InlineData('\uD800', OperationStatus.NeedMoreData)] // standalone high surrogate
+        [InlineData('\uDFFF', OperationStatus.InvalidData)] // standalone low surrogate
+        public void ToBytes_WithOnlyStandaloneSurrogates(char charValue, OperationStatus expectedOperationStatus)
+        {
+            // The whole input is one unpaired surrogate, transcoded without replacement into a
+            // zero-length destination. Nothing should be consumed or written; only the reported
+            // status differs between the high and low surrogate cases.
+            char[] loneSurrogate = { charValue };
+
+            ToBytes_Test_Core(
+                loneSurrogate,
+                destinationSize: 0,
+                replaceInvalidSequences: false,
+                isFinalChunk: false,
+                expectedOperationStatus: expectedOperationStatus,
+                expectedNumCharsRead: 0,
+                expectedUtf8Transcoding: ReadOnlySpan<byte>.Empty);
+        }
+
+        [Theory]
+        [InlineData("<LOW><HIGH>", 0, "")] // swapped surrogate pair characters
+        [InlineData("A<LOW><HIGH>", 1, "41")] // consume standalone ASCII char, then swapped surrogate pair characters
+        [InlineData("A<HIGH>B", 1, "41")] // consume standalone ASCII char, then standalone high surrogate char
+        [InlineData("A<LOW>B", 1, "41")] // consume standalone ASCII char, then standalone low surrogate char
+        [InlineData("AB<HIGH><HIGH>", 2, "4142")] // consume two ASCII chars, then standalone high surrogate char
+        [InlineData("AB<LOW><LOW>", 2, "4142")] // consume two ASCII chars, then standalone low surrogate char
+        public void ToBytes_WithInvalidSurrogates(string utf16Input, int expectedNumCharsConsumed, string expectedUtf8TranscodingHex)
+        {
+            // xUnit can't carry ill-formed strings in [InlineData], so the data rows use textual
+            // placeholders that are swapped for real unpaired surrogates here.
+
+            string withHighSurrogates = utf16Input.Replace("<HIGH>", "\uD800");
+            utf16Input = withHighSurrogates.Replace("<LOW>", "\uDFFF");
+
+            // These cases target the "fast processing" main loop of TranscodeToUtf8,
+            // which requires at least 2 input chars.
+
+            Assert.True(utf16Input.Length >= 2);
+
+            // Two hex digits describe one byte, so the destination fits the valid prefix exactly.
+            int exactDestinationSize = expectedUtf8TranscodingHex.Length / 2;
+            ReadOnlySpan<byte> expectedBytes = DecodeHex(expectedUtf8TranscodingHex);
+
+            ToBytes_Test_Core(
+                utf16Input,
+                exactDestinationSize,
+                replaceInvalidSequences: false,
+                isFinalChunk: false,
+                expectedOperationStatus: OperationStatus.InvalidData,
+                expectedNumCharsRead: expectedNumCharsConsumed,
+                expectedUtf8Transcoding: expectedBytes);
+        }
+
+        [Theory]
+        [InlineData("<LOW><HIGH>", REPLACEMENT_CHAR_UTF8)] // standalone low surr. and incomplete high surr.
+        [InlineData("<HIGH><HIGH>", REPLACEMENT_CHAR_UTF8)] // standalone high surr. and incomplete high surr.
+        [InlineData("<LOW><LOW>", REPLACEMENT_CHAR_UTF8 + REPLACEMENT_CHAR_UTF8)] // two standalone low surr., each replaced
+        [InlineData("A<LOW>B<LOW>C<HIGH>D", "41" + REPLACEMENT_CHAR_UTF8 + "42" + REPLACEMENT_CHAR_UTF8 + "43" + REPLACEMENT_CHAR_UTF8 + "44")] // standalone low, low, high surrounded by other data
+        public void ToBytes_WithReplacements(string utf16Input, string expectedUtf8TranscodingHex)
+        {
+            // xUnit can't carry ill-formed strings in [InlineData], so the data rows use textual
+            // placeholders that are swapped for real unpaired surrogates here.
+
+            string withHighSurrogates = utf16Input.Replace("<HIGH>", "\uD800");
+            utf16Input = withHighSurrogates.Replace("<LOW>", "\uDFFF");
+
+            bool endsWithHighSurrogate = char.IsHighSurrogate(utf16Input.Last());
+
+            // With isFinalChunk = false, a trailing high surrogate is expected to stay unconsumed
+            // and be reported as NeedMoreData; otherwise the whole input is expected to be consumed.
+            OperationStatus expectedStatus;
+            int expectedCharsRead;
+            if (endsWithHighSurrogate)
+            {
+                expectedStatus = OperationStatus.NeedMoreData;
+                expectedCharsRead = utf16Input.Length - 1;
+            }
+            else
+            {
+                expectedStatus = OperationStatus.Done;
+                expectedCharsRead = utf16Input.Length;
+            }
+
+            ToBytes_Test_Core(
+                utf16Input,
+                expectedUtf8TranscodingHex.Length / 2,
+                replaceInvalidSequences: true,
+                isFinalChunk: false,
+                expectedOperationStatus: expectedStatus,
+                expectedNumCharsRead: expectedCharsRead,
+                expectedUtf8Transcoding: DecodeHex(expectedUtf8TranscodingHex));
+
+            if (!endsWithHighSurrogate)
+            {
+                return;
+            }
+
+            // Repeat with isFinalChunk = true: the trailing high surrogate is now expected to be
+            // replaced too, so both the destination and the expected output gain one replacement char.
+            int sizeWithTrailingReplacement = expectedUtf8TranscodingHex.Length / 2 + Rune.ReplacementChar.Utf8SequenceLength;
+
+            ToBytes_Test_Core(
+                utf16Input,
+                sizeWithTrailingReplacement,
+                replaceInvalidSequences: true,
+                isFinalChunk: true,
+                expectedOperationStatus: OperationStatus.Done,
+                expectedNumCharsRead: utf16Input.Length,
+                expectedUtf8Transcoding: DecodeHex(expectedUtf8TranscodingHex + REPLACEMENT_CHAR_UTF8));
+        }
+
+        [Theory]
+        [InlineData(E_ACUTE_UTF16 + "<LOW>", true, 1, OperationStatus.DestinationTooSmall, E_ACUTE_UTF8)] // not enough output buffer to hold U+FFFD
+        [InlineData(E_ACUTE_UTF16 + "<LOW>", true, 2, OperationStatus.Done, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8)] // replace standalone low surr. at end
+        [InlineData(E_ACUTE_UTF16 + "<HIGH>", true, 1, OperationStatus.DestinationTooSmall, E_ACUTE_UTF8)] // not enough output buffer to hold U+FFFD
+        [InlineData(E_ACUTE_UTF16 + "<HIGH>", true, 2, OperationStatus.Done, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8)] // replace standalone high surr. at end
+        [InlineData(E_ACUTE_UTF16 + "<HIGH>", false, 1, OperationStatus.NeedMoreData, E_ACUTE_UTF8)] // don't replace standalone high surr. at end
+        [InlineData(E_ACUTE_UTF16 + "<HIGH>" + X_UTF16, true, 2, OperationStatus.DestinationTooSmall, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8)] // not enough output buffer to hold 'X'
+        [InlineData(E_ACUTE_UTF16 + "<HIGH>" + X_UTF16, false, 2, OperationStatus.DestinationTooSmall, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8)] // not enough output buffer to hold 'X'
+        [InlineData(E_ACUTE_UTF16 + "<HIGH>" + X_UTF16, true, 3, OperationStatus.Done, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8 + X_UTF8)] // replacement followed by 'X'
+        [InlineData(E_ACUTE_UTF16 + "<HIGH>" + X_UTF16, false, 3, OperationStatus.Done, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8 + X_UTF8)] // replacement followed by 'X'
+        public void ToBytes_WithReplacements_AndCustomBufferSizes(string utf16Input, bool isFinalChunk, int expectedNumCharsConsumed, OperationStatus expectedOperationStatus, string expectedUtf8TranscodingHex)
+        {
+            // xUnit can't carry ill-formed strings in [InlineData], so the data rows use textual
+            // placeholders that are swapped for real unpaired surrogates here.
+
+            string withHighSurrogates = utf16Input.Replace("<HIGH>", "\uD800");
+            utf16Input = withHighSurrogates.Replace("<LOW>", "\uDFFF");
+
+            // The destination holds exactly the expected output (two hex digits per byte), so a
+            // row whose expected output omits the last scalar also leaves no room to write it.
+            int exactDestinationSize = expectedUtf8TranscodingHex.Length / 2;
+            ReadOnlySpan<byte> expectedBytes = DecodeHex(expectedUtf8TranscodingHex);
+
+            ToBytes_Test_Core(
+                utf16Input,
+                exactDestinationSize,
+                replaceInvalidSequences: true,
+                isFinalChunk: isFinalChunk,
+                expectedOperationStatus: expectedOperationStatus,
+                expectedNumCharsRead: expectedNumCharsConsumed,
+                expectedUtf8Transcoding: expectedBytes);
+        }
+
+        [Fact]
+        public void ToBytes_AllPossibleScalarValues()
+        {
+            // Transcodes the shared s_allScalarsAsUtf16 buffer in one call and expects the output
+            // to equal s_allScalarsAsUtf8, with the destination sized to exactly that length.
+            ReadOnlySpan<char> allScalarsUtf16 = s_allScalarsAsUtf16.Span;
+            ReadOnlySpan<byte> allScalarsUtf8 = s_allScalarsAsUtf8.Span;
+
+            ToBytes_Test_Core(
+                allScalarsUtf16,
+                s_allScalarsAsUtf8.Length,
+                replaceInvalidSequences: false,
+                isFinalChunk: false,
+                expectedOperationStatus: OperationStatus.Done,
+                expectedNumCharsRead: s_allScalarsAsUtf16.Length,
+                expectedUtf8Transcoding: allScalarsUtf8);
+        }
+
+        // Shared driver for the ToBytes tests: runs Utf8.FromUtf16 once over the given input with a
+        // destination of exactly destinationSize bytes, then checks the returned status, the
+        // chars-read count, the bytes-written count and the written bytes against the expectations.
+        private static void ToBytes_Test_Core(ReadOnlySpan<char> utf16Input, int destinationSize, bool replaceInvalidSequences, bool isFinalChunk, OperationStatus expectedOperationStatus, int expectedNumCharsRead, ReadOnlySpan<byte> expectedUtf8Transcoding)
+        {
+            // Arrange
+            // NOTE(review): both buffers come from BoundedMemory, presumably so that out-of-bounds
+            // access by the transcoder faults instead of passing silently - confirm against BoundedMemory.
+
+            using (BoundedMemory<char> source = BoundedMemory.AllocateFromExistingData(utf16Input))
+            using (BoundedMemory<byte> destination = BoundedMemory.Allocate<byte>(destinationSize))
+            {
+                source.MakeReadonly();
+
+                // Act
+
+                int charsRead;
+                int bytesWritten;
+                OperationStatus status = Utf8.FromUtf16(source.Span, destination.Span, out charsRead, out bytesWritten, replaceInvalidSequences, isFinalChunk);
+
+                // Assert
+
+                Assert.Equal(expectedOperationStatus, status);
+                Assert.Equal(expectedNumCharsRead, charsRead);
+                Assert.Equal(expectedUtf8Transcoding.Length, bytesWritten);
+
+                // Compare only the bytes the transcoder reported as written.
+                byte[] expectedBytes = expectedUtf8Transcoding.ToArray();
+                byte[] actualBytes = destination.Span.Slice(0, bytesWritten).ToArray();
+                Assert.Equal(expectedBytes, actualBytes);
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Runtime/tests/System/Text/Unicode/Utf8Tests.ToChars.netcoreapp.cs b/src/libraries/System.Runtime/tests/System/Text/Unicode/Utf8Tests.ToChars.netcoreapp.cs

new file mode 100644 (file)

index 0000000..6dda95d
--- /dev/null
+++ b/src/libraries/System.Runtime/tests/System/Text/Unicode/Utf8Tests.ToChars.netcoreapp.cs
@@ -0,0 +1,304 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Buffers;
+using System.Linq;
+using Xunit;
+
+namespace System.Text.Unicode.Tests
+{
+    public partial class Utf8Tests
+    {
+        [Theory]
+        [InlineData("80", 0, "")] // sequence cannot begin with continuation character
+        [InlineData("8182", 0, "")] // sequence cannot begin with continuation character
+        [InlineData("838485", 0, "")] // sequence cannot begin with continuation character
+        [InlineData(X_UTF8 + "80", 1, X_UTF16)] // sequence cannot begin with continuation character
+        [InlineData(X_UTF8 + "8182", 1, X_UTF16)] // sequence cannot begin with continuation character
+        [InlineData("C0", 0, "")] // [ C0 ] is always invalid
+        [InlineData("C080", 0, "")] // [ C0 ] is always invalid
+        [InlineData("C08081", 0, "")] // [ C0 ] is always invalid
+        [InlineData(X_UTF8 + "C1", 1, X_UTF16)] // [ C1 ] is always invalid
+        [InlineData(X_UTF8 + "C180", 1, X_UTF16)] // [ C1 ] is always invalid
+        [InlineData(X_UTF8 + "C27F", 1, X_UTF16)] // [ C2 ] is improperly terminated
+        [InlineData("E2827F", 0, "")] // [ E2 82 ] is improperly terminated
+        [InlineData("E09F80", 0, "")] // [ E0 9F ... ] is overlong
+        [InlineData("E0C080", 0, "")] // [ E0 ] is improperly terminated
+        [InlineData("ED7F80", 0, "")] // [ ED ] is improperly terminated
+        [InlineData("EDA080", 0, "")] // [ ED A0 ... ] is surrogate
+        public void ToChars_WithSmallInvalidBuffers(string utf8HexInput, int expectedNumBytesConsumed, string expectedUtf16Transcoding)
+        {
+            // These cases target the "slow processing" code path at the end of TranscodeToUtf16,
+            // so every input must be shorter than 4 bytes, i.e. at most 6 hex digits.
+
+            Assert.InRange(utf8HexInput.Length, 0, 6);
+
+            var utf8Bytes = DecodeHex(utf8HexInput);
+
+            // The destination fits exactly the chars expected from the valid prefix of the input.
+            int exactDestinationSize = expectedUtf16Transcoding.Length;
+
+            ToChars_Test_Core(
+                utf8Input: utf8Bytes,
+                destinationSize: exactDestinationSize,
+                replaceInvalidSequences: false,
+                isFinalChunk: false,
+                expectedOperationStatus: OperationStatus.InvalidData,
+                expectedNumBytesRead: expectedNumBytesConsumed,
+                expectedUtf16Transcoding: expectedUtf16Transcoding);
+        }
+
+        [Theory]
+        [InlineData("C2", 0, "")] // [ C2 ] is an incomplete sequence
+        [InlineData("E282", 0, "")] // [ E2 82 ] is an incomplete sequence
+        [InlineData(X_UTF8 + "C2", 1, X_UTF16)] // [ C2 ] is an incomplete sequence
+        [InlineData(X_UTF8 + "E0", 1, X_UTF16)] // [ E0 ] is an incomplete sequence
+        [InlineData(X_UTF8 + "E0BF", 1, X_UTF16)] // [ E0 BF ] is an incomplete sequence
+        [InlineData(X_UTF8 + "F0", 1, X_UTF16)] // [ F0 ] is an incomplete sequence
+        [InlineData(X_UTF8 + "F0BF", 1, X_UTF16)] // [ F0 BF ] is an incomplete sequence
+        [InlineData(X_UTF8 + "F0BFA0", 1, X_UTF16)] // [ F0 BF A0 ] is an incomplete sequence
+        [InlineData(E_ACUTE_UTF8 + "C2", 2, E_ACUTE_UTF16)] // [ C2 ] is an incomplete sequence
+        [InlineData(E_ACUTE_UTF8 + "E0", 2, E_ACUTE_UTF16)] // [ E0 ] is an incomplete sequence
+        [InlineData(E_ACUTE_UTF8 + "F0", 2, E_ACUTE_UTF16)] // [ F0 ] is an incomplete sequence
+        [InlineData(E_ACUTE_UTF8 + "E0BF", 2, E_ACUTE_UTF16)] // [ E0 BF ] is an incomplete sequence
+        [InlineData(E_ACUTE_UTF8 + "F0BF", 2, E_ACUTE_UTF16)] // [ F0 BF ] is an incomplete sequence
+        [InlineData(EURO_SYMBOL_UTF8 + "C2", 3, EURO_SYMBOL_UTF16)] // [ C2 ] is an incomplete sequence
+        [InlineData(EURO_SYMBOL_UTF8 + "E0", 3, EURO_SYMBOL_UTF16)] // [ E0 ] is an incomplete sequence
+        [InlineData(EURO_SYMBOL_UTF8 + "F0", 3, EURO_SYMBOL_UTF16)] // [ F0 ] is an incomplete sequence
+        public void ToChars_WithVariousIncompleteBuffers(string utf8HexInput, int expectedNumBytesConsumed, string expectedUtf16Transcoding)
+        {
+            // Every input ends with the leading bytes of a multi-byte sequence that has been cut
+            // short. With isFinalChunk = false the test expects NeedMoreData, with only the
+            // complete sequences ahead of the truncated one reported as consumed.
+
+            var utf8Bytes = DecodeHex(utf8HexInput);
+
+            // The destination fits exactly the chars expected from the complete prefix of the input.
+            int exactDestinationSize = expectedUtf16Transcoding.Length;
+
+            ToChars_Test_Core(
+                utf8Input: utf8Bytes,
+                destinationSize: exactDestinationSize,
+                replaceInvalidSequences: false,
+                isFinalChunk: false,
+                expectedOperationStatus: OperationStatus.NeedMoreData,
+                expectedNumBytesRead: expectedNumBytesConsumed,
+                expectedUtf16Transcoding: expectedUtf16Transcoding);
+        }
+
+        [Theory]
+        /* SMALL VALID BUFFERS - tests drain loop at end of method */
+        [InlineData("")] // empty string is OK
+        [InlineData("X")]
+        [InlineData("XY")]
+        [InlineData("XYZ")]
+        [InlineData(E_ACUTE_UTF16)]
+        [InlineData(X_UTF16 + E_ACUTE_UTF16)]
+        [InlineData(E_ACUTE_UTF16 + X_UTF16)]
+        [InlineData(EURO_SYMBOL_UTF16)]
+        /* LARGE VALID BUFFERS - test main loop at beginning of method */
+        [InlineData(E_ACUTE_UTF16 + "ABCD" + "0123456789:;<=>?")] // Loop unrolling at end of buffer
+        [InlineData(E_ACUTE_UTF16 + "ABCD" + "0123456789:;<=>?" + "01234567" + E_ACUTE_UTF16 + "89:;<=>?")] // Loop unrolling interrupted by non-ASCII
+        [InlineData("ABC" + E_ACUTE_UTF16 + "0123")] // 3 ASCII bytes followed by non-ASCII
+        [InlineData("AB" + E_ACUTE_UTF16 + "0123")] // 2 ASCII bytes followed by non-ASCII
+        [InlineData("A" + E_ACUTE_UTF16 + "0123")] // 1 ASCII byte followed by non-ASCII
+        [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16)] // 4x 2-byte sequences, exercises optimization code path in 2-byte sequence processing
+        [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + "PQ")] // 3x 2-byte sequences + 2 ASCII bytes, exercises optimization code path in 2-byte sequence processing
+        [InlineData(E_ACUTE_UTF16 + "PQ")] // single 2-byte sequence + 2 trailing ASCII bytes, exercises draining logic in 2-byte sequence processing
+        [InlineData(E_ACUTE_UTF16 + "P" + E_ACUTE_UTF16 + "0@P")] // single 2-byte sequence + 1 trailing ASCII byte + 2-byte sequence, exercises draining logic in 2-byte sequence processing
+        [InlineData(EURO_SYMBOL_UTF16 + "@")] // single 3-byte sequence + 1 trailing ASCII byte, exercises draining logic in 3-byte sequence processing
+        [InlineData(EURO_SYMBOL_UTF16 + "@P`")] // single 3-byte sequence + 3 trailing ASCII bytes, exercises draining logic and "running out of data" logic in 3-byte sequence processing
+        [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // 3x 3-byte sequences, exercises "stay within 3-byte loop" logic in 3-byte sequence processing
+        [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // 4x 3-byte sequences, exercises "consume multiple bytes at a time" logic in 3-byte sequence processing
+        [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + E_ACUTE_UTF16)] // 3x 3-byte sequences + single 2-byte sequence, exercises "consume multiple bytes at a time" logic in 3-byte sequence processing
+        [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16)] // 2x 3-byte sequences + 4x 2-byte sequences, exercises "consume multiple bytes at a time" logic in 3-byte sequence processing
+        [InlineData(GRINNING_FACE_UTF16 + GRINNING_FACE_UTF16)] // 2x 4-byte sequences, exercises 4-byte sequence processing
+        [InlineData(GRINNING_FACE_UTF16 + "@AB")] // single 4-byte sequence + 3 ASCII bytes, exercises 4-byte sequence processing and draining logic
+        [InlineData("\U0001F938\U0001F3FD\u200D\u2640\uFE0F")] // U+1F938 U+1F3FD U+200D U+2640 U+FE0F WOMAN CARTWHEELING: MEDIUM SKIN TONE, exercising switching between multiple sequence lengths
+        public void ToChars_ValidBuffers(string utf16Input)
+        {
+            // The same UTF-8 input is transcoded repeatedly while the destination grows from 0
+            // chars up to exactly the full output size. Every size that cannot hold the whole
+            // output must yield DestinationTooSmall and report only the scalars that fit completely.
+
+            Rune[] scalars = utf16Input.EnumerateRunes().ToArray();
+            int finalScalarIndex = scalars.Length - 1;
+
+            // Build the UTF-8 input with the test suite's own reference encoder (ToUtf8).
+
+            byte[] utf8Bytes = scalars.SelectMany(ToUtf8).ToArray();
+
+            // Zero-length destination: succeeds only when there is nothing to transcode at all.
+            OperationStatus statusForEmptyDestination;
+            if (utf8Bytes.Length == 0)
+            {
+                statusForEmptyDestination = OperationStatus.Done;
+            }
+            else
+            {
+                statusForEmptyDestination = OperationStatus.DestinationTooSmall;
+            }
+
+            ToChars_Test_Core(
+                utf8Input: utf8Bytes,
+                destinationSize: 0,
+                replaceInvalidSequences: false,
+                isFinalChunk: false,
+                expectedOperationStatus: statusForEmptyDestination,
+                expectedNumBytesRead: 0,
+                expectedUtf16Transcoding: ReadOnlySpan<char>.Empty);
+
+            int bytesConsumedSoFar = 0;
+            char[] utf16SoFar = Array.Empty<char>();
+
+            for (int scalarIndex = 0; scalarIndex <= finalScalarIndex; scalarIndex++)
+            {
+                Rune currentScalar = scalars[scalarIndex];
+
+                // A non-BMP scalar needs a surrogate pair, so also try a destination with room
+                // for only one of its two chars: the scalar must not be consumed.
+
+                bool needsSurrogatePair = !currentScalar.IsBmp;
+                if (needsSurrogatePair)
+                {
+                    ToChars_Test_Core(
+                        utf8Input: utf8Bytes,
+                        destinationSize: utf16SoFar.Length + 1,
+                        replaceInvalidSequences: false,
+                        isFinalChunk: false,
+                        expectedOperationStatus: OperationStatus.DestinationTooSmall,
+                        expectedNumBytesRead: bytesConsumedSoFar,
+                        expectedUtf16Transcoding: utf16SoFar);
+                }
+
+                // Destination that ends exactly after this scalar's encoding.
+
+                bytesConsumedSoFar += currentScalar.Utf8SequenceLength;
+                utf16SoFar = utf16SoFar.Concat(ToUtf16(currentScalar)).ToArray();
+
+                // Only the last scalar completes the whole input; earlier ones still run out of room.
+                OperationStatus statusAtThisSize = (scalarIndex == finalScalarIndex)
+                    ? OperationStatus.Done
+                    : OperationStatus.DestinationTooSmall;
+
+                ToChars_Test_Core(
+                    utf8Input: utf8Bytes,
+                    destinationSize: utf16SoFar.Length,
+                    replaceInvalidSequences: false,
+                    isFinalChunk: false,
+                    expectedOperationStatus: statusAtThisSize,
+                    expectedNumBytesRead: bytesConsumedSoFar,
+                    expectedUtf16Transcoding: utf16SoFar);
+            }
+        }
+
+        [Theory]
+        [InlineData("3031" + "80" + "202122232425", 2, "01")] // Continuation character at start of sequence should match no bitmask
+        [InlineData("3031" + "C080" + "2021222324", 2, "01")] // Overlong 2-byte sequence at start of DWORD
+        [InlineData("3031" + "C180" + "2021222324", 2, "01")] // Overlong 2-byte sequence at start of DWORD
+        [InlineData("C280" + "C180", 2, "\u0080")] // Overlong 2-byte sequence at end of DWORD
+        [InlineData("C27F" + "C280", 0, "")] // Improperly terminated 2-byte sequence at start of DWORD
+        [InlineData("C2C0" + "C280", 0, "")] // Improperly terminated 2-byte sequence at start of DWORD
+        [InlineData("C280" + "C27F", 2, "\u0080")] // Improperly terminated 2-byte sequence at end of DWORD
+        [InlineData("C280" + "C2C0", 2, "\u0080")] // Improperly terminated 2-byte sequence at end of DWORD
+        [InlineData("C280" + "C280" + "80203040", 4, "\u0080\u0080")] // Continuation character at start of sequence, within "stay in 2-byte processing" optimization
+        [InlineData("C280" + "C280" + "C180" + "C280", 4, "\u0080\u0080")] // Overlong 2-byte sequence at start of DWORD, within "stay in 2-byte processing" optimization
+        [InlineData("C280" + "C280" + "C280" + "C180", 6, "\u0080\u0080\u0080")] // Overlong 2-byte sequence at end of DWORD, within "stay in 2-byte processing" optimization
+        [InlineData("3031" + "E09F80" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Overlong 3-byte sequence at start of DWORD
+        [InlineData("3031" + "E07F80" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Improperly terminated 3-byte sequence at start of DWORD
+        [InlineData("3031" + "E0C080" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Improperly terminated 3-byte sequence at start of DWORD
+        [InlineData("3031" + "E17F80" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Improperly terminated 3-byte sequence at start of DWORD
+        [InlineData("3031" + "E1C080" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Improperly terminated 3-byte sequence at start of DWORD
+        [InlineData("3031" + "EDA080" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Surrogate 3-byte sequence at start of DWORD
+        [InlineData("3031" + "F5808080", 2, "01")] // [ F5 ] is always invalid
+        [InlineData("3031" + "F6808080", 2, "01")] // [ F6 ] is always invalid
+        [InlineData("3031" + "F7808080", 2, "01")] // [ F7 ] is always invalid
+        [InlineData("3031" + "F8808080", 2, "01")] // [ F8 ] is always invalid
+        [InlineData("3031" + "F9808080", 2, "01")] // [ F9 ] is always invalid
+        [InlineData("3031" + "FA808080", 2, "01")] // [ FA ] is always invalid
+        [InlineData("3031" + "FB808080", 2, "01")] // [ FB ] is always invalid
+        [InlineData("3031" + "FC808080", 2, "01")] // [ FC ] is always invalid
+        [InlineData("3031" + "FD808080", 2, "01")] // [ FD ] is always invalid
+        [InlineData("3031" + "FE808080", 2, "01")] // [ FE ] is always invalid
+        [InlineData("3031" + "FF808080", 2, "01")] // [ FF ] is always invalid
+        public void ToChars_WithLargeInvalidBuffers(string utf8HexInput, int expectedNumBytesConsumed, string expectedUtf16Transcoding)
+        {
+            // These test cases are for the "fast processing" code which is the main loop of TranscodeToUtf16,
+            // so inputs should be >= 4 bytes.
+
+            Assert.True(utf8HexInput.Length >= 8);
+
+            ToChars_Test_Core(
+                utf8Input: DecodeHex(utf8HexInput),
+                destinationSize: expectedUtf16Transcoding.Length,
+                replaceInvalidSequences: false,
+                isFinalChunk: false,
+                expectedOperationStatus: OperationStatus.InvalidData,
+                expectedNumBytesRead: expectedNumBytesConsumed,
+                expectedUtf16Transcoding: expectedUtf16Transcoding);
+        }
+
+        [Theory]
+        [InlineData(X_UTF8 + "80" + X_UTF8, X_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // stray continuation byte [ 80 ]
+        [InlineData(X_UTF8 + "FF" + X_UTF8, X_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // invalid UTF-8 byte [ FF ]
+        [InlineData(X_UTF8 + "C2" + X_UTF8, X_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // 2-byte sequence starter [ C2 ] not followed by continuation byte
+        [InlineData(X_UTF8 + "C1C180" + X_UTF8, X_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ C1 80 ] is overlong but consists of two maximal invalid subsequences, each of length 1 byte
+        [InlineData(X_UTF8 + E_ACUTE_UTF8 + "E08080", X_UTF16 + E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16)] // [ E0 80 ] is overlong 3-byte sequence (1 byte maximal invalid subsequence), and following [ 80 ] is stray continuation byte
+        [InlineData(GRINNING_FACE_UTF8 + "F08F8080" + GRINNING_FACE_UTF8, GRINNING_FACE_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + GRINNING_FACE_UTF16)] // [ F0 8F ] is overlong 4-byte sequence (1 byte maximal invalid subsequence), and following [ 80 ] instances are stray continuation bytes
+        [InlineData(GRINNING_FACE_UTF8 + "F4908080" + GRINNING_FACE_UTF8, GRINNING_FACE_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + GRINNING_FACE_UTF16)] // [ F4 90 ] is out-of-range 4-byte sequence (1 byte maximal invalid subsequence), and following [ 80 ] instances are stray continuation bytes
+        [InlineData(E_ACUTE_UTF8 + "EDA0" + X_UTF8, E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ ED A0 ] is encoding of UTF-16 surrogate code point, so consists of two maximal invalid subsequences, each of length 1 byte
+        [InlineData(E_ACUTE_UTF8 + "ED80" + X_UTF8, E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ ED 80 ] is incomplete 3-byte sequence, so is 2-byte maximal invalid subsequence
+        [InlineData(E_ACUTE_UTF8 + "F380" + X_UTF8, E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ F3 80 ] is incomplete 4-byte sequence, so is 2-byte maximal invalid subsequence
+        [InlineData(E_ACUTE_UTF8 + "F38080" + X_UTF8, E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ F3 80 80 ] is incomplete 4-byte sequence, so is 3-byte maximal invalid subsequence
+        public void ToChars_WithReplacement(string utf8HexInput, string expectedUtf16Transcoding)
+        {
+            // First run the test with isFinalBlock = false,
+            // both with and without some bytes of incomplete trailing data.
+
+            ToChars_Test_Core(
+                utf8Input: DecodeHex(utf8HexInput),
+                destinationSize: expectedUtf16Transcoding.Length,
+                replaceInvalidSequences: true,
+                isFinalChunk: false,
+                expectedOperationStatus: OperationStatus.Done,
+                expectedNumBytesRead: utf8HexInput.Length / 2,
+                expectedUtf16Transcoding: expectedUtf16Transcoding);
+
+            ToChars_Test_Core(
+                utf8Input: DecodeHex(utf8HexInput + "E0BF" /* trailing data */),
+                destinationSize: expectedUtf16Transcoding.Length,
+                replaceInvalidSequences: true,
+                isFinalChunk: false,
+                expectedOperationStatus: OperationStatus.NeedMoreData,
+                expectedNumBytesRead: utf8HexInput.Length / 2,
+                expectedUtf16Transcoding: expectedUtf16Transcoding);
+
+            // Then run the test with isFinalBlock = true, with incomplete trailing data.
+
+            ToChars_Test_Core(
+                utf8Input: DecodeHex(utf8HexInput + "E0BF" /* trailing data */),
+                destinationSize: expectedUtf16Transcoding.Length,
+                replaceInvalidSequences: true,
+                isFinalChunk: true,
+                expectedOperationStatus: OperationStatus.DestinationTooSmall,
+                expectedNumBytesRead: utf8HexInput.Length / 2,
+                expectedUtf16Transcoding: expectedUtf16Transcoding);
+
+            ToChars_Test_Core(
+                 utf8Input: DecodeHex(utf8HexInput + "E0BF" /* trailing data */),
+                 destinationSize: expectedUtf16Transcoding.Length + 1, // allow room for U+FFFD
+                 replaceInvalidSequences: true,
+                 isFinalChunk: true,
+                 expectedOperationStatus: OperationStatus.Done,
+                 expectedNumBytesRead: utf8HexInput.Length / 2 + 2,
+                 expectedUtf16Transcoding: expectedUtf16Transcoding + REPLACEMENT_CHAR_UTF16);
+        }
+
+        [Fact]
+        public void ToChars_AllPossibleScalarValues()
+        {
+            ToChars_Test_Core(
+                utf8Input: s_allScalarsAsUtf8.Span,
+                destinationSize: s_allScalarsAsUtf16.Length,
+                replaceInvalidSequences: false,
+                isFinalChunk: false,
+                expectedOperationStatus: OperationStatus.Done,
+                expectedNumBytesRead: s_allScalarsAsUtf8.Length,
+                expectedUtf16Transcoding: s_allScalarsAsUtf16.Span);
+        }
+
+        private static void ToChars_Test_Core(ReadOnlySpan<byte> utf8Input, int destinationSize, bool replaceInvalidSequences, bool isFinalChunk, OperationStatus expectedOperationStatus, int expectedNumBytesRead, ReadOnlySpan<char> expectedUtf16Transcoding)
+        {
+            // Arrange
+
+            using (BoundedMemory<byte> boundedSource = BoundedMemory.AllocateFromExistingData(utf8Input))
+            using (BoundedMemory<char> boundedDestination = BoundedMemory.Allocate<char>(destinationSize))
+            {
+                boundedSource.MakeReadonly();
+
+                // Act
+
+                OperationStatus actualOperationStatus = Utf8.ToUtf16(boundedSource.Span, boundedDestination.Span, out int actualNumBytesRead, out int actualNumCharsWritten, replaceInvalidSequences, isFinalChunk);
+
+                // Assert
+
+                Assert.Equal(expectedOperationStatus, actualOperationStatus);
+                Assert.Equal(expectedNumBytesRead, actualNumBytesRead);
+                Assert.Equal(expectedUtf16Transcoding.Length, actualNumCharsWritten);
+                Assert.Equal(expectedUtf16Transcoding.ToString(), boundedDestination.Span.Slice(0, actualNumCharsWritten).ToString());
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Runtime/tests/System/Text/Unicode/Utf8Tests.netcoreapp.cs b/src/libraries/System.Runtime/tests/System/Text/Unicode/Utf8Tests.netcoreapp.cs

new file mode 100644 (file)

index 0000000..087235a
--- /dev/null
+++ b/src/libraries/System.Runtime/tests/System/Text/Unicode/Utf8Tests.netcoreapp.cs
@@ -0,0 +1,141 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Buffers;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text.RegularExpressions;
+using Xunit;
+
+namespace System.Text.Unicode.Tests
+{
+    public partial class Utf8Tests
+    {
+        private const string X_UTF8 = "58"; // U+0058 LATIN CAPITAL LETTER X, 1 byte
+        private const string X_UTF16 = "X";
+
+        private const string Y_UTF8 = "59"; // U+0059 LATIN CAPITAL LETTER Y, 1 byte
+        private const string Y_UTF16 = "Y";
+
+        private const string Z_UTF8 = "5A"; // U+005A LATIN CAPITAL LETTER Z, 1 byte
+        private const string Z_UTF16 = "Z";
+
+        private const string E_ACUTE_UTF8 = "C3A9"; // U+00E9 LATIN SMALL LETTER E WITH ACUTE, 2 bytes
+        private const string E_ACUTE_UTF16 = "\u00E9";
+
+        private const string EURO_SYMBOL_UTF8 = "E282AC"; // U+20AC EURO SIGN, 3 bytes
+        private const string EURO_SYMBOL_UTF16 = "\u20AC";
+
+        private const string REPLACEMENT_CHAR_UTF8 = "EFBFBD"; // U+FFFD REPLACEMENT CHAR, 3 bytes
+        private const string REPLACEMENT_CHAR_UTF16 = "\uFFFD";
+
+        private const string GRINNING_FACE_UTF8 = "F09F9880"; // U+1F600 GRINNING FACE, 4 bytes
+        private const string GRINNING_FACE_UTF16 = "\U0001F600";
+        
+        // All valid scalars [ U+0000 .. U+D7FF ] and [ U+E000 .. U+10FFFF ].
+        private static readonly IEnumerable<Rune> s_allValidScalars = Enumerable.Range(0x0000, 0xD800).Concat(Enumerable.Range(0xE000, 0x110000 - 0xE000)).Select(value => new Rune(value));
+
+        private static readonly ReadOnlyMemory<char> s_allScalarsAsUtf16;
+        private static readonly ReadOnlyMemory<byte> s_allScalarsAsUtf8;
+
+        static Utf8Tests()
+        {
+            List<char> allScalarsAsUtf16 = new List<char>();
+            List<byte> allScalarsAsUtf8 = new List<byte>();
+
+            foreach (Rune rune in s_allValidScalars)
+            {
+                allScalarsAsUtf16.AddRange(ToUtf16(rune));
+                allScalarsAsUtf8.AddRange(ToUtf8(rune));
+            }
+
+            s_allScalarsAsUtf16 = allScalarsAsUtf16.ToArray().AsMemory();
+            s_allScalarsAsUtf8 = allScalarsAsUtf8.ToArray().AsMemory();
+        }
+
+        /*
+         * COMMON UTILITIES FOR UNIT TESTS
+         */
+
+        private static byte[] DecodeHex(ReadOnlySpan<char> inputHex)
+        {
+            Assert.True(Regex.IsMatch(inputHex.ToString(), "^([0-9a-fA-F]{2})*$"), "Input must be an even number of hex characters.");
+
+            byte[] retVal = new byte[inputHex.Length / 2];
+            for (int i = 0; i < retVal.Length; i++)
+            {
+                retVal[i] = byte.Parse(inputHex.Slice(i * 2, 2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture);
+            }
+            return retVal;
+        }
+
+        // !! IMPORTANT !!
+        // Don't delete this implementation, as we use it as a reference to make sure the framework's
+        // transcoding logic is correct.
+        private static byte[] ToUtf8(Rune rune)
+        {
+            Assert.True(Rune.IsValid(rune.Value), $"Rune with value U+{(uint)rune.Value:X4} is not well-formed.");
+
+            if (rune.Value < 0x80)
+            {
+                return new[]
+                {
+                    (byte)rune.Value
+                };
+            }
+            else if (rune.Value < 0x0800)
+            {
+                return new[]
+                {
+                    (byte)((rune.Value >> 6) | 0xC0),
+                    (byte)((rune.Value & 0x3F) | 0x80)
+                };
+            }
+            else if (rune.Value < 0x10000)
+            {
+                return new[]
+                {
+                    (byte)((rune.Value >> 12) | 0xE0),
+                    (byte)(((rune.Value >> 6) & 0x3F) | 0x80),
+                    (byte)((rune.Value & 0x3F) | 0x80)
+                };
+            }
+            else
+            {
+                return new[]
+                {
+                    (byte)((rune.Value >> 18) | 0xF0),
+                    (byte)(((rune.Value >> 12) & 0x3F) | 0x80),
+                    (byte)(((rune.Value >> 6) & 0x3F) | 0x80),
+                    (byte)((rune.Value & 0x3F) | 0x80)
+                };
+            }
+        }
+
+        // !! IMPORTANT !!
+        // Don't delete this implementation, as we use it as a reference to make sure the framework's
+        // transcoding logic is correct.
+        private static char[] ToUtf16(Rune rune)
+        {
+            Assert.True(Rune.IsValid(rune.Value), $"Rune with value U+{(uint)rune.Value:X4} is not well-formed.");
+
+            if (rune.IsBmp)
+            {
+                return new[]
+                {
+                    (char)rune.Value
+                };
+            }
+            else
+            {
+                return new[]
+                {
+                    (char)((rune.Value >> 10) + 0xD800 - 0x40),
+                    (char)((rune.Value & 0x03FF) + 0xDC00)
+                };
+            }
+        }
+    }
+}
author	Levi Broderick <GrabYourPitchforks@users.noreply.github.com>
	Mon, 18 Mar 2019 22:05:05 +0000 (15:05 -0700)
committer	GitHub <noreply@github.com>
	Mon, 18 Mar 2019 22:05:05 +0000 (15:05 -0700)
src/libraries/CoreFx.Private.TestUtilities/ref/Configurations.props		patch \| blob \| history
src/libraries/CoreFx.Private.TestUtilities/ref/CoreFx.Private.TestUtilities.csproj		patch \| blob \| history
src/libraries/CoreFx.Private.TestUtilities/ref/CoreFx.Private.TestUtilities.netcoreapp.cs	[new file with mode: 0644]	patch \| blob
src/libraries/CoreFx.Private.TestUtilities/src/CoreFx.Private.TestUtilities.csproj		patch \| blob \| history
src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Creation.cs	[new file with mode: 0644]	patch \| blob
src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Unix.cs	[new file with mode: 0644]	patch \| blob
src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.Windows.cs	[new file with mode: 0644]	patch \| blob
src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/BoundedMemory.cs	[new file with mode: 0644]	patch \| blob
src/libraries/CoreFx.Private.TestUtilities/src/System/Buffers/PoisonPagePlacement.cs	[new file with mode: 0644]	patch \| blob
src/libraries/System.Runtime/ref/System.Runtime.cs		patch \| blob \| history
src/libraries/System.Runtime/src/ApiCompatBaseline.netcoreappaot.txt		patch \| blob \| history
src/libraries/System.Runtime/src/ApiCompatBaseline.uapaot.txt		patch \| blob \| history
src/libraries/System.Runtime/tests/System.Runtime.Tests.csproj		patch \| blob \| history
src/libraries/System.Runtime/tests/System/Text/Unicode/Utf8Tests.ToBytes.netcoreapp.cs	[new file with mode: 0644]	patch \| blob
src/libraries/System.Runtime/tests/System/Text/Unicode/Utf8Tests.ToChars.netcoreapp.cs	[new file with mode: 0644]	patch \| blob
src/libraries/System.Runtime/tests/System/Text/Unicode/Utf8Tests.netcoreapp.cs	[new file with mode: 0644]	patch \| blob