<Project>
<PropertyGroup>
<BuildConfigurations>
+ netcoreapp;
netstandard;
+ uap;
</BuildConfigurations>
</PropertyGroup>
-</Project>
\ No newline at end of file
+</Project>
-<Project Sdk="Microsoft.NET.Sdk">
+<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<ProjectGuid>{E2E59C98-998F-9965-991D-99411166AF6F}</ProjectGuid>
<ShouldWriteSigningRequired>false</ShouldWriteSigningRequired>
<AllowReferenceFromRuntime>true</AllowReferenceFromRuntime>
<RuntimeProjectFile>$(RepoRoot)\external\test-runtime\XUnit.Runtime.depproj</RuntimeProjectFile>
- <Configurations>netstandard-Debug;netstandard-Release</Configurations>
+ <Configurations>netcoreapp-Debug;netcoreapp-Release;netstandard-Debug;netstandard-Release;uap-Debug;uap-Release</Configurations>
</PropertyGroup>
<ItemGroup>
<Compile Include="CoreFx.Private.TestUtilities.cs" />
<ReferenceFromRuntime Include="xunit.core" />
</ItemGroup>
+ <ItemGroup Condition="'$(TargetGroup)' == 'netcoreapp' OR '$(TargetGroup)' == 'uap'">
+ <Compile Include="CoreFx.Private.TestUtilities.netcoreapp.cs" />
+ <ProjectReference Include="..\..\System.Collections\ref\System.Collections.csproj" />
+ <ProjectReference Include="..\..\System.Diagnostics.Process\ref\System.Diagnostics.Process.csproj" />
+ <ProjectReference Include="..\..\System.Runtime\ref\System.Runtime.csproj" />
+ <ProjectReference Include="..\..\System.Runtime.InteropServices\ref\System.Runtime.InteropServices.csproj" />
+ </ItemGroup>
</Project>
\ No newline at end of file
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+// ------------------------------------------------------------------------------
+// Changes to this file must follow the http://aka.ms/api-review process.
+// ------------------------------------------------------------------------------
+
+namespace System.Buffers
+{
+ public static partial class BoundedMemory
+ {
+ public static System.Buffers.BoundedMemory<T> Allocate<T>(int elementCount, System.Buffers.PoisonPagePlacement placement = System.Buffers.PoisonPagePlacement.After) where T : unmanaged { throw null; }
+ public static System.Buffers.BoundedMemory<T> AllocateFromExistingData<T>(System.ReadOnlySpan<T> data, System.Buffers.PoisonPagePlacement placement = System.Buffers.PoisonPagePlacement.After) where T : unmanaged { throw null; }
+ public static System.Buffers.BoundedMemory<T> AllocateFromExistingData<T>(T[] data, System.Buffers.PoisonPagePlacement placement = System.Buffers.PoisonPagePlacement.After) where T : unmanaged { throw null; }
+ }
+ public abstract partial class BoundedMemory<T> : IDisposable where T : unmanaged
+ {
+ public abstract bool IsReadonly { get; }
+ public abstract System.Memory<T> Memory { get; }
+ public abstract System.Span<T> Span { get; }
+ public abstract void Dispose();
+ public abstract void MakeReadonly();
+ public abstract void MakeWriteable();
+ }
+ public enum PoisonPagePlacement
+ {
+ After = 0,
+ Before = 1,
+ }
+}
<GeneratePlatformNotSupportedAssemblyMessage Condition="'$(TargetGroup)' == 'netstandard'">Test Utilities are not supported on this platform</GeneratePlatformNotSupportedAssemblyMessage>
<Configurations>netcoreapp-Unix-Debug;netcoreapp-Unix-Release;netcoreapp-Windows_NT-Debug;netcoreapp-Windows_NT-Release;netcoreapp2.0-Unix-Debug;netcoreapp2.0-Unix-Release;netcoreapp2.0-Windows_NT-Debug;netcoreapp2.0-Windows_NT-Release;netcoreappaot-Windows_NT-Debug;netcoreappaot-Windows_NT-Release;netfx-Windows_NT-Debug;netfx-Windows_NT-Release;netstandard-Debug;netstandard-Release;uap-Windows_NT-Debug;uap-Windows_NT-Release;uapaot-Windows_NT-Debug;uapaot-Windows_NT-Release</Configurations>
</PropertyGroup>
+ <ItemGroup>
+ <Compile Include="System\Buffers\BoundedMemory.cs" />
+ <Compile Include="System\Buffers\BoundedMemory.Creation.cs" />
+ <Compile Include="System\Buffers\BoundedMemory.Unix.cs" Condition="'$(TargetsWindows)' != 'true'" />
+ <Compile Include="System\Buffers\BoundedMemory.Windows.cs" Condition="'$(TargetsWindows)' == 'true'" />
+ <Compile Include="System\Buffers\PoisonPagePlacement.cs" />
+ </ItemGroup>
<ItemGroup Condition="'$(TargetGroup)' != 'netstandard'">
<Compile Include="System\AdminHelpers.cs" />
<Compile Include="System\IO\FileCleanupTestBase.cs" />
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Runtime.InteropServices;
+
+namespace System.Buffers
+{
+ /// <summary>
+ /// Contains factory methods to create <see cref="BoundedMemory{T}"/> instances.
+ /// </summary>
+ public static partial class BoundedMemory
+ {
+ /// <summary>
+ /// Allocates a new <see cref="BoundedMemory{T}"/> region which is immediately preceded by
+ /// or immediately followed by a poison (MEM_NOACCESS) page. If <paramref name="placement"/>
+ /// is <see cref="PoisonPagePlacement.Before"/>, then attempting to read the memory
+ /// immediately before the returned <see cref="BoundedMemory{T}"/> will result in an AV.
+ /// If <paramref name="placement"/> is <see cref="PoisonPagePlacement.After"/>, then
+ /// attempting to read the memory immediately after the returned <see cref="BoundedMemory{T}"/>
+ /// will result in AV.
+ /// </summary>
+ /// <remarks>
+ /// The newly-allocated memory will be populated with random data.
+ /// </remarks>
+ public static BoundedMemory<T> Allocate<T>(int elementCount, PoisonPagePlacement placement = PoisonPagePlacement.After) where T : unmanaged
+ {
+ if (elementCount < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(elementCount));
+ }
+ if (placement != PoisonPagePlacement.Before && placement != PoisonPagePlacement.After)
+ {
+ throw new ArgumentOutOfRangeException(nameof(placement));
+ }
+
+ var retVal = AllocateWithoutDataPopulation<T>(elementCount, placement);
+ FillRandom(MemoryMarshal.AsBytes(retVal.Span));
+ return retVal;
+ }
+
+ /// <summary>
+ /// Similar to <see cref="Allocate(int, PoisonPagePlacement)"/>, but populates the allocated
+ /// native memory block from existing data rather than using random data.
+ /// </summary>
+ public static BoundedMemory<T> AllocateFromExistingData<T>(ReadOnlySpan<T> data, PoisonPagePlacement placement = PoisonPagePlacement.After) where T : unmanaged
+ {
+ if (placement != PoisonPagePlacement.Before && placement != PoisonPagePlacement.After)
+ {
+ throw new ArgumentOutOfRangeException(nameof(placement));
+ }
+
+ var retVal = AllocateWithoutDataPopulation<T>(data.Length, placement);
+ data.CopyTo(retVal.Span);
+ return retVal;
+ }
+
+ /// <summary>
+ /// Similar to <see cref="Allocate(int, PoisonPagePlacement)"/>, but populates the allocated
+ /// native memory block from existing data rather than using random data.
+ /// </summary>
+ public static BoundedMemory<T> AllocateFromExistingData<T>(T[] data, PoisonPagePlacement placement = PoisonPagePlacement.After) where T : unmanaged
+ {
+ return AllocateFromExistingData(new ReadOnlySpan<T>(data), placement);
+ }
+
+ private static void FillRandom(Span<byte> buffer)
+ {
+ // Loop over a Random instance manually since Random.NextBytes(Span<byte>) doesn't
+ // exist on all platforms we target.
+
+ Random random = new Random(); // doesn't need to be cryptographically strong
+
+ for (int i = 0; i < buffer.Length; i++)
+ {
+ buffer[i] = (byte)random.Next();
+ }
+ }
+ }
+}
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+namespace System.Buffers
+{
+ public static partial class BoundedMemory
+ {
+ private static UnixImplementation<T> AllocateWithoutDataPopulation<T>(int elementCount, PoisonPagePlacement placement) where T : unmanaged
+ {
+ // On non-Windows platforms, we don't yet have support for changing the permissions of individual pages.
+
+ return new UnixImplementation<T>(elementCount);
+ }
+
+ private sealed class UnixImplementation<T> : BoundedMemory<T> where T : unmanaged
+ {
+ private readonly T[] _buffer;
+
+ public UnixImplementation(int elementCount)
+ {
+ _buffer = new T[elementCount];
+ }
+
+ public override bool IsReadonly => false;
+
+ public override Memory<T> Memory => _buffer;
+
+ public override Span<T> Span => _buffer;
+
+ public override void Dispose()
+ {
+ // no-op
+ }
+
+ public override void MakeReadonly()
+ {
+ // no-op
+ }
+
+ public override void MakeWriteable()
+ {
+ // no-op
+ }
+ }
+ }
+}
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Buffers;
+using System.Runtime.ConstrainedExecution;
+using System.Runtime.InteropServices;
+using System.Security;
+
+namespace System.Buffers
+{
+ public static unsafe partial class BoundedMemory
+ {
+ private static readonly int SystemPageSize = Environment.SystemPageSize;
+
+ private static WindowsImplementation<T> AllocateWithoutDataPopulation<T>(int elementCount, PoisonPagePlacement placement) where T : unmanaged
+ {
+ long cb, totalBytesToAllocate;
+ checked
+ {
+ cb = elementCount * sizeof(T);
+ totalBytesToAllocate = cb;
+
+ // We only need to round the count up if it's not an exact multiple
+ // of the system page size.
+
+ var leftoverBytes = totalBytesToAllocate % SystemPageSize;
+ if (leftoverBytes != 0)
+ {
+ totalBytesToAllocate += SystemPageSize - leftoverBytes;
+ }
+
+ // Finally, account for the poison pages at the front and back.
+
+ totalBytesToAllocate += 2 * SystemPageSize;
+ }
+
+ // Reserve and commit the entire range as NOACCESS.
+
+ var handle = UnsafeNativeMethods.VirtualAlloc(
+ lpAddress: IntPtr.Zero,
+ dwSize: (IntPtr)totalBytesToAllocate /* cast throws OverflowException if out of range */,
+ flAllocationType: VirtualAllocAllocationType.MEM_RESERVE | VirtualAllocAllocationType.MEM_COMMIT,
+ flProtect: VirtualAllocProtection.PAGE_NOACCESS);
+
+ if (handle == null || handle.IsInvalid)
+ {
+ Marshal.ThrowExceptionForHR(Marshal.GetHRForLastWin32Error());
+ throw new InvalidOperationException("VirtualAlloc failed unexpectedly.");
+ }
+
+ // Done allocating! Now carve out a READWRITE section bookended by the NOACCESS
+ // pages and return that carved-out section to the caller. Since memory protection
+ // flags only apply at page-level granularity, we need to "left-align" or "right-
+ // align" the section we carve out so that it's guaranteed adjacent to one of
+ // the NOACCESS bookend pages.
+
+ return new WindowsImplementation<T>(
+ handle: handle,
+ byteOffsetIntoHandle: (placement == PoisonPagePlacement.Before)
+ ? SystemPageSize /* just after leading poison page */
+ : checked((int)(totalBytesToAllocate - SystemPageSize - cb)) /* just before trailing poison page */,
+ elementCount: elementCount)
+ {
+ Protection = VirtualAllocProtection.PAGE_READWRITE
+ };
+ }
+
+ private sealed class WindowsImplementation<T> : BoundedMemory<T> where T : unmanaged
+ {
+ private readonly VirtualAllocHandle _handle;
+ private readonly int _byteOffsetIntoHandle;
+ private readonly int _elementCount;
+ private readonly BoundedMemoryManager _memoryManager;
+
+ internal WindowsImplementation(VirtualAllocHandle handle, int byteOffsetIntoHandle, int elementCount)
+ {
+ _handle = handle;
+ _byteOffsetIntoHandle = byteOffsetIntoHandle;
+ _elementCount = elementCount;
+ _memoryManager = new BoundedMemoryManager(this);
+ }
+
+ public override bool IsReadonly => (Protection != VirtualAllocProtection.PAGE_READWRITE);
+
+ internal VirtualAllocProtection Protection
+ {
+ get
+ {
+ bool refAdded = false;
+ try
+ {
+ _handle.DangerousAddRef(ref refAdded);
+ if (UnsafeNativeMethods.VirtualQuery(
+ lpAddress: _handle.DangerousGetHandle() + _byteOffsetIntoHandle,
+ lpBuffer: out var memoryInfo,
+ dwLength: (IntPtr)sizeof(MEMORY_BASIC_INFORMATION)) == IntPtr.Zero)
+ {
+ Marshal.ThrowExceptionForHR(Marshal.GetHRForLastWin32Error());
+ throw new InvalidOperationException("VirtualQuery failed unexpectedly.");
+ }
+ return memoryInfo.Protect;
+ }
+ finally
+ {
+ if (refAdded)
+ {
+ _handle.DangerousRelease();
+ }
+ }
+ }
+ set
+ {
+ if (_elementCount > 0)
+ {
+ bool refAdded = false;
+ try
+ {
+ _handle.DangerousAddRef(ref refAdded);
+ if (!UnsafeNativeMethods.VirtualProtect(
+ lpAddress: _handle.DangerousGetHandle() + _byteOffsetIntoHandle,
+ dwSize: (IntPtr)(&((T*)null)[_elementCount]),
+ flNewProtect: value,
+ lpflOldProtect: out _))
+ {
+ Marshal.ThrowExceptionForHR(Marshal.GetHRForLastWin32Error());
+ throw new InvalidOperationException("VirtualProtect failed unexpectedly.");
+ }
+ }
+ finally
+ {
+ if (refAdded)
+ {
+ _handle.DangerousRelease();
+ }
+ }
+ }
+ }
+ }
+
+ public override Memory<T> Memory => _memoryManager.Memory;
+
+ public override Span<T> Span
+ {
+ get
+ {
+ bool refAdded = false;
+ try
+ {
+ _handle.DangerousAddRef(ref refAdded);
+ return new Span<T>((void*)(_handle.DangerousGetHandle() + _byteOffsetIntoHandle), _elementCount);
+ }
+ finally
+ {
+ if (refAdded)
+ {
+ _handle.DangerousRelease();
+ }
+ }
+ }
+ }
+
+ public override void Dispose()
+ {
+ _handle.Dispose();
+ }
+
+ public override void MakeReadonly()
+ {
+ Protection = VirtualAllocProtection.PAGE_READONLY;
+ }
+
+ public override void MakeWriteable()
+ {
+ Protection = VirtualAllocProtection.PAGE_READWRITE;
+ }
+
+ private sealed class BoundedMemoryManager : MemoryManager<T>
+ {
+ private readonly WindowsImplementation<T> _impl;
+
+ public BoundedMemoryManager(WindowsImplementation<T> impl)
+ {
+ _impl = impl;
+ }
+
+ public override Memory<T> Memory => CreateMemory(_impl._elementCount);
+
+ protected override void Dispose(bool disposing)
+ {
+ // no-op; the handle will be disposed separately
+ }
+
+ public override Span<T> GetSpan()
+ {
+ throw new NotImplementedException();
+ }
+
+ public override MemoryHandle Pin(int elementIndex)
+ {
+ if ((uint)elementIndex > (uint)_impl._elementCount)
+ {
+ throw new ArgumentOutOfRangeException(paramName: nameof(elementIndex));
+ }
+
+ bool refAdded = false;
+ try
+ {
+ _impl._handle.DangerousAddRef(ref refAdded);
+ return new MemoryHandle((T*)(_impl._handle.DangerousGetHandle() + _impl._byteOffsetIntoHandle) + elementIndex);
+ }
+ finally
+ {
+ if (refAdded)
+ {
+ _impl._handle.DangerousRelease();
+ }
+ }
+ }
+
+ public override void Unpin()
+ {
+ // no-op - we don't unpin native memory
+ }
+ }
+ }
+
+ // from winnt.h
+ [Flags]
+ private enum VirtualAllocAllocationType : uint
+ {
+ MEM_COMMIT = 0x1000,
+ MEM_RESERVE = 0x2000,
+ MEM_DECOMMIT = 0x4000,
+ MEM_RELEASE = 0x8000,
+ MEM_FREE = 0x10000,
+ MEM_PRIVATE = 0x20000,
+ MEM_MAPPED = 0x40000,
+ MEM_RESET = 0x80000,
+ MEM_TOP_DOWN = 0x100000,
+ MEM_WRITE_WATCH = 0x200000,
+ MEM_PHYSICAL = 0x400000,
+ MEM_ROTATE = 0x800000,
+ MEM_LARGE_PAGES = 0x20000000,
+ MEM_4MB_PAGES = 0x80000000,
+ }
+
+ // from winnt.h
+ [Flags]
+ private enum VirtualAllocProtection : uint
+ {
+ PAGE_NOACCESS = 0x01,
+ PAGE_READONLY = 0x02,
+ PAGE_READWRITE = 0x04,
+ PAGE_WRITECOPY = 0x08,
+ PAGE_EXECUTE = 0x10,
+ PAGE_EXECUTE_READ = 0x20,
+ PAGE_EXECUTE_READWRITE = 0x40,
+ PAGE_EXECUTE_WRITECOPY = 0x80,
+ PAGE_GUARD = 0x100,
+ PAGE_NOCACHE = 0x200,
+ PAGE_WRITECOMBINE = 0x400,
+ }
+
+ [StructLayout(LayoutKind.Sequential)]
+ private struct MEMORY_BASIC_INFORMATION
+ {
+ public IntPtr BaseAddress;
+ public IntPtr AllocationBase;
+ public VirtualAllocProtection AllocationProtect;
+ public IntPtr RegionSize;
+ public VirtualAllocAllocationType State;
+ public VirtualAllocProtection Protect;
+ public VirtualAllocAllocationType Type;
+ };
+
+ private sealed class VirtualAllocHandle : SafeHandle
+ {
+ // Called by P/Invoke when returning SafeHandles
+ private VirtualAllocHandle()
+ : base(IntPtr.Zero, ownsHandle: true)
+ {
+ }
+
+ // Do not provide a finalizer - SafeHandle's critical finalizer will
+ // call ReleaseHandle for you.
+
+ public override bool IsInvalid => (handle == IntPtr.Zero);
+
+ protected override bool ReleaseHandle() =>
+ UnsafeNativeMethods.VirtualFree(handle, IntPtr.Zero, VirtualAllocAllocationType.MEM_RELEASE);
+ }
+
+ [SuppressUnmanagedCodeSecurity]
+ private static class UnsafeNativeMethods
+ {
+ private const string KERNEL32_LIB = "kernel32.dll";
+
+ // https://msdn.microsoft.com/en-us/library/windows/desktop/aa366887(v=vs.85).aspx
+ [DllImport(KERNEL32_LIB, CallingConvention = CallingConvention.Winapi, SetLastError = true)]
+ public static extern VirtualAllocHandle VirtualAlloc(
+ [In] IntPtr lpAddress,
+ [In] IntPtr dwSize,
+ [In] VirtualAllocAllocationType flAllocationType,
+ [In] VirtualAllocProtection flProtect);
+
+ // https://msdn.microsoft.com/en-us/library/windows/desktop/aa366892(v=vs.85).aspx
+ [DllImport(KERNEL32_LIB, CallingConvention = CallingConvention.Winapi, SetLastError = true)]
+ [ReliabilityContract(Consistency.WillNotCorruptState, Cer.Success)]
+ [return: MarshalAs(UnmanagedType.Bool)]
+ public static extern bool VirtualFree(
+ [In] IntPtr lpAddress,
+ [In] IntPtr dwSize,
+ [In] VirtualAllocAllocationType dwFreeType);
+
+ // https://msdn.microsoft.com/en-us/library/windows/desktop/aa366898(v=vs.85).aspx
+ [DllImport(KERNEL32_LIB, CallingConvention = CallingConvention.Winapi, SetLastError = true)]
+ [return: MarshalAs(UnmanagedType.Bool)]
+ public static extern bool VirtualProtect(
+ [In] IntPtr lpAddress,
+ [In] IntPtr dwSize,
+ [In] VirtualAllocProtection flNewProtect,
+ [Out] out VirtualAllocProtection lpflOldProtect);
+
+ // https://msdn.microsoft.com/en-us/library/windows/desktop/aa366902(v=vs.85).aspx
+ [DllImport(KERNEL32_LIB, CallingConvention = CallingConvention.Winapi, SetLastError = true)]
+ public static extern IntPtr VirtualQuery(
+ [In] IntPtr lpAddress,
+ [Out] out MEMORY_BASIC_INFORMATION lpBuffer,
+ [In] IntPtr dwLength);
+ }
+ }
+}
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+namespace System.Buffers
+{
+ /// <summary>
+ /// Represents a region of native memory. The <see cref="Memory"/> property can be used
+ /// to get a <see cref="Memory{Byte}"/> backed by this memory region.
+ /// </summary>
+ public abstract class BoundedMemory<T> : IDisposable where T : unmanaged
+ {
+ /// <summary>
+ /// Returns a value stating whether this native memory block is readonly.
+ /// </summary>
+ public abstract bool IsReadonly { get; }
+
+ /// <summary>
+ /// Gets the <see cref="Memory{Byte}"/> which represents this native memory.
+ /// This <see cref="BoundedMemory{T}"/> instance must be kept alive while working with the <see cref="Memory{Byte}"/>.
+ /// </summary>
+ public abstract Memory<T> Memory { get; }
+
+ /// <summary>
+ /// Gets the <see cref="Span{Byte}"/> which represents this native memory.
+ /// This <see cref="BoundedMemory{T}"/> instance must be kept alive while working with the <see cref="Span{Byte}"/>.
+ /// </summary>
+ public abstract Span<T> Span { get; }
+
+ /// <summary>
+ /// Disposes this <see cref="BoundedMemory{T}"/> instance.
+ /// </summary>
+ public abstract void Dispose();
+
+ /// <summary>
+ /// Sets this native memory block to be readonly. Writes to this block will cause an AV.
+ /// This method has no effect if the memory block is zero length or if the underlying
+ /// OS does not support marking the memory block as readonly.
+ /// </summary>
+ public abstract void MakeReadonly();
+
+ /// <summary>
+ /// Sets this native memory block to be read+write.
+ /// This method has no effect if the memory block is zero length or if the underlying
+ /// OS does not support marking the memory block as read+write.
+ /// </summary>
+ public abstract void MakeWriteable();
+ }
+}
\ No newline at end of file
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+namespace System.Buffers
+{
+ /// <summary>
+ /// Dictates where the poison page should be placed.
+ /// </summary>
+ public enum PoisonPagePlacement
+ {
+ /// <summary>
+ /// The poison page should be placed immediately after the memory region.
+ /// Attempting to access the memory page immediately following the
+ /// span will result in an AV.
+ /// </summary>
+ After,
+
+ /// <summary>
+ /// The poison page should be placed immediately before the memory region.
+ /// Attempting to access the memory page immediately before the
+ /// span will result in an AV.
+ /// </summary>
+ Before,
+ }
+}
\ No newline at end of file
void System.IDisposable.Dispose() { }
}
}
+namespace System.Text.Unicode
+{
+ public static partial class Utf8
+ {
+ public static System.Buffers.OperationStatus FromUtf16(ReadOnlySpan<char> source, Span<byte> destination, out int charsRead, out int bytesWritten, bool replaceInvalidSequences = true, bool isFinalBlock = true) { throw null; }
+ public static System.Buffers.OperationStatus ToUtf16(ReadOnlySpan<byte> source, Span<char> destination, out int bytesRead, out int charsWritten, bool replaceInvalidSequences = true, bool isFinalBlock = true) { throw null; }
+ }
+}
namespace System.Threading
{
public readonly partial struct CancellationToken
MembersMustExist : Member 'System.Text.Rune.DecodeUtf8(System.ReadOnlySpan<System.Byte>, System.Text.Rune, System.Int32)' does not exist in the implementation but it does exist in the contract.
MembersMustExist : Member 'System.Text.Rune.DecodeUtf8FromEnd(System.ReadOnlySpan<System.Byte>, System.Text.Rune, System.Int32)' does not exist in the implementation but it does exist in the contract.
MembersMustExist : Member 'System.String System.Runtime.CompilerServices.RuntimeFeature.DefaultImplementationsOfInterfaces' does not exist in the implementation but it does exist in the contract.
+TypesMustExist : Type 'System.Text.Unicode.Utf8' does not exist in the implementation but it does exist in the contract.
MembersMustExist : Member 'System.Text.Rune.DecodeUtf8(System.ReadOnlySpan<System.Byte>, System.Text.Rune, System.Int32)' does not exist in the implementation but it does exist in the contract.
MembersMustExist : Member 'System.Text.Rune.DecodeUtf8FromEnd(System.ReadOnlySpan<System.Byte>, System.Text.Rune, System.Int32)' does not exist in the implementation but it does exist in the contract.
MembersMustExist : Member 'System.String System.Runtime.CompilerServices.RuntimeFeature.DefaultImplementationsOfInterfaces' does not exist in the implementation but it does exist in the contract.
+TypesMustExist : Type 'System.Text.Unicode.Utf8' does not exist in the implementation but it does exist in the contract.
<Compile Include="System\Text\RuneTests.netcoreapp.cs" />
<Compile Include="System\Text\RuneTests.TestData.netcoreapp.cs" />
<Compile Include="System\Text\StringBuilderTests.netcoreapp.cs" />
+ <Compile Include="System\Text\Unicode\Utf8Tests.netcoreapp.cs" />
+ <Compile Include="System\Text\Unicode\Utf8Tests.ToBytes.netcoreapp.cs" />
+ <Compile Include="System\Text\Unicode\Utf8Tests.ToChars.netcoreapp.cs" />
<Compile Include="System\Type\TypePropertyTests.netcoreapp.cs" />
<Compile Include="System\Type\TypeTests.netcoreapp.cs" />
<Compile Include="System\ArgIteratorTests.netcoreapp.cs" />
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Buffers;
+using System.Linq;
+using Xunit;
+
+namespace System.Text.Unicode.Tests
+{
+ public partial class Utf8Tests
+ {
+ [Theory]
+ [InlineData("", "")] // empty string is OK
+ [InlineData(X_UTF16, X_UTF8)]
+ [InlineData(E_ACUTE_UTF16, E_ACUTE_UTF8)]
+ [InlineData(EURO_SYMBOL_UTF16, EURO_SYMBOL_UTF8)]
+ public void ToBytes_WithSmallValidBuffers(string utf16Input, string expectedUtf8TranscodingHex)
+ {
+ // These test cases are for the "slow processing" code path at the end of TranscodeToUtf8,
+ // so inputs should be less than 2 chars.
+
+ Assert.InRange(utf16Input.Length, 0, 1);
+
+ ToBytes_Test_Core(
+ utf16Input: utf16Input,
+ destinationSize: expectedUtf8TranscodingHex.Length / 2,
+ replaceInvalidSequences: false,
+ isFinalChunk: false,
+ expectedOperationStatus: OperationStatus.Done,
+ expectedNumCharsRead: utf16Input.Length,
+ expectedUtf8Transcoding: DecodeHex(expectedUtf8TranscodingHex));
+ }
+
+ [Theory]
+ [InlineData("AB")] // 2 ASCII chars, hits fast inner loop
+ [InlineData("ABCD")] // 4 ASCII chars, hits fast inner loop
+ [InlineData("ABCDEF")] // 6 ASCII chars, hits fast inner loop
+ [InlineData("ABCDEFGH")] // 8 ASCII chars, hits fast inner loop
+ [InlineData("ABCDEFGHIJ")] // 10 ASCII chars, hits fast inner loop
+ [InlineData("ABCDEF" + E_ACUTE_UTF16 + "HIJ")] // interrupts inner loop due to non-ASCII char in first char of first DWORD
+ [InlineData("ABCDEFG" + EURO_SYMBOL_UTF16 + "IJ")] // interrupts inner loop due to non-ASCII char in second char of first DWORD
+ [InlineData("ABCDEFGH" + E_ACUTE_UTF16 + "J")] // interrupts inner loop due to non-ASCII char in first char of second DWORD
+ [InlineData("ABCDEFGHI" + EURO_SYMBOL_UTF16)] // interrupts inner loop due to non-ASCII char in second char of second DWORD
+ [InlineData(X_UTF16 + E_ACUTE_UTF16)] // drains first ASCII char then falls down to slow path
+ [InlineData(X_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16)] // drains first ASCII char then consumes 2x 2-byte sequences at once
+ [InlineData(E_ACUTE_UTF16 + X_UTF16)] // no first ASCII char to drain, consumes 2-byte seq followed by ASCII char
+ [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16)] // stay within 2x 2-byte sequence processing loop
+ [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + X_UTF16)] // break out of 2x 2-byte seq loop due to ASCII data in second char of DWORD
+ [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + X_UTF16 + X_UTF16)] // break out of 2x 2-byte seq loop due to ASCII data in first char of DWORD
+ [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + EURO_SYMBOL_UTF16)] // break out of 2x 2-byte seq loop due to 3-byte data
+ [InlineData(E_ACUTE_UTF16 + EURO_SYMBOL_UTF16)] // 2-byte logic sees next char isn't ASCII, cannot read full DWORD from remaining input buffer, falls down to slow drain loop
+ [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + X_UTF16)] // 2x 3-byte logic can't read a full DWORD from next part of buffer, falls down to slow drain loop
+ [InlineData(EURO_SYMBOL_UTF16 + X_UTF16)] // 3-byte processing loop consumes trailing ASCII char, but can't read next DWORD, falls down to slow drain loop
+ [InlineData(EURO_SYMBOL_UTF16 + X_UTF16 + X_UTF16)] // 3-byte processing loop consumes trailing ASCII char, but can't read next DWORD, falls down to slow drain loop
+ [InlineData(EURO_SYMBOL_UTF16 + E_ACUTE_UTF16)] // 3-byte processing loop can't consume next ASCII char, can't read DWORD, falls down to slow drain loop
+ [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // stay within 2x 3-byte sequence processing loop
+ [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + X_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // consume stray ASCII char at beginning of DWORD after 2x 3-byte sequence
+ [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + X_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // consume stray ASCII char at end of DWORD after 2x 3-byte sequence
+ [InlineData(EURO_SYMBOL_UTF16 + E_ACUTE_UTF16 + X_UTF16)] // consume 2-byte sequence as second char in DWORD which begins with 3-byte encoded char
+ [InlineData(EURO_SYMBOL_UTF16 + GRINNING_FACE_UTF16)] // 3-byte sequence followed by 4-byte sequence
+ [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + GRINNING_FACE_UTF16)] // 2x 3-byte sequence followed by 4-byte sequence
+ [InlineData(GRINNING_FACE_UTF16)] // single 4-byte surrogate char pair
+ [InlineData(GRINNING_FACE_UTF16 + EURO_SYMBOL_UTF16)] // 4-byte surrogate char pair, cannot read next DWORD, falls down to slow drain loop
+ public void ToBytes_WithLargeValidBuffers(string utf16Input)
+ {
+ // These test cases are for the "fast processing" code which is the main loop of TranscodeToUtf8,
+ // so inputs should be at least 2 chars.
+
+ Assert.True(utf16Input.Length >= 2);
+
+ // We're going to run the tests with destination buffer lengths ranging from 0 all the way
+ // to buffers large enough to hold the full output. This allows us to test logic that
+ // detects whether we're about to overrun our destination buffer and instead returns DestinationTooSmall.
+
+ Rune[] enumeratedScalars = utf16Input.EnumerateRunes().ToArray();
+
+ // 0-length buffer test
+ ToBytes_Test_Core(
+ utf16Input: utf16Input,
+ destinationSize: 0,
+ replaceInvalidSequences: false,
+ isFinalChunk: false,
+ expectedOperationStatus: OperationStatus.DestinationTooSmall,
+ expectedNumCharsRead: 0,
+ expectedUtf8Transcoding: ReadOnlySpan<byte>.Empty);
+
+ int expectedNumCharsConsumed = 0;
+ byte[] concatenatedUtf8 = Array.Empty<byte>();
+
+ for (int i = 0; i < enumeratedScalars.Length; i++)
+ {
+ Rune thisScalar = enumeratedScalars[i];
+
+ // provide partial destination buffers all the way up to (but not including) enough to hold the next full scalar encoding
+ for (int j = 1; j < thisScalar.Utf8SequenceLength; j++)
+ {
+ ToBytes_Test_Core(
+ utf16Input: utf16Input,
+ destinationSize: concatenatedUtf8.Length + j,
+ replaceInvalidSequences: false,
+ isFinalChunk: false,
+ expectedOperationStatus: OperationStatus.DestinationTooSmall,
+ expectedNumCharsRead: expectedNumCharsConsumed,
+ expectedUtf8Transcoding: concatenatedUtf8);
+ }
+
+ // now provide a destination buffer large enough to hold the next full scalar encoding
+
+ expectedNumCharsConsumed += thisScalar.Utf16SequenceLength;
+ concatenatedUtf8 = concatenatedUtf8.Concat(ToUtf8(thisScalar)).ToArray();
+
+ ToBytes_Test_Core(
+ utf16Input: utf16Input,
+ destinationSize: concatenatedUtf8.Length,
+ replaceInvalidSequences: false,
+ isFinalChunk: false,
+ expectedOperationStatus: (i == enumeratedScalars.Length - 1) ? OperationStatus.Done : OperationStatus.DestinationTooSmall,
+ expectedNumCharsRead: expectedNumCharsConsumed,
+ expectedUtf8Transcoding: concatenatedUtf8);
+ }
+ }
+
+ [Theory]
+ [InlineData('\uD800', OperationStatus.NeedMoreData)] // standalone high surrogate
+ [InlineData('\uDFFF', OperationStatus.InvalidData)] // standalone low surrogate
+ public void ToBytes_WithOnlyStandaloneSurrogates(char charValue, OperationStatus expectedOperationStatus)
+ {
+ ToBytes_Test_Core(
+ utf16Input: new[] { charValue },
+ destinationSize: 0,
+ replaceInvalidSequences: false,
+ isFinalChunk: false,
+ expectedOperationStatus: expectedOperationStatus,
+ expectedNumCharsRead: 0,
+ expectedUtf8Transcoding: Span<byte>.Empty);
+ }
+
+ [Theory]
+ [InlineData("<LOW><HIGH>", 0, "")] // swapped surrogate pair characters
+ [InlineData("A<LOW><HIGH>", 1, "41")] // consume standalone ASCII char, then swapped surrogate pair characters
+ [InlineData("A<HIGH>B", 1, "41")] // consume standalone ASCII char, then standalone high surrogate char
+ [InlineData("A<LOW>B", 1, "41")] // consume standalone ASCII char, then standalone low surrogate char
+ [InlineData("AB<HIGH><HIGH>", 2, "4142")] // consume two ASCII chars, then standalone high surrogate char
+ [InlineData("AB<LOW><LOW>", 2, "4142")] // consume two ASCII chars, then standalone low surrogate char
+ public void ToBytes_WithInvalidSurrogates(string utf16Input, int expectedNumCharsConsumed, string expectedUtf8TranscodingHex)
+ {
+ // xUnit can't handle ill-formed strings in [InlineData], so we replace here.
+
+ utf16Input = utf16Input.Replace("<HIGH>", "\uD800").Replace("<LOW>", "\uDFFF");
+
+ // These test cases are for the "fast processing" code which is the main loop of TranscodeToUtf8,
+ // so inputs should be at least 2 chars.
+
+ Assert.True(utf16Input.Length >= 2);
+
+ ToBytes_Test_Core(
+ utf16Input: utf16Input,
+ destinationSize: expectedUtf8TranscodingHex.Length / 2,
+ replaceInvalidSequences: false,
+ isFinalChunk: false,
+ expectedOperationStatus: OperationStatus.InvalidData,
+ expectedNumCharsRead: expectedNumCharsConsumed,
+ expectedUtf8Transcoding: DecodeHex(expectedUtf8TranscodingHex));
+ }
+
+ [Theory]
+ [InlineData("<LOW><HIGH>", REPLACEMENT_CHAR_UTF8)] // standalone low surr. and incomplete high surr.
+ [InlineData("<HIGH><HIGH>", REPLACEMENT_CHAR_UTF8)] // standalone high surr. and incomplete high surr.
+ [InlineData("<LOW><LOW>", REPLACEMENT_CHAR_UTF8 + REPLACEMENT_CHAR_UTF8)] // standalone low surr. and incomplete low surr.
+ [InlineData("A<LOW>B<LOW>C<HIGH>D", "41" + REPLACEMENT_CHAR_UTF8 + "42" + REPLACEMENT_CHAR_UTF8 + "43" + REPLACEMENT_CHAR_UTF8 + "44")] // standalone low, low, high surrounded by other data
+ public void ToBytes_WithReplacements(string utf16Input, string expectedUtf8TranscodingHex)
+ {
+ // xUnit can't handle ill-formed strings in [InlineData], so we replace here.
+
+ utf16Input = utf16Input.Replace("<HIGH>", "\uD800").Replace("<LOW>", "\uDFFF");
+
+ bool isFinalCharHighSurrogate = char.IsHighSurrogate(utf16Input.Last());
+
+ ToBytes_Test_Core(
+ utf16Input: utf16Input,
+ destinationSize: expectedUtf8TranscodingHex.Length / 2,
+ replaceInvalidSequences: true,
+ isFinalChunk: false,
+ expectedOperationStatus: (isFinalCharHighSurrogate) ? OperationStatus.NeedMoreData : OperationStatus.Done,
+ expectedNumCharsRead: (isFinalCharHighSurrogate) ? (utf16Input.Length - 1) : utf16Input.Length,
+ expectedUtf8Transcoding: DecodeHex(expectedUtf8TranscodingHex));
+
+ if (isFinalCharHighSurrogate)
+ {
+ // Also test with isFinalChunk = true
+ ToBytes_Test_Core(
+ utf16Input: utf16Input,
+ destinationSize: expectedUtf8TranscodingHex.Length / 2 + Rune.ReplacementChar.Utf8SequenceLength /* for replacement char */,
+ replaceInvalidSequences: true,
+ isFinalChunk: true,
+ expectedOperationStatus: OperationStatus.Done,
+ expectedNumCharsRead: utf16Input.Length,
+ expectedUtf8Transcoding: DecodeHex(expectedUtf8TranscodingHex + REPLACEMENT_CHAR_UTF8));
+ }
+ }
+
+ [Theory]
+ [InlineData(E_ACUTE_UTF16 + "<LOW>", true, 1, OperationStatus.DestinationTooSmall, E_ACUTE_UTF8)] // not enough output buffer to hold U+FFFD
+ [InlineData(E_ACUTE_UTF16 + "<LOW>", true, 2, OperationStatus.Done, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8)] // replace standalone low surr. at end
+ [InlineData(E_ACUTE_UTF16 + "<HIGH>", true, 1, OperationStatus.DestinationTooSmall, E_ACUTE_UTF8)] // not enough output buffer to hold U+FFFD
+ [InlineData(E_ACUTE_UTF16 + "<HIGH>", true, 2, OperationStatus.Done, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8)] // replace standalone high surr. at end
+ [InlineData(E_ACUTE_UTF16 + "<HIGH>", false, 1, OperationStatus.NeedMoreData, E_ACUTE_UTF8)] // don't replace standalone high surr. at end
+ [InlineData(E_ACUTE_UTF16 + "<HIGH>" + X_UTF16, true, 2, OperationStatus.DestinationTooSmall, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8)] // not enough output buffer to hold 'X'
+ [InlineData(E_ACUTE_UTF16 + "<HIGH>" + X_UTF16, false, 2, OperationStatus.DestinationTooSmall, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8)] // not enough output buffer to hold 'X'
+ [InlineData(E_ACUTE_UTF16 + "<HIGH>" + X_UTF16, true, 3, OperationStatus.Done, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8 + X_UTF8)] // replacement followed by 'X'
+ [InlineData(E_ACUTE_UTF16 + "<HIGH>" + X_UTF16, false, 3, OperationStatus.Done, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8 + X_UTF8)] // replacement followed by 'X'
+ public void ToBytes_WithReplacements_AndCustomBufferSizes(string utf16Input, bool isFinalChunk, int expectedNumCharsConsumed, OperationStatus expectedOperationStatus, string expectedUtf8TranscodingHex)
+ {
+ // xUnit can't handle ill-formed strings in [InlineData], so we replace here.
+
+ utf16Input = utf16Input.Replace("<HIGH>", "\uD800").Replace("<LOW>", "\uDFFF");
+
+ ToBytes_Test_Core(
+ utf16Input: utf16Input,
+ destinationSize: expectedUtf8TranscodingHex.Length / 2,
+ replaceInvalidSequences: true,
+ isFinalChunk: isFinalChunk,
+ expectedOperationStatus: expectedOperationStatus,
+ expectedNumCharsRead: expectedNumCharsConsumed,
+ expectedUtf8Transcoding: DecodeHex(expectedUtf8TranscodingHex));
+ }
+
+ [Fact]
+ public void ToBytes_AllPossibleScalarValues()
+ {
+ ToBytes_Test_Core(
+ utf16Input: s_allScalarsAsUtf16.Span,
+ destinationSize: s_allScalarsAsUtf8.Length,
+ replaceInvalidSequences: false,
+ isFinalChunk: false,
+ expectedOperationStatus: OperationStatus.Done,
+ expectedNumCharsRead: s_allScalarsAsUtf16.Length,
+ expectedUtf8Transcoding: s_allScalarsAsUtf8.Span);
+ }
+
+ private static void ToBytes_Test_Core(ReadOnlySpan<char> utf16Input, int destinationSize, bool replaceInvalidSequences, bool isFinalChunk, OperationStatus expectedOperationStatus, int expectedNumCharsRead, ReadOnlySpan<byte> expectedUtf8Transcoding)
+ {
+ // Arrange
+
+ using (BoundedMemory<char> boundedSource = BoundedMemory.AllocateFromExistingData(utf16Input))
+ using (BoundedMemory<byte> boundedDestination = BoundedMemory.Allocate<byte>(destinationSize))
+ {
+ boundedSource.MakeReadonly();
+
+ // Act
+
+ OperationStatus actualOperationStatus = Utf8.FromUtf16(boundedSource.Span, boundedDestination.Span, out int actualNumCharsRead, out int actualNumBytesWritten, replaceInvalidSequences, isFinalChunk);
+
+ // Assert
+
+ Assert.Equal(expectedOperationStatus, actualOperationStatus);
+ Assert.Equal(expectedNumCharsRead, actualNumCharsRead);
+ Assert.Equal(expectedUtf8Transcoding.Length, actualNumBytesWritten);
+ Assert.Equal(expectedUtf8Transcoding.ToArray(), boundedDestination.Span.Slice(0, actualNumBytesWritten).ToArray());
+ }
+ }
+ }
+}
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Buffers;
+using System.Linq;
+using Xunit;
+
+namespace System.Text.Unicode.Tests
+{
+ public partial class Utf8Tests
+ {
+ [Theory]
+ [InlineData("80", 0, "")] // sequence cannot begin with continuation character
+ [InlineData("8182", 0, "")] // sequence cannot begin with continuation character
+ [InlineData("838485", 0, "")] // sequence cannot begin with continuation character
+ [InlineData(X_UTF8 + "80", 1, X_UTF16)] // sequence cannot begin with continuation character
+ [InlineData(X_UTF8 + "8182", 1, X_UTF16)] // sequence cannot begin with continuation character
+ [InlineData("C0", 0, "")] // [ C0 ] is always invalid
+ [InlineData("C080", 0, "")] // [ C0 ] is always invalid
+ [InlineData("C08081", 0, "")] // [ C0 ] is always invalid
+ [InlineData(X_UTF8 + "C1", 1, X_UTF16)] // [ C1 ] is always invalid
+ [InlineData(X_UTF8 + "C180", 1, X_UTF16)] // [ C1 ] is always invalid
+ [InlineData(X_UTF8 + "C27F", 1, X_UTF16)] // [ C2 ] is improperly terminated
+ [InlineData("E2827F", 0, "")] // [ E2 82 ] is improperly terminated
+ [InlineData("E09F80", 0, "")] // [ E0 9F ... ] is overlong
+ [InlineData("E0C080", 0, "")] // [ E0 ] is improperly terminated
+ [InlineData("ED7F80", 0, "")] // [ ED ] is improperly terminated
+ [InlineData("EDA080", 0, "")] // [ ED A0 ... ] is surrogate
+ public void ToChars_WithSmallInvalidBuffers(string utf8HexInput, int expectedNumBytesConsumed, string expectedUtf16Transcoding)
+ {
+ // These test cases are for the "slow processing" code path at the end of TranscodeToUtf16,
+ // so inputs should be less than 4 bytes.
+
+ Assert.InRange(utf8HexInput.Length, 0, 6);
+
+ ToChars_Test_Core(
+ utf8Input: DecodeHex(utf8HexInput),
+ destinationSize: expectedUtf16Transcoding.Length,
+ replaceInvalidSequences: false,
+ isFinalChunk: false,
+ expectedOperationStatus: OperationStatus.InvalidData,
+ expectedNumBytesRead: expectedNumBytesConsumed,
+ expectedUtf16Transcoding: expectedUtf16Transcoding);
+ }
+
+ [Theory]
+ [InlineData("C2", 0, "")] // [ C2 ] is an incomplete sequence
+ [InlineData("E282", 0, "")] // [ E2 82 ] is an incomplete sequence
+ [InlineData(X_UTF8 + "C2", 1, X_UTF16)] // [ C2 ] is an incomplete sequence
+ [InlineData(X_UTF8 + "E0", 1, X_UTF16)] // [ E0 ] is an incomplete sequence
+ [InlineData(X_UTF8 + "E0BF", 1, X_UTF16)] // [ E0 BF ] is an incomplete sequence
+ [InlineData(X_UTF8 + "F0", 1, X_UTF16)] // [ F0 ] is an incomplete sequence
+ [InlineData(X_UTF8 + "F0BF", 1, X_UTF16)] // [ F0 BF ] is an incomplete sequence
+ [InlineData(X_UTF8 + "F0BFA0", 1, X_UTF16)] // [ F0 BF A0 ] is an incomplete sequence
+ [InlineData(E_ACUTE_UTF8 + "C2", 2, E_ACUTE_UTF16)] // [ C2 ] is an incomplete sequence
+ [InlineData(E_ACUTE_UTF8 + "E0", 2, E_ACUTE_UTF16)] // [ E0 ] is an incomplete sequence
+ [InlineData(E_ACUTE_UTF8 + "F0", 2, E_ACUTE_UTF16)] // [ F0 ] is an incomplete sequence
+ [InlineData(E_ACUTE_UTF8 + "E0BF", 2, E_ACUTE_UTF16)] // [ E0 BF ] is an incomplete sequence
+ [InlineData(E_ACUTE_UTF8 + "F0BF", 2, E_ACUTE_UTF16)] // [ F0 BF ] is an incomplete sequence
+ [InlineData(EURO_SYMBOL_UTF8 + "C2", 3, EURO_SYMBOL_UTF16)] // [ C2 ] is an incomplete sequence
+ [InlineData(EURO_SYMBOL_UTF8 + "E0", 3, EURO_SYMBOL_UTF16)] // [ E0 ] is an incomplete sequence
+ [InlineData(EURO_SYMBOL_UTF8 + "F0", 3, EURO_SYMBOL_UTF16)] // [ F0 ] is an incomplete sequence
+ public void ToChars_WithVariousIncompleteBuffers(string utf8HexInput, int expectedNumBytesConsumed, string expectedUtf16Transcoding)
+ {
+ // These test cases are for the "slow processing" code path at the end of TranscodeToUtf16,
+ // so inputs should be less than 4 bytes.
+
+ ToChars_Test_Core(
+ utf8Input: DecodeHex(utf8HexInput),
+ destinationSize: expectedUtf16Transcoding.Length,
+ replaceInvalidSequences: false,
+ isFinalChunk: false,
+ expectedOperationStatus: OperationStatus.NeedMoreData,
+ expectedNumBytesRead: expectedNumBytesConsumed,
+ expectedUtf16Transcoding: expectedUtf16Transcoding);
+ }
+
+ [Theory]
+ /* SMALL VALID BUFFERS - tests drain loop at end of method */
+ [InlineData("")] // empty string is OK
+ [InlineData("X")]
+ [InlineData("XY")]
+ [InlineData("XYZ")]
+ [InlineData(E_ACUTE_UTF16)]
+ [InlineData(X_UTF16 + E_ACUTE_UTF16)]
+ [InlineData(E_ACUTE_UTF16 + X_UTF16)]
+ [InlineData(EURO_SYMBOL_UTF16)]
+ /* LARGE VALID BUFFERS - test main loop at beginning of method */
+ [InlineData(E_ACUTE_UTF16 + "ABCD" + "0123456789:;<=>?")] // Loop unrolling at end of buffer
+ [InlineData(E_ACUTE_UTF16 + "ABCD" + "0123456789:;<=>?" + "01234567" + E_ACUTE_UTF16 + "89:;<=>?")] // Loop unrolling interrupted by non-ASCII
+ [InlineData("ABC" + E_ACUTE_UTF16 + "0123")] // 3 ASCII bytes followed by non-ASCII
+ [InlineData("AB" + E_ACUTE_UTF16 + "0123")] // 2 ASCII bytes followed by non-ASCII
+ [InlineData("A" + E_ACUTE_UTF16 + "0123")] // 1 ASCII byte followed by non-ASCII
+ [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16)] // 4x 2-byte sequences, exercises optimization code path in 2-byte sequence processing
+ [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + "PQ")] // 3x 2-byte sequences + 2 ASCII bytes, exercises optimization code path in 2-byte sequence processing
+ [InlineData(E_ACUTE_UTF16 + "PQ")] // single 2-byte sequence + 2 trailing ASCII bytes, exercises draining logic in 2-byte sequence processing
+ [InlineData(E_ACUTE_UTF16 + "P" + E_ACUTE_UTF16 + "0@P")] // single 2-byte sequences + 1 trailing ASCII byte + 2-byte sequence, exercises draining logic in 2-byte sequence processing
+ [InlineData(EURO_SYMBOL_UTF16 + "@")] // single 3-byte sequence + 1 trailing ASCII byte, exercises draining logic in 3-byte sequence processing
+ [InlineData(EURO_SYMBOL_UTF16 + "@P`")] // single 3-byte sequence + 3 trailing ASCII byte, exercises draining logic and "running out of data" logic in 3-byte sequence processing
+ [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // 3x 3-byte sequences, exercises "stay within 3-byte loop" logic in 3-byte sequence processing
+ [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // 4x 3-byte sequences, exercises "consume multiple bytes at a time" logic in 3-byte sequence processing
+ [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + E_ACUTE_UTF16)] // 3x 3-byte sequences + single 2-byte sequence, exercises "consume multiple bytes at a time" logic in 3-byte sequence processing
+ [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16)] // 2x 3-byte sequences + 4x 2-byte sequences, exercises "consume multiple bytes at a time" logic in 3-byte sequence processing
+ [InlineData(GRINNING_FACE_UTF16 + GRINNING_FACE_UTF16)] // 2x 4-byte sequences, exercises 4-byte sequence processing
+ [InlineData(GRINNING_FACE_UTF16 + "@AB")] // single 4-byte sequence + 3 ASCII bytes, exercises 4-byte sequence processing and draining logic
+ [InlineData("\U0001F938\U0001F3FD\u200D\u2640\uFE0F")] // U+1F938 U+1F3FD U+200D U+2640 U+FE0F WOMAN CARTWHEELING: MEDIUM SKIN TONE, exercising switching between multiple sequence lengths
+ public void ToChars_ValidBuffers(string utf16Input)
+ {
+ // We're going to run the tests with destination buffer lengths ranging from 0 all the way
+ // to buffers large enough to hold the full output. This allows us to test logic that
+ // detects whether we're about to overrun our destination buffer and instead returns DestinationTooSmall.
+
+ Rune[] enumeratedScalars = utf16Input.EnumerateRunes().ToArray();
+
+ // Convert entire input to UTF-8 using our unit test reference logic.
+
+ byte[] utf8Input = enumeratedScalars.SelectMany(ToUtf8).ToArray();
+
+ // 0-length buffer test
+ ToChars_Test_Core(
+ utf8Input: utf8Input,
+ destinationSize: 0,
+ replaceInvalidSequences: false,
+ isFinalChunk: false,
+ expectedOperationStatus: (utf8Input.Length == 0) ? OperationStatus.Done : OperationStatus.DestinationTooSmall,
+ expectedNumBytesRead: 0,
+ expectedUtf16Transcoding: ReadOnlySpan<char>.Empty);
+
+ int expectedNumBytesConsumed = 0;
+ char[] concatenatedUtf16 = Array.Empty<char>();
+
+ for (int i = 0; i < enumeratedScalars.Length; i++)
+ {
+ Rune thisScalar = enumeratedScalars[i];
+
+ // if this is an astral scalar value, quickly test a buffer that's not large enough to contain the entire UTF-16 encoding
+
+ if (!thisScalar.IsBmp)
+ {
+ ToChars_Test_Core(
+ utf8Input: utf8Input,
+ destinationSize: concatenatedUtf16.Length + 1,
+ replaceInvalidSequences: false,
+ isFinalChunk: false,
+ expectedOperationStatus: OperationStatus.DestinationTooSmall,
+ expectedNumBytesRead: expectedNumBytesConsumed,
+ expectedUtf16Transcoding: concatenatedUtf16);
+ }
+
+ // now provide a destination buffer large enough to hold the next full scalar encoding
+
+ expectedNumBytesConsumed += thisScalar.Utf8SequenceLength;
+ concatenatedUtf16 = concatenatedUtf16.Concat(ToUtf16(thisScalar)).ToArray();
+
+ ToChars_Test_Core(
+ utf8Input: utf8Input,
+ destinationSize: concatenatedUtf16.Length,
+ replaceInvalidSequences: false,
+ isFinalChunk: false,
+ expectedOperationStatus: (i == enumeratedScalars.Length - 1) ? OperationStatus.Done : OperationStatus.DestinationTooSmall,
+ expectedNumBytesRead: expectedNumBytesConsumed,
+ expectedUtf16Transcoding: concatenatedUtf16);
+ }
+ }
+
+ [Theory]
+ [InlineData("3031" + "80" + "202122232425", 2, "01")] // Continuation character at start of sequence should match no bitmask
+ [InlineData("3031" + "C080" + "2021222324", 2, "01")] // Overlong 2-byte sequence at start of DWORD
+ [InlineData("3031" + "C180" + "2021222324", 2, "01")] // Overlong 2-byte sequence at start of DWORD
+ [InlineData("C280" + "C180", 2, "\u0080")] // Overlong 2-byte sequence at end of DWORD
+ [InlineData("C27F" + "C280", 0, "")] // Improperly terminated 2-byte sequence at start of DWORD
+ [InlineData("C2C0" + "C280", 0, "")] // Improperly terminated 2-byte sequence at start of DWORD
+ [InlineData("C280" + "C27F", 2, "\u0080")] // Improperly terminated 2-byte sequence at end of DWORD
+ [InlineData("C280" + "C2C0", 2, "\u0080")] // Improperly terminated 2-byte sequence at end of DWORD
+ [InlineData("C280" + "C280" + "80203040", 4, "\u0080\u0080")] // Continuation character at start of sequence, within "stay in 2-byte processing" optimization
+ [InlineData("C280" + "C280" + "C180" + "C280", 4, "\u0080\u0080")] // Overlong 2-byte sequence at start of DWORD, within "stay in 2-byte processing" optimization
+ [InlineData("C280" + "C280" + "C280" + "C180", 6, "\u0080\u0080\u0080")] // Overlong 2-byte sequence at end of DWORD, within "stay in 2-byte processing" optimization
+ [InlineData("3031" + "E09F80" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Overlong 3-byte sequence at start of DWORD
+ [InlineData("3031" + "E07F80" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Improperly terminated 3-byte sequence at start of DWORD
+ [InlineData("3031" + "E0C080" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Improperly terminated 3-byte sequence at start of DWORD
+ [InlineData("3031" + "E17F80" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Improperly terminated 3-byte sequence at start of DWORD
+ [InlineData("3031" + "E1C080" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Improperly terminated 3-byte sequence at start of DWORD
+ [InlineData("3031" + "EDA080" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Surrogate 3-byte sequence at start of DWORD
+ [InlineData("3031" + "F5808080", 2, "01")] // [ F5 ] is always invalid
+ [InlineData("3031" + "F6808080", 2, "01")] // [ F6 ] is always invalid
+ [InlineData("3031" + "F7808080", 2, "01")] // [ F7 ] is always invalid
+ [InlineData("3031" + "F8808080", 2, "01")] // [ F8 ] is always invalid
+ [InlineData("3031" + "F9808080", 2, "01")] // [ F9 ] is always invalid
+ [InlineData("3031" + "FA808080", 2, "01")] // [ FA ] is always invalid
+ [InlineData("3031" + "FB808080", 2, "01")] // [ FB ] is always invalid
+ [InlineData("3031" + "FC808080", 2, "01")] // [ FC ] is always invalid
+ [InlineData("3031" + "FD808080", 2, "01")] // [ FD ] is always invalid
+ [InlineData("3031" + "FE808080", 2, "01")] // [ FE ] is always invalid
+ [InlineData("3031" + "FF808080", 2, "01")] // [ FF ] is always invalid
+ public void ToChars_WithLargeInvalidBuffers(string utf8HexInput, int expectedNumBytesConsumed, string expectedUtf16Transcoding)
+ {
+ // These test cases are for the "fast processing" code which is the main loop of TranscodeToUtf16,
+ // so inputs should be less >= 4 bytes.
+
+ Assert.True(utf8HexInput.Length >= 8);
+
+ ToChars_Test_Core(
+ utf8Input: DecodeHex(utf8HexInput),
+ destinationSize: expectedUtf16Transcoding.Length,
+ replaceInvalidSequences: false,
+ isFinalChunk: false,
+ expectedOperationStatus: OperationStatus.InvalidData,
+ expectedNumBytesRead: expectedNumBytesConsumed,
+ expectedUtf16Transcoding: expectedUtf16Transcoding);
+ }
+
+ [Theory]
+ [InlineData(X_UTF8 + "80" + X_UTF8, X_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // stray continuation byte [ 80 ]
+ [InlineData(X_UTF8 + "FF" + X_UTF8, X_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // invalid UTF-8 byte [ FF ]
+ [InlineData(X_UTF8 + "C2" + X_UTF8, X_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // 2-byte sequence starter [ C2 ] not followed by continuation byte
+ [InlineData(X_UTF8 + "C1C180" + X_UTF8, X_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ C1 80 ] is overlong but consists of two maximal invalid subsequences, each of length 1 byte
+ [InlineData(X_UTF8 + E_ACUTE_UTF8 + "E08080", X_UTF16 + E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16)] // [ E0 80 ] is overlong 2-byte sequence (1 byte maximal invalid subsequence), and following [ 80 ] is stray continuation byte
+ [InlineData(GRINNING_FACE_UTF8 + "F08F8080" + GRINNING_FACE_UTF8, GRINNING_FACE_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + GRINNING_FACE_UTF16)] // [ F0 8F ] is overlong 4-byte sequence (1 byte maximal invalid subsequence), and following [ 80 ] instances are stray continuation bytes
+ [InlineData(GRINNING_FACE_UTF8 + "F4908080" + GRINNING_FACE_UTF8, GRINNING_FACE_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + GRINNING_FACE_UTF16)] // [ F4 90 ] is out-of-range 4-byte sequence (1 byte maximal invalid subsequence), and following [ 80 ] instances are stray continuation bytes
+ [InlineData(E_ACUTE_UTF8 + "EDA0" + X_UTF8, E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ ED A0 ] is encoding of UTF-16 surrogate code point, so consists of two maximal invalid subsequences, each of length 1 byte
+ [InlineData(E_ACUTE_UTF8 + "ED80" + X_UTF8, E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ ED 80 ] is incomplete 3-byte sequence, so is 2-byte maximal invalid subsequence
+ [InlineData(E_ACUTE_UTF8 + "F380" + X_UTF8, E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ F3 80 ] is incomplete 4-byte sequence, so is 2-byte maximal invalid subsequence
+ [InlineData(E_ACUTE_UTF8 + "F38080" + X_UTF8, E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ F3 80 80 ] is incomplete 4-byte sequence, so is 3-byte maximal invalid subsequence
+ public void ToChars_WithReplacement(string utf8HexInput, string expectedUtf16Transcoding)
+ {
+ // First run the test with isFinalBlock = false,
+ // both with and without some bytes of incomplete trailing data.
+
+ ToChars_Test_Core(
+ utf8Input: DecodeHex(utf8HexInput),
+ destinationSize: expectedUtf16Transcoding.Length,
+ replaceInvalidSequences: true,
+ isFinalChunk: false,
+ expectedOperationStatus: OperationStatus.Done,
+ expectedNumBytesRead: utf8HexInput.Length / 2,
+ expectedUtf16Transcoding: expectedUtf16Transcoding);
+
+ ToChars_Test_Core(
+ utf8Input: DecodeHex(utf8HexInput + "E0BF" /* trailing data */),
+ destinationSize: expectedUtf16Transcoding.Length,
+ replaceInvalidSequences: true,
+ isFinalChunk: false,
+ expectedOperationStatus: OperationStatus.NeedMoreData,
+ expectedNumBytesRead: utf8HexInput.Length / 2,
+ expectedUtf16Transcoding: expectedUtf16Transcoding);
+
+ // Then run the test with isFinalBlock = true, with incomplete trailing data.
+
+ ToChars_Test_Core(
+ utf8Input: DecodeHex(utf8HexInput + "E0BF" /* trailing data */),
+ destinationSize: expectedUtf16Transcoding.Length,
+ replaceInvalidSequences: true,
+ isFinalChunk: true,
+ expectedOperationStatus: OperationStatus.DestinationTooSmall,
+ expectedNumBytesRead: utf8HexInput.Length / 2,
+ expectedUtf16Transcoding: expectedUtf16Transcoding);
+
+ ToChars_Test_Core(
+ utf8Input: DecodeHex(utf8HexInput + "E0BF" /* trailing data */),
+ destinationSize: expectedUtf16Transcoding.Length + 1, // allow room for U+FFFD
+ replaceInvalidSequences: true,
+ isFinalChunk: true,
+ expectedOperationStatus: OperationStatus.Done,
+ expectedNumBytesRead: utf8HexInput.Length / 2 + 2,
+ expectedUtf16Transcoding: expectedUtf16Transcoding + REPLACEMENT_CHAR_UTF16);
+ }
+
+ [Fact]
+ public void ToChars_AllPossibleScalarValues()
+ {
+ ToChars_Test_Core(
+ utf8Input: s_allScalarsAsUtf8.Span,
+ destinationSize: s_allScalarsAsUtf16.Length,
+ replaceInvalidSequences: false,
+ isFinalChunk: false,
+ expectedOperationStatus: OperationStatus.Done,
+ expectedNumBytesRead: s_allScalarsAsUtf8.Length,
+ expectedUtf16Transcoding: s_allScalarsAsUtf16.Span);
+ }
+
+ private static void ToChars_Test_Core(ReadOnlySpan<byte> utf8Input, int destinationSize, bool replaceInvalidSequences, bool isFinalChunk, OperationStatus expectedOperationStatus, int expectedNumBytesRead, ReadOnlySpan<char> expectedUtf16Transcoding)
+ {
+ // Arrange
+
+ using (BoundedMemory<byte> boundedSource = BoundedMemory.AllocateFromExistingData(utf8Input))
+ using (BoundedMemory<char> boundedDestination = BoundedMemory.Allocate<char>(destinationSize))
+ {
+ boundedSource.MakeReadonly();
+
+ // Act
+
+ OperationStatus actualOperationStatus = Utf8.ToUtf16(boundedSource.Span, boundedDestination.Span, out int actualNumBytesRead, out int actualNumCharsWritten, replaceInvalidSequences, isFinalChunk);
+
+ // Assert
+
+ Assert.Equal(expectedOperationStatus, actualOperationStatus);
+ Assert.Equal(expectedNumBytesRead, actualNumBytesRead);
+ Assert.Equal(expectedUtf16Transcoding.Length, actualNumCharsWritten);
+ Assert.Equal(expectedUtf16Transcoding.ToString(), boundedDestination.Span.Slice(0, actualNumCharsWritten).ToString());
+ }
+ }
+ }
+}
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Buffers;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text.RegularExpressions;
+using Xunit;
+
+namespace System.Text.Unicode.Tests
+{
+ public partial class Utf8Tests
+ {
+ private const string X_UTF8 = "58"; // U+0058 LATIN CAPITAL LETTER X, 1 byte
+ private const string X_UTF16 = "X";
+
+ private const string Y_UTF8 = "59"; // U+0058 LATIN CAPITAL LETTER Y, 1 byte
+ private const string Y_UTF16 = "Y";
+
+ private const string Z_UTF8 = "5A"; // U+0058 LATIN CAPITAL LETTER Z, 1 byte
+ private const string Z_UTF16 = "Z";
+
+ private const string E_ACUTE_UTF8 = "C3A9"; // U+00E9 LATIN SMALL LETTER E WITH ACUTE, 2 bytes
+ private const string E_ACUTE_UTF16 = "\u00E9";
+
+ private const string EURO_SYMBOL_UTF8 = "E282AC"; // U+20AC EURO SIGN, 3 bytes
+ private const string EURO_SYMBOL_UTF16 = "\u20AC";
+
+ private const string REPLACEMENT_CHAR_UTF8 = "EFBFBD"; // U+FFFD REPLACEMENT CHAR, 3 bytes
+ private const string REPLACEMENT_CHAR_UTF16 = "\uFFFD";
+
+ private const string GRINNING_FACE_UTF8 = "F09F9880"; // U+1F600 GRINNING FACE, 4 bytes
+ private const string GRINNING_FACE_UTF16 = "\U0001F600";
+
+ // All valid scalars [ U+0000 .. U+D7FF ] and [ U+E000 .. U+10FFFF ].
+ private static readonly IEnumerable<Rune> s_allValidScalars = Enumerable.Range(0x0000, 0xD800).Concat(Enumerable.Range(0xE000, 0x110000 - 0xE000)).Select(value => new Rune(value));
+
+ private static readonly ReadOnlyMemory<char> s_allScalarsAsUtf16;
+ private static readonly ReadOnlyMemory<byte> s_allScalarsAsUtf8;
+
+ static Utf8Tests()
+ {
+ List<char> allScalarsAsUtf16 = new List<char>();
+ List<byte> allScalarsAsUtf8 = new List<byte>();
+
+ foreach (Rune rune in s_allValidScalars)
+ {
+ allScalarsAsUtf16.AddRange(ToUtf16(rune));
+ allScalarsAsUtf8.AddRange(ToUtf8(rune));
+ }
+
+ s_allScalarsAsUtf16 = allScalarsAsUtf16.ToArray().AsMemory();
+ s_allScalarsAsUtf8 = allScalarsAsUtf8.ToArray().AsMemory();
+ }
+
+ /*
+ * COMMON UTILITIES FOR UNIT TESTS
+ */
+
+ private static byte[] DecodeHex(ReadOnlySpan<char> inputHex)
+ {
+ Assert.True(Regex.IsMatch(inputHex.ToString(), "^([0-9a-fA-F]{2})*$"), "Input must be an even number of hex characters.");
+
+ byte[] retVal = new byte[inputHex.Length / 2];
+ for (int i = 0; i < retVal.Length; i++)
+ {
+ retVal[i] = byte.Parse(inputHex.Slice(i * 2, 2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture);
+ }
+ return retVal;
+ }
+
+ // !! IMPORTANT !!
+ // Don't delete this implementation, as we use it as a reference to make sure the framework's
+ // transcoding logic is correct.
+ private static byte[] ToUtf8(Rune rune)
+ {
+ Assert.True(Rune.IsValid(rune.Value), $"Rune with value U+{(uint)rune.Value:X4} is not well-formed.");
+
+ if (rune.Value < 0x80)
+ {
+ return new[]
+ {
+ (byte)rune.Value
+ };
+ }
+ else if (rune.Value < 0x0800)
+ {
+ return new[]
+ {
+ (byte)((rune.Value >> 6) | 0xC0),
+ (byte)((rune.Value & 0x3F) | 0x80)
+ };
+ }
+ else if (rune.Value < 0x10000)
+ {
+ return new[]
+ {
+ (byte)((rune.Value >> 12) | 0xE0),
+ (byte)(((rune.Value >> 6) & 0x3F) | 0x80),
+ (byte)((rune.Value & 0x3F) | 0x80)
+ };
+ }
+ else
+ {
+ return new[]
+ {
+ (byte)((rune.Value >> 18) | 0xF0),
+ (byte)(((rune.Value >> 12) & 0x3F) | 0x80),
+ (byte)(((rune.Value >> 6) & 0x3F) | 0x80),
+ (byte)((rune.Value & 0x3F) | 0x80)
+ };
+ }
+ }
+
+ // !! IMPORTANT !!
+ // Don't delete this implementation, as we use it as a reference to make sure the framework's
+ // transcoding logic is correct.
+ private static char[] ToUtf16(Rune rune)
+ {
+ Assert.True(Rune.IsValid(rune.Value), $"Rune with value U+{(uint)rune.Value:X4} is not well-formed.");
+
+ if (rune.IsBmp)
+ {
+ return new[]
+ {
+ (char)rune.Value
+ };
+ }
+ else
+ {
+ return new[]
+ {
+ (char)((rune.Value >> 10) + 0xD800 - 0x40),
+ (char)((rune.Value & 0x03FF) + 0xDC00)
+ };
+ }
+ }
+ }
+}