From effeb6ee79ca64376775a81ef1b2d9b507a30163 Mon Sep 17 00:00:00 2001
From: Levi Broderick <levib@microsoft.com>
Date: Tue, 2 Apr 2019 21:07:31 -0700
Subject: [PATCH] Hook up new UTF-8 logic through UTF8Encoding - Add vectorized
 UTF-16 validation and transcoded byte counts - Move Utf16Utility into Unicode
 namespace alongside Utf8Utility - Fix some bugs in DecoderNLS's draining
 logic

---
 .../shared/System.Private.CoreLib.Shared.projitems |    3 +-
 .../shared/System/Globalization/CompareInfo.cs     |    2 +-
 .../shared/System/Globalization/TextInfo.cs        |    1 +
 .../shared/System/Marvin.OrdinalIgnoreCase.cs      |    2 +-
 .../shared/System/Text/DecoderNLS.cs               |   11 +-
 .../shared/System/Text/Encoding.Internal.cs        |   21 +-
 .../shared/System/Text/Rune.cs                     |    1 +
 .../shared/System/Text/UTF8Encoding.cs             | 2570 +++-----------------
 .../System/Text/Unicode/Utf16Utility.Validation.cs |  361 +++
 .../System/Text/{ => Unicode}/Utf16Utility.cs      |    2 +-
 .../shared/System/Text/Unicode/Utf8.cs             |    6 +-
 .../System/Text/Unicode/Utf8Utility.Transcoding.cs |    2 +-
 .../System/Text/Unicode/Utf8Utility.Validation.cs  |   19 +-
 .../shared/System/Text/Unicode/Utf8Utility.cs      |    2 +-
 14 files changed, 808 insertions(+), 2195 deletions(-)
 create mode 100644 src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.Validation.cs
 rename src/System.Private.CoreLib/shared/System/Text/{ => Unicode}/Utf16Utility.cs (99%)
diff --git a/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems b/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems
index 3ef9b35..4d50d9f 100644
--- a/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems
+++ b/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems
@@ -797,11 +797,12 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\UnicodeDebug.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\UnicodeEncoding.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\UnicodeUtility.cs" />
-    <Compile Include="$(MSBuildThisFileDirectory)System\Text\Utf16Utility.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\UTF32Encoding.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\UTF7Encoding.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\UTF8Encoding.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\ValueStringBuilder.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Text\Unicode\Utf16Utility.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Text\Unicode\Utf16Utility.Validation.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\Unicode\Utf8.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\Unicode\Utf8Utility.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\Unicode\Utf8Utility.Helpers.cs" />
diff --git a/src/System.Private.CoreLib/shared/System/Globalization/CompareInfo.cs b/src/System.Private.CoreLib/shared/System/Globalization/CompareInfo.cs
index 97bb90f..73e4b71 100644
--- a/src/System.Private.CoreLib/shared/System/Globalization/CompareInfo.cs
+++ b/src/System.Private.CoreLib/shared/System/Globalization/CompareInfo.cs
@@ -7,7 +7,7 @@ using System.Reflection;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Serialization;
-using System.Text;
+using System.Text.Unicode;
 using Internal.Runtime.CompilerServices;
 
 namespace System.Globalization
diff --git a/src/System.Private.CoreLib/shared/System/Globalization/TextInfo.cs b/src/System.Private.CoreLib/shared/System/Globalization/TextInfo.cs
index 12ce6d9..8752628 100644
--- a/src/System.Private.CoreLib/shared/System/Globalization/TextInfo.cs
+++ b/src/System.Private.CoreLib/shared/System/Globalization/TextInfo.cs
@@ -7,6 +7,7 @@ using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Serialization;
 using System.Text;
+using System.Text.Unicode;
 using Internal.Runtime.CompilerServices;
 
 #if BIT64
diff --git a/src/System.Private.CoreLib/shared/System/Marvin.OrdinalIgnoreCase.cs b/src/System.Private.CoreLib/shared/System/Marvin.OrdinalIgnoreCase.cs
index beab0cf..9e9bb31 100644
--- a/src/System.Private.CoreLib/shared/System/Marvin.OrdinalIgnoreCase.cs
+++ b/src/System.Private.CoreLib/shared/System/Marvin.OrdinalIgnoreCase.cs
@@ -5,7 +5,7 @@
 using System.Buffers;
 using System.Diagnostics;
 using System.Runtime.InteropServices;
-using System.Text;
+using System.Text.Unicode;
 using Internal.Runtime.CompilerServices;
 
 #if BIT64
diff --git a/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs b/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs
index 9040a94..bb5aa5f 100644
--- a/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs
@@ -266,6 +266,7 @@ namespace System.Text
             // to be in progress. Unlike EncoderNLS, this is simply a Debug.Assert. No exception is thrown.
 
             Debug.Assert(_fallbackBuffer is null || _fallbackBuffer.Remaining == 0, "Should have no data remaining in the fallback buffer.");
+            Debug.Assert(HasLeftoverData, "Caller shouldn't invoke this routine unless there's leftover data in the decoder.");
 
             // Copy the existing leftover data plus as many bytes as possible of the new incoming data
             // into a temporary concated buffer, then get its char count by decoding it.
@@ -319,6 +320,7 @@ namespace System.Text
             // to be in progress. Unlike EncoderNLS, this is simply a Debug.Assert. No exception is thrown.
 
             Debug.Assert(_fallbackBuffer is null || _fallbackBuffer.Remaining == 0, "Should have no data remaining in the fallback buffer.");
+            Debug.Assert(HasLeftoverData, "Caller shouldn't invoke this routine unless there's leftover data in the decoder.");
 
             // Copy the existing leftover data plus as many bytes as possible of the new incoming data
             // into a temporary concated buffer, then transcode it from bytes to chars.
@@ -370,6 +372,14 @@ namespace System.Text
 
         Finish:
 
+            // Report back the number of bytes (from the new incoming span) we consumed just now.
+            // This calculation is simple: it's the difference between the original leftover byte
+            // count and the number of bytes from the combined buffer we needed to decode the first
+            // scalar value. We need to report this before the call to SetLeftoverData /
+            // ClearLeftoverData because those methods will overwrite the _leftoverByteCount field.
+
+            bytesConsumed = combinedBufferBytesConsumed - _leftoverByteCount;
+
             if (persistNewCombinedBuffer)
             {
                 Debug.Assert(combinedBufferBytesConsumed == combinedBuffer.Length, "We should be asked to persist the entire combined buffer.");
@@ -380,7 +390,6 @@ namespace System.Text
                 ClearLeftoverData(); // the buffer contains no partial data; we'll go down the normal paths
             }
 
-            bytesConsumed = combinedBufferBytesConsumed - _leftoverByteCount; // amount of 'bytes' buffer consumed just now
             return charsWritten;
 
         DestinationTooSmall:
diff --git a/src/System.Private.CoreLib/shared/System/Text/Encoding.Internal.cs b/src/System.Private.CoreLib/shared/System/Text/Encoding.Internal.cs
index 0e32167..ca740a1 100644
--- a/src/System.Private.CoreLib/shared/System/Text/Encoding.Internal.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/Encoding.Internal.cs
@@ -850,8 +850,14 @@ namespace System.Text
 
             ReadOnlySpan<byte> bytes = new ReadOnlySpan<byte>(pOriginalBytes, originalByteCount).Slice(bytesConsumedSoFar);
 
-            int totalCharCount = decoder.DrainLeftoverDataForGetCharCount(bytes, out int bytesConsumedJustNow);
-            bytes = bytes.Slice(bytesConsumedJustNow);
+            int bytesConsumedJustNow = 0;
+            int totalCharCount = 0;
+
+            if (decoder.HasLeftoverData)
+            {
+                totalCharCount = decoder.DrainLeftoverDataForGetCharCount(bytes, out bytesConsumedJustNow);
+                bytes = bytes.Slice(bytesConsumedJustNow);
+            }
 
             // Now try invoking the "fast path" (no fallback) implementation.
             // We can use Unsafe.AsPointer here since these spans are created from pinned data (raw pointers).
@@ -1120,10 +1126,15 @@ namespace System.Text
             ReadOnlySpan<byte> bytes = new ReadOnlySpan<byte>(pOriginalBytes, originalByteCount).Slice(bytesConsumedSoFar);
             Span<char> chars = new Span<char>(pOriginalChars, originalCharCount).Slice(charsWrittenSoFar);
 
-            int charsWrittenJustNow = decoder.DrainLeftoverDataForGetChars(bytes, chars, out int bytesConsumedJustNow);
+            int bytesConsumedJustNow = 0;
+            int charsWrittenJustNow = 0;
 
-            bytes = bytes.Slice(bytesConsumedJustNow);
-            chars = chars.Slice(charsWrittenJustNow);
+            if (decoder.HasLeftoverData)
+            {
+                charsWrittenJustNow = decoder.DrainLeftoverDataForGetChars(bytes, chars, out bytesConsumedJustNow);
+                bytes = bytes.Slice(bytesConsumedJustNow);
+                chars = chars.Slice(charsWrittenJustNow);
+            }
 
             Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0, "Should be no remaining fallback data at this point.");
 
diff --git a/src/System.Private.CoreLib/shared/System/Text/Rune.cs b/src/System.Private.CoreLib/shared/System/Text/Rune.cs
index a91c0fc..a71750e 100644
--- a/src/System.Private.CoreLib/shared/System/Text/Rune.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/Rune.cs
@@ -6,6 +6,7 @@ using System.Buffers;
 using System.Diagnostics;
 using System.Globalization;
 using System.Runtime.CompilerServices;
+using System.Text.Unicode;
 
 namespace System.Text
 {
diff --git a/src/System.Private.CoreLib/shared/System/Text/UTF8Encoding.cs b/src/System.Private.CoreLib/shared/System/Text/UTF8Encoding.cs
index aaac975..7a3a1f7 100644
--- a/src/System.Private.CoreLib/shared/System/Text/UTF8Encoding.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/UTF8Encoding.cs
@@ -15,9 +15,11 @@
 #define FASTLOOP
 
 using System;
+using System.Buffers;
 using System.Diagnostics;
-using System.Globalization;
+using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
+using System.Text.Unicode;
 
 namespace System.Text
 {
@@ -129,22 +131,26 @@ namespace System.Text
         public override unsafe int GetByteCount(char[] chars, int index, int count)
         {
             // Validate input parameters
-            if (chars == null)
-                throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
 
-            if (index < 0 || count < 0)
-                throw new ArgumentOutOfRangeException((index < 0 ? nameof(index) : nameof(count)), SR.ArgumentOutOfRange_NeedNonNegNum);
+            if (chars is null)
+            {
+                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars, ExceptionResource.ArgumentNull_Array);
+            }
 
-            if (chars.Length - index < count)
-                throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
+            if ((index | count) < 0)
+            {
+                ThrowHelper.ThrowArgumentOutOfRangeException((index < 0) ? ExceptionArgument.index : ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
+            }
 
-            // If no input, return 0, avoid fixed empty array problem
-            if (count == 0)
-                return 0;
+            if (chars.Length - index < count)
+            {
+                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.chars, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer);
+            }
 
-            // Just call the pointer version
             fixed (char* pChars = chars)
-                return GetByteCount(pChars + index, count, null);
+            {
+                return GetByteCountCommon(pChars + index, count);
+            }
         }
 
         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
@@ -154,12 +160,17 @@ namespace System.Text
 
         public override unsafe int GetByteCount(string chars)
         {
-            // Validate input
-            if (chars==null)
-                throw new ArgumentNullException("s");
+            // Validate input parameters
+
+            if (chars is null)
+            {
+                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars);
+            }
 
             fixed (char* pChars = chars)
-                return GetByteCount(pChars, chars.Length, null);
+            {
+                return GetByteCountCommon(pChars, chars.Length);
+            }
         }
 
         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
@@ -170,22 +181,78 @@ namespace System.Text
         public override unsafe int GetByteCount(char* chars, int count)
         {
             // Validate Parameters
+
             if (chars == null)
-                throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
+            {
+                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars);
+            }
 
             if (count < 0)
-                throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
+            {
+                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
+            }
 
-            // Call it with empty encoder
-            return GetByteCount(chars, count, null);
+            return GetByteCountCommon(chars, count);
         }
 
         public override unsafe int GetByteCount(ReadOnlySpan<char> chars)
         {
-            fixed (char* charsPtr = &MemoryMarshal.GetNonNullPinnableReference(chars))
+            // It's ok for us to pass null pointers down to the workhorse below.
+
+            fixed (char* charsPtr = &MemoryMarshal.GetReference(chars))
+            {
+                return GetByteCountCommon(charsPtr, chars.Length);
+            }
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private unsafe int GetByteCountCommon(char* pChars, int charCount)
+        {
+            // Common helper method for all non-EncoderNLS entry points to GetByteCount.
+            // A modification of this method should be copied into each of the supported encodings: ASCII, UTF8, UTF16, UTF32.
+
+            Debug.Assert(charCount >= 0, "Caller should't specify negative length buffer.");
+            Debug.Assert(pChars != null || charCount == 0, "Input pointer shouldn't be null if non-zero length specified.");
+
+            // First call into the fast path.
+            // Don't bother providing a fallback mechanism; our fast path doesn't use it.
+
+            int totalByteCount = GetByteCountFast(pChars, charCount, fallback: null, out int charsConsumed);
+
+            if (charsConsumed != charCount)
+            {
+                // If there's still data remaining in the source buffer, go down the fallback path.
+                // We need to check for integer overflow since the fallback could change the required
+                // output count in unexpected ways.
+
+                totalByteCount += GetByteCountWithFallback(pChars, charCount, charsConsumed);
+                if (totalByteCount < 0)
+                {
+                    ThrowConversionOverflow();
+                }
+            }
+
+            return totalByteCount;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetByteCountCommon
+        private protected sealed override unsafe int GetByteCountFast(char* pChars, int charsLength, EncoderFallback fallback, out int charsConsumed)
+        {
+            // The number of UTF-8 code units may exceed the number of UTF-16 code units,
+            // so we'll need to check for overflow before casting to Int32.
+
+            char* ptrToFirstInvalidChar = Utf16Utility.GetPointerToFirstInvalidChar(pChars, charsLength, out long utf8CodeUnitCountAdjustment, out _);
+
+            int tempCharsConsumed = (int)(ptrToFirstInvalidChar - pChars);
+            charsConsumed = tempCharsConsumed;
+
+            long totalUtf8Bytes = tempCharsConsumed + utf8CodeUnitCountAdjustment;
+            if ((ulong)totalUtf8Bytes > int.MaxValue)
             {
-                return GetByteCount(charsPtr, chars.Length, baseEncoder: null);
+                ThrowConversionOverflow();
             }
+
+            return (int)totalUtf8Bytes;
         }
 
         // Parent method is safe.
@@ -196,22 +263,37 @@ namespace System.Text
         public override unsafe int GetBytes(string s, int charIndex, int charCount,
                                               byte[] bytes, int byteIndex)
         {
-            if (s == null || bytes == null)
-                throw new ArgumentNullException((s == null ? nameof(s) : nameof(bytes)), SR.ArgumentNull_Array);
+            // Validate Parameters
+
+            if (s is null || bytes is null)
+            {
+                ThrowHelper.ThrowArgumentNullException(
+                    argument: (s is null) ? ExceptionArgument.s : ExceptionArgument.bytes,
+                    resource: ExceptionResource.ArgumentNull_Array);
+            }
 
-            if (charIndex < 0 || charCount < 0)
-                throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)), SR.ArgumentOutOfRange_NeedNonNegNum);
+            if ((charIndex | charCount) < 0)
+            {
+                ThrowHelper.ThrowArgumentOutOfRangeException(
+                    argument: (charIndex < 0) ? ExceptionArgument.charIndex : ExceptionArgument.charCount,
+                    resource: ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
+            }
 
             if (s.Length - charIndex < charCount)
-                throw new ArgumentOutOfRangeException(nameof(s), SR.ArgumentOutOfRange_IndexCount);
-
-            if (byteIndex < 0 || byteIndex > bytes.Length)
-                throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
+            {
+                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.s, ExceptionResource.ArgumentOutOfRange_IndexCount);
+            }
 
-            int byteCount = bytes.Length - byteIndex;
+            if ((uint)byteIndex > bytes.Length)
+            {
+                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.byteIndex, ExceptionResource.ArgumentOutOfRange_Index);
+            }
 
-            fixed (char* pChars = s) fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
-                return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
+            fixed (char* pChars = s)
+            fixed (byte* pBytes = bytes)
+            {
+                return GetBytesCommon(pChars + charIndex, charCount, pBytes + byteIndex, bytes.Length - byteIndex);
+            }
         }
 
         // Encodes a range of characters in a character array into a range of bytes
@@ -232,28 +314,36 @@ namespace System.Text
                                                byte[] bytes, int byteIndex)
         {
             // Validate parameters
-            if (chars == null || bytes == null)
-                throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)), SR.ArgumentNull_Array);
-
-            if (charIndex < 0 || charCount < 0)
-                throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)), SR.ArgumentOutOfRange_NeedNonNegNum);
 
-            if (chars.Length - charIndex < charCount)
-                throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
+            if (chars is null || bytes is null)
+            {
+                ThrowHelper.ThrowArgumentNullException(
+                    argument: (chars is null) ? ExceptionArgument.chars : ExceptionArgument.bytes,
+                    resource: ExceptionResource.ArgumentNull_Array);
+            }
 
-            if (byteIndex < 0 || byteIndex > bytes.Length)
-                throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
+            if ((charIndex | charCount) < 0)
+            {
+                ThrowHelper.ThrowArgumentOutOfRangeException(
+                    argument: (charIndex < 0) ? ExceptionArgument.charIndex : ExceptionArgument.charCount,
+                    resource: ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
+            }
 
-            // If nothing to encode return 0, avoid fixed problem
-            if (charCount == 0)
-                return 0;
+            if (chars.Length - charIndex < charCount)
+            {
+                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.chars, ExceptionResource.ArgumentOutOfRange_IndexCount);
+            }
 
-            // Just call pointer version
-            int byteCount = bytes.Length - byteIndex;
+            if ((uint)byteIndex > bytes.Length)
+            {
+                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.byteIndex, ExceptionResource.ArgumentOutOfRange_Index);
+            }
 
-            fixed (char* pChars = chars) fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
-                // Remember that byteCount is # to decode, not size of array.
-                return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
+            fixed (char* pChars = chars)
+            fixed (byte* pBytes = bytes)
+            {
+                return GetBytesCommon(pChars + charIndex, charCount, pBytes + byteIndex, bytes.Length - byteIndex);
+            }
         }
 
         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
@@ -264,24 +354,77 @@ namespace System.Text
         public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
         {
             // Validate Parameters
-            if (bytes == null || chars == null)
-                throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), SR.ArgumentNull_Array);
 
-            if (charCount < 0 || byteCount < 0)
-                throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)), SR.ArgumentOutOfRange_NeedNonNegNum);
+            if (chars == null || bytes == null)
+            {
+                ThrowHelper.ThrowArgumentNullException(
+                    argument: (chars is null) ? ExceptionArgument.chars : ExceptionArgument.bytes,
+                    resource: ExceptionResource.ArgumentNull_Array);
+            }
+
+            if ((charCount | byteCount) < 0)
+            {
+                ThrowHelper.ThrowArgumentOutOfRangeException(
+                    argument: (charCount < 0) ? ExceptionArgument.charCount : ExceptionArgument.byteCount,
+                    resource: ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
+            }
 
-            return GetBytes(chars, charCount, bytes, byteCount, null);
+            return GetBytesCommon(chars, charCount, bytes, byteCount);
         }
 
         public override unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
         {
-            fixed (char* charsPtr = &MemoryMarshal.GetNonNullPinnableReference(chars))
-            fixed (byte* bytesPtr = &MemoryMarshal.GetNonNullPinnableReference(bytes))
+            // It's ok for us to operate on null / empty spans.
+
+            fixed (char* charsPtr = &MemoryMarshal.GetReference(chars))
+            fixed (byte* bytesPtr = &MemoryMarshal.GetReference(bytes))
             {
-                return GetBytes(charsPtr, chars.Length, bytesPtr, bytes.Length, baseEncoder: null);
+                return GetBytesCommon(charsPtr, chars.Length, bytesPtr, bytes.Length);
             }
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private unsafe int GetBytesCommon(char* pChars, int charCount, byte* pBytes, int byteCount)
+        {
+            // Common helper method for all non-EncoderNLS entry points to GetBytes.
+            // A modification of this method should be copied into each of the supported encodings: ASCII, UTF8, UTF16, UTF32.
+
+            Debug.Assert(charCount >= 0, "Caller should't specify negative length buffer.");
+            Debug.Assert(pChars != null || charCount == 0, "Input pointer shouldn't be null if non-zero length specified.");
+            Debug.Assert(byteCount >= 0, "Caller should't specify negative length buffer.");
+            Debug.Assert(pBytes != null || byteCount == 0, "Input pointer shouldn't be null if non-zero length specified.");
+
+            // First call into the fast path.
+
+            int bytesWritten = GetBytesFast(pChars, charCount, pBytes, byteCount, out int charsConsumed);
+
+            if (charsConsumed == charCount)
+            {
+                // All elements converted - return immediately.
+
+                return bytesWritten;
+            }
+            else
+            {
+                // Fast-path transcoding couldn't operate on the entire buffer - invoke fallback.
+
+                return GetBytesWithFallback(pChars, charCount, pBytes, byteCount, charsConsumed, bytesWritten);
+            }
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetBytesCommon
+        private protected sealed override unsafe int GetBytesFast(char* pChars, int charsLength, byte* pBytes, int bytesLength, out int charsConsumed)
+        {
+            // We don't care about the exact OperationStatus value returned by the workhorse routine; we only
+            // care if the workhorse was able to consume the entire input payload. If we're unable to do so,
+            // we'll handle the remainder in the fallback routine.
+
+            Utf8Utility.TranscodeToUtf8(pChars, charsLength, pBytes, bytesLength, out char* pInputBufferRemaining, out byte* pOutputBufferRemaining);
+
+            charsConsumed = (int)(pInputBufferRemaining - pChars);
+            return (int)(pOutputBufferRemaining - pBytes);
+        }
+
         // Returns the number of characters produced by decoding a range of bytes
         // in a byte array.
         //
@@ -293,22 +436,26 @@ namespace System.Text
         public override unsafe int GetCharCount(byte[] bytes, int index, int count)
         {
             // Validate Parameters
-            if (bytes == null)
-                throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
 
-            if (index < 0 || count < 0)
-                throw new ArgumentOutOfRangeException((index < 0 ? nameof(index) : nameof(count)), SR.ArgumentOutOfRange_NeedNonNegNum);
+            if (bytes is null)
+            {
+                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.bytes, ExceptionResource.ArgumentNull_Array);
+            }
 
-            if (bytes.Length - index < count)
-                throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
+            if ((index | count) < 0)
+            {
+                ThrowHelper.ThrowArgumentOutOfRangeException((index < 0) ? ExceptionArgument.index : ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
+            }
 
-            // If no input just return 0, fixed doesn't like 0 length arrays.
-            if (count == 0)
-                return 0;
+            if (bytes.Length - index < count)
+            {
+                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.bytes, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer);
+            }
 
-            // Just call pointer version
             fixed (byte* pBytes = bytes)
-                return GetCharCount(pBytes + index, count, null);
+            {
+                return GetCharCountCommon(pBytes + index, count);
+            }
         }
 
         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
@@ -319,20 +466,27 @@ namespace System.Text
         public override unsafe int GetCharCount(byte* bytes, int count)
         {
             // Validate Parameters
+
             if (bytes == null)
-                throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
+            {
+                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.bytes, ExceptionResource.ArgumentNull_Array);
+            }
 
             if (count < 0)
-                throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
+            {
+                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
+            }
 
-            return GetCharCount(bytes, count, null);
+            return GetCharCountCommon(bytes, count);
         }
 
         public override unsafe int GetCharCount(ReadOnlySpan<byte> bytes)
         {
-            fixed (byte* bytesPtr = &MemoryMarshal.GetNonNullPinnableReference(bytes))
+            // It's ok for us to pass null pointers down to the workhorse routine.
+
+            fixed (byte* bytesPtr = &MemoryMarshal.GetReference(bytes))
             {
-                return GetCharCount(bytesPtr, bytes.Length, baseDecoder: null);
+                return GetCharCountCommon(bytesPtr, bytes.Length);
             }
         }
 
@@ -345,28 +499,36 @@ namespace System.Text
                                               char[] chars, int charIndex)
         {
             // Validate Parameters
-            if (bytes == null || chars == null)
-                throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), SR.ArgumentNull_Array);
-
-            if (byteIndex < 0 || byteCount < 0)
-                throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)), SR.ArgumentOutOfRange_NeedNonNegNum);
 
-            if ( bytes.Length - byteIndex < byteCount)
-                throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
+            if (bytes is null || chars is null)
+            {
+                ThrowHelper.ThrowArgumentNullException(
+                    argument: (bytes is null) ? ExceptionArgument.bytes : ExceptionArgument.chars,
+                    resource: ExceptionResource.ArgumentNull_Array);
+            }
 
-            if (charIndex < 0 || charIndex > chars.Length)
-                throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_Index);
+            if ((byteIndex | byteCount) < 0)
+            {
+                ThrowHelper.ThrowArgumentOutOfRangeException(
+                    argument: (byteIndex < 0) ? ExceptionArgument.byteIndex : ExceptionArgument.byteCount,
+                    resource: ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
+            }
 
-            // If no input, return 0 & avoid fixed problem
-            if (byteCount == 0)
-                return 0;
+            if (bytes.Length - byteIndex < byteCount)
+            {
+                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.bytes, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer);
+            }
 
-            // Just call pointer version
-            int charCount = chars.Length - charIndex;
+            if ((uint)charIndex > (uint)chars.Length)
+            {
+                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.charIndex, ExceptionResource.ArgumentOutOfRange_Index);
+            }
 
-            fixed (byte* pBytes = bytes) fixed (char* pChars = &MemoryMarshal.GetReference((Span<char>)chars))
-                // Remember that charCount is # to decode, not size of array
-                return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
+            fixed (byte* pBytes = bytes)
+            fixed (char* pChars = chars)
+            {
+                return GetCharsCommon(pBytes + byteIndex, byteCount, pChars + charIndex, chars.Length - charIndex);
+            }
         }
 
         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
@@ -377,2120 +539,245 @@ namespace System.Text
         public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
         {
             // Validate Parameters
-            if (bytes == null || chars == null)
-                throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), SR.ArgumentNull_Array);
-
-            if (charCount < 0 || byteCount < 0)
-                throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)), SR.ArgumentOutOfRange_NeedNonNegNum);
 
-            return GetChars(bytes, byteCount, chars, charCount, null);
-        }
+            if (bytes is null || chars is null)
+            {
+                ThrowHelper.ThrowArgumentNullException(
+                    argument: (bytes is null) ? ExceptionArgument.bytes : ExceptionArgument.chars,
+                    resource: ExceptionResource.ArgumentNull_Array);
+            }
 
-        public override unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars)
-        {
-            fixed (byte* bytesPtr = &MemoryMarshal.GetNonNullPinnableReference(bytes))
-            fixed (char* charsPtr = &MemoryMarshal.GetNonNullPinnableReference(chars))
+            if ((byteCount | charCount) < 0)
             {
-                return GetChars(bytesPtr, bytes.Length, charsPtr, chars.Length, baseDecoder: null);
+                ThrowHelper.ThrowArgumentOutOfRangeException(
+                    argument: (byteCount < 0) ? ExceptionArgument.byteCount : ExceptionArgument.charCount,
+                    resource: ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
             }
-        }
 
-        // Returns a string containing the decoded representation of a range of
-        // bytes in a byte array.
-        //
-        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
-        // So if you fix this, fix the others.  Currently those include:
-        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
-        // parent method is safe
+            return GetCharsCommon(bytes, byteCount, chars, charCount);
+        }
 
-        public override unsafe string GetString(byte[] bytes, int index, int count)
+        public override unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars)
         {
-            // Validate Parameters
-            if (bytes == null)
-                throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
-
-            if (index < 0 || count < 0)
-                throw new ArgumentOutOfRangeException((index < 0 ? nameof(index) : nameof(count)), SR.ArgumentOutOfRange_NeedNonNegNum);
-
-            if (bytes.Length - index < count)
-                throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
-
-            // Avoid problems with empty input buffer
-            if (count == 0) return string.Empty;
+            // It's ok for us to pass null pointers down to the workhorse below.
 
-            fixed (byte* pBytes = bytes)
-                return string.CreateStringFromEncoding(
-                    pBytes + index, count, this);
+            fixed (byte* bytesPtr = &MemoryMarshal.GetReference(bytes))
+            fixed (char* charsPtr = &MemoryMarshal.GetReference(chars))
+            {
+                return GetCharsCommon(bytesPtr, bytes.Length, charsPtr, chars.Length);
+            }
         }
 
+        // WARNING:  If we throw an error, then System.Resources.ResourceReader calls this method.
+        //           So if we're really broken, then that could also throw an error... recursively.
+        //           So try to make sure GetChars can at least process all uses by
+        //           System.Resources.ResourceReader!
         //
-        // End of standard methods copied from EncodingNLS.cs
-        //
-
-        // To simplify maintenance, the structure of GetByteCount and GetBytes should be
-        // kept the same as much as possible
-        internal sealed override unsafe int GetByteCount(char* chars, int count, EncoderNLS baseEncoder)
+        // Note:  We throw exceptions on individually encoded surrogates and other non-shortest forms.
+        //        If exceptions aren't turned on, then we drop all non-shortest forms & individual surrogates.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private unsafe int GetCharsCommon(byte* pBytes, int byteCount, char* pChars, int charCount)
         {
-            // For fallback we may need a fallback buffer.
-            // We wait to initialize it though in case we don't have any broken input unicode
-            EncoderFallbackBuffer fallbackBuffer = null;
-            char* pSrcForFallback;
+            // Common helper method for all non-DecoderNLS entry points to GetChars.
+            // A modification of this method should be copied into each of the supported encodings: ASCII, UTF8, UTF16, UTF32.
 
-            char* pSrc = chars;
-            char* pEnd = pSrc + count;
+            Debug.Assert(byteCount >= 0, "Caller should't specify negative length buffer.");
+            Debug.Assert(pBytes != null || byteCount == 0, "Input pointer shouldn't be null if non-zero length specified.");
+            Debug.Assert(charCount >= 0, "Caller should't specify negative length buffer.");
+            Debug.Assert(pChars != null || charCount == 0, "Input pointer shouldn't be null if non-zero length specified.");
 
-            // Start by assuming we have as many as count
-            int byteCount = count;
+            // First call into the fast path.
 
-            int ch = 0;
+            int charsWritten = GetCharsFast(pBytes, byteCount, pChars, charCount, out int bytesConsumed);
 
-            if (baseEncoder != null)
+            if (bytesConsumed == byteCount)
             {
-                UTF8Encoder encoder = (UTF8Encoder)baseEncoder;
-                ch = encoder.surrogateChar;
-
-                // We mustn't have left over fallback data when counting
-                if (encoder.InternalHasFallbackBuffer)
-                {
-                    fallbackBuffer = encoder.FallbackBuffer;
-                    if (fallbackBuffer.Remaining > 0)
-                        throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
+                // All elements converted - return immediately.
 
-                    // Set our internal fallback interesting things.
-                    fallbackBuffer.InternalInitialize(chars, pEnd, encoder, false);
-                }
+                return charsWritten;
             }
-
-            for (;;)
+            else
             {
-                // SLOWLOOP: does all range checks, handles all special cases, but it is slow
-                if (pSrc >= pEnd)
-                {
-                    if (ch == 0)
-                    {
-                        // Unroll any fallback that happens at the end
-                        ch = fallbackBuffer != null ? fallbackBuffer.InternalGetNextChar() : 0;
-                        if (ch > 0)
-                        {
-                            byteCount++;
-                            goto ProcessChar;
-                        }
-                    }
-                    else
-                    {
-                        // Case of surrogates in the fallback.
-                        if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
-                        {
-                            Debug.Assert(ch >= 0xD800 && ch <= 0xDBFF,
-                                "[UTF8Encoding.GetBytes]expected high surrogate, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
-
-                            ch = fallbackBuffer.InternalGetNextChar();
-                            byteCount++;
-
-                            if (InRange(ch, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
-                            {
-                                ch = 0xfffd;
-                                byteCount++;
-                                goto EncodeChar;
-                            }
-                            else if (ch > 0)
-                            {
-                                goto ProcessChar;
-                            }
-                            else
-                            {
-                                byteCount--; // ignore last one.
-                                break;
-                            }
-                        }
-                    }
-
-                    if (ch <= 0)
-                    {
-                        break;
-                    }
-                    if (baseEncoder != null && !baseEncoder.MustFlush)
-                    {
-                        break;
-                    }
-
-                    // attempt to encode the partial surrogate (will fallback or ignore it), it'll also subtract 1.
-                    byteCount++;
-                    goto EncodeChar;
-                }
-
-                if (ch > 0)
-                {
-                    Debug.Assert(ch >= 0xD800 && ch <= 0xDBFF,
-                        "[UTF8Encoding.GetBytes]expected high surrogate, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
-
-                    // use separate helper variables for local contexts so that the jit optimizations
-                    // won't get confused about the variable lifetimes
-                    int cha = *pSrc;
-
-                    // count the pending surrogate
-                    byteCount++;
-
-                    // In previous byte, we encountered a high surrogate, so we are expecting a low surrogate here.
-                    // if (IsLowSurrogate(cha)) {
-                    if (InRange(cha, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
-                    {
-                        // Don't need a real # because we're just counting, anything > 0x7ff ('cept surrogate) will do.
-                        ch = 0xfffd;
-                        //                        ch = cha + (ch << 10) +
-                        //                            (0x10000
-                        //                            - CharUnicodeInfo.LOW_SURROGATE_START
-                        //                            - (CharUnicodeInfo.HIGH_SURROGATE_START << 10) );
-
-                        // Use this next char
-                        pSrc++;
-                    }
-                    // else ch is still high surrogate and encoding will fail (so don't add count)
-
-                    // attempt to encode the surrogate or partial surrogate
-                    goto EncodeChar;
-                }
-
-                // If we've used a fallback, then we have to check for it
-                if (fallbackBuffer != null)
-                {
-                    ch = fallbackBuffer.InternalGetNextChar();
-                    if (ch > 0)
-                    {
-                        // We have an extra byte we weren't expecting.
-                        byteCount++;
-                        goto ProcessChar;
-                    }
-                }
-
-                // read next char. The JIT optimization seems to be getting confused when
-                // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
-                ch = *pSrc;
-                pSrc++;
-
-            ProcessChar:
-                // if (IsHighSurrogate(ch)) {
-                if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.HIGH_SURROGATE_END))
-                {
-                    // we will count this surrogate next time around
-                    byteCount--;
-                    continue;
-                }
-            // either good char or partial surrogate
-
-            EncodeChar:
-                // throw exception on partial surrogate if necessary
-                // if (IsLowSurrogate(ch) || IsHighSurrogate(ch))
-                if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
-                {
-                    // Lone surrogates aren't allowed
-                    // Have to make a fallback buffer if we don't have one
-                    if (fallbackBuffer == null)
-                    {
-                        // wait on fallbacks if we can
-                        // For fallback we may need a fallback buffer
-                        if (baseEncoder == null)
-                            fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
-                        else
-                            fallbackBuffer = baseEncoder.FallbackBuffer;
-
-                        // Set our internal fallback interesting things.
-                        fallbackBuffer.InternalInitialize(chars, chars + count, baseEncoder, false);
-                    }
-
-                    // Do our fallback.  Actually we already know its a mixed up surrogate,
-                    // so the ref pSrc isn't gonna do anything.
-                    pSrcForFallback = pSrc; // Avoid passing pSrc by reference to allow it to be en-registered
-                    fallbackBuffer.InternalFallback(unchecked((char)ch), ref pSrcForFallback);
-                    pSrc = pSrcForFallback;
-
-                    // Ignore it if we don't throw (we had preallocated this ch)
-                    byteCount--;
-                    ch = 0;
-                    continue;
-                }
-
-                // Count them
-                if (ch > 0x7F)
-                {
-                    if (ch > 0x7FF)
-                    {
-                        // the extra surrogate byte was compensated by the second surrogate character
-                        // (2 surrogates make 4 bytes.  We've already counted 2 bytes, 1 per char)
-                        byteCount++;
-                    }
-                    byteCount++;
-                }
-
-#if BIT64
-                // check for overflow
-                if (byteCount < 0)
-                {
-                    break;
-                }
-#endif
-
-#if FASTLOOP
-                // If still have fallback don't do fast loop
-                if (fallbackBuffer != null && (ch = fallbackBuffer.InternalGetNextChar()) != 0)
-                {
-                    // We're reserving 1 byte for each char by default
-                    byteCount++;
-                    goto ProcessChar;
-                }
-
-                int availableChars = PtrDiff(pEnd, pSrc);
+                // The fast-path transcoder couldn't consume the entire input buffer - invoke fallback.
 
-                // don't fall into the fast decoding loop if we don't have enough characters
-                if (availableChars <= 13)
-                {
-                    // try to get over the remainder of the ascii characters fast though
-                    char* pLocalEnd = pEnd; // hint to get pLocalEnd en-registered
-                    while (pSrc < pLocalEnd)
-                    {
-                        ch = *pSrc;
-                        pSrc++;
-                        if (ch > 0x7F)
-                            goto ProcessChar;
-                    }
-
-                    // we are done
-                    break;
-                }
-
-#if BIT64
-                // make sure that we won't get a silent overflow inside the fast loop
-                // (Fall out to slow loop if we have this many characters)
-                availableChars &= 0x0FFFFFFF;
-#endif
-
-                // To compute the upper bound, assume that all characters are ASCII characters at this point,
-                //  the boundary will be decreased for every non-ASCII character we encounter
-                // Also, we need 3 + 4 chars reserve for the unrolled ansi decoding loop and for decoding of surrogates
-                char* pStop = pSrc + availableChars - (3 + 4);
-
-                while (pSrc < pStop)
-                {
-                    ch = *pSrc;
-                    pSrc++;
-
-                    if (ch > 0x7F)                                                  // Not ASCII
-                    {
-                        if (ch > 0x7FF)                                             // Not 2 Byte
-                        {
-                            if ((ch & 0xF800) == 0xD800)                            // See if its a Surrogate
-                                goto LongCode;
-                            byteCount++;
-                        }
-                        byteCount++;
-                    }
-
-                    // get pSrc aligned
-                    if ((unchecked((int)pSrc) & 0x2) != 0)
-                    {
-                        ch = *pSrc;
-                        pSrc++;
-                        if (ch > 0x7F)                                              // Not ASCII
-                        {
-                            if (ch > 0x7FF)                                         // Not 2 Byte
-                            {
-                                if ((ch & 0xF800) == 0xD800)                        // See if its a Surrogate
-                                    goto LongCode;
-                                byteCount++;
-                            }
-                            byteCount++;
-                        }
-                    }
-
-                    // Run 2 * 4 characters at a time!
-                    while (pSrc < pStop)
-                    {
-                        ch = *(int*)pSrc;
-                        int chc = *(int*)(pSrc + 2);
-                        if (((ch | chc) & unchecked((int)0xFF80FF80)) != 0)         // See if not ASCII
-                        {
-                            if (((ch | chc) & unchecked((int)0xF800F800)) != 0)     // See if not 2 Byte
-                            {
-                                goto LongCodeWithMask;
-                            }
-
-
-                            if ((ch & unchecked((int)0xFF800000)) != 0)             // Actually 0x07800780 is all we care about (4 bits)
-                                byteCount++;
-                            if ((ch & unchecked((int)0xFF80)) != 0)
-                                byteCount++;
-                            if ((chc & unchecked((int)0xFF800000)) != 0)
-                                byteCount++;
-                            if ((chc & unchecked((int)0xFF80)) != 0)
-                                byteCount++;
-                        }
-                        pSrc += 4;
-
-                        ch = *(int*)pSrc;
-                        chc = *(int*)(pSrc + 2);
-                        if (((ch | chc) & unchecked((int)0xFF80FF80)) != 0)         // See if not ASCII
-                        {
-                            if (((ch | chc) & unchecked((int)0xF800F800)) != 0)     // See if not 2 Byte
-                            {
-                                goto LongCodeWithMask;
-                            }
-
-                            if ((ch & unchecked((int)0xFF800000)) != 0)
-                                byteCount++;
-                            if ((ch & unchecked((int)0xFF80)) != 0)
-                                byteCount++;
-                            if ((chc & unchecked((int)0xFF800000)) != 0)
-                                byteCount++;
-                            if ((chc & unchecked((int)0xFF80)) != 0)
-                                byteCount++;
-                        }
-                        pSrc += 4;
-                    }
-                    break;
-
-                LongCodeWithMask:
-                    if (BitConverter.IsLittleEndian)
-                    {
-                        ch = (char)ch;
-                    }
-                    else
-                    {
-                        // be careful about the sign extension
-                        ch = (int)(((uint)ch) >> 16);
-                    }
-                    pSrc++;
-
-                    if (ch <= 0x7F)
-                    {
-                        continue;
-                    }
-
-                LongCode:
-                    // use separate helper variables for slow and fast loop so that the jit optimizations
-                    // won't get confused about the variable lifetimes
-                    if (ch > 0x7FF)
-                    {
-                        // if (IsLowSurrogate(ch) || IsHighSurrogate(ch))
-                        if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
-                        {
-                            // 4 byte encoding - high surrogate + low surrogate
-
-                            int chd = *pSrc;
-                            if (
-                                // !IsHighSurrogate(ch) // low without high -> bad
-                                ch > CharUnicodeInfo.HIGH_SURROGATE_END ||
-                                // !IsLowSurrogate(chd) // high not followed by low -> bad
-                                !InRange(chd, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
-                            {
-                                // Back up and drop out to slow loop to figure out error
-                                pSrc--;
-                                break;
-                            }
-                            pSrc++;
-
-                            // byteCount - this byte is compensated by the second surrogate character
-                        }
-                        byteCount++;
-                    }
-                    byteCount++;
-
-                    // byteCount - the last byte is already included
-                }
-#endif // FASTLOOP
-
-                // no pending char at this point
-                ch = 0;
+                return GetCharsWithFallback(pBytes, byteCount, pChars, charCount, bytesConsumed, charsWritten);
             }
-
-#if BIT64
-            // check for overflow
-            if (byteCount < 0)
-            {
-                throw new ArgumentException(
-                        SR.Argument_ConversionOverflow);
-            }
-#endif
-
-            Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
-                "[UTF8Encoding.GetByteCount]Expected Empty fallback buffer");
-
-            return byteCount;
         }
 
-        // diffs two char pointers using unsigned arithmetic. The unsigned arithmetic
-        // is good enough for us, and it tends to generate better code than the signed
-        // arithmetic generated by default
-        private static unsafe int PtrDiff(char* a, char* b)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetCharsCommon
+        private protected sealed override unsafe int GetCharsFast(byte* pBytes, int bytesLength, char* pChars, int charsLength, out int bytesConsumed)
         {
-            return (int)(((uint)((byte*)a - (byte*)b)) >> 1);
-        }
+            // We don't care about the exact OperationStatus value returned by the workhorse routine; we only
+            // care if the workhorse was able to consume the entire input payload. If we're unable to do so,
+            // we'll handle the remainder in the fallback routine.
 
-        // byte* flavor just for parity
-        private static unsafe int PtrDiff(byte* a, byte* b)
-        {
-            return (int)(a - b);
-        }
+            Utf8Utility.TranscodeToUtf16(pBytes, bytesLength, pChars, charsLength, out byte* pInputBufferRemaining, out char* pOutputBufferRemaining);
 
-        private static bool InRange(int ch, int start, int end)
-        {
-            return (uint)(ch - start) <= (uint)(end - start);
+            bytesConsumed = (int)(pInputBufferRemaining - pBytes);
+            return (int)(pOutputBufferRemaining - pChars);
         }
 
-        // Our workhorse
-        // Note:  We ignore mismatched surrogates, unless the exception flag is set in which case we throw
-        internal sealed override unsafe int GetBytes(
-            char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS baseEncoder)
+        private protected sealed override unsafe int GetCharsWithFallback(ReadOnlySpan<byte> bytes, int originalBytesLength, Span<char> chars, int originalCharsLength, DecoderNLS decoder)
         {
-            Debug.Assert(chars != null, "[UTF8Encoding.GetBytes]chars!=null");
-            Debug.Assert(byteCount >= 0, "[UTF8Encoding.GetBytes]byteCount >=0");
-            Debug.Assert(charCount >= 0, "[UTF8Encoding.GetBytes]charCount >=0");
-            Debug.Assert(bytes != null, "[UTF8Encoding.GetBytes]bytes!=null");
-
-            UTF8Encoder encoder = null;
-
-            // For fallback we may need a fallback buffer.
-            // We wait to initialize it though in case we don't have any broken input unicode
-            EncoderFallbackBuffer fallbackBuffer = null;
-            char* pSrcForFallback;
-
-            char* pSrc = chars;
-            byte* pTarget = bytes;
-
-            char* pEnd = pSrc + charCount;
-            byte* pAllocatedBufferEnd = pTarget + byteCount;
-
-            int ch = 0;
-
-            // assume that JIT will en-register pSrc, pTarget and ch
+            // We special-case DecoderReplacementFallback if it's telling us to write a single U+FFFD char,
+            // since we believe this to be relatively common and we can handle it more efficiently than
+            // the base implementation.
 
-            if (baseEncoder != null)
+            if (((decoder is null) ? this.DecoderFallback : decoder.Fallback) is DecoderReplacementFallback replacementFallback
+                && replacementFallback.MaxCharCount == 1
+                && replacementFallback.DefaultString[0] == UnicodeUtility.ReplacementChar)
             {
-                encoder = (UTF8Encoder)baseEncoder;
-                ch = encoder.surrogateChar;
-
-                // We mustn't have left over fallback data when counting
-                if (encoder.InternalHasFallbackBuffer)
-                {
-                    // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
-                    fallbackBuffer = encoder.FallbackBuffer;
-                    if (fallbackBuffer.Remaining > 0 && encoder._throwOnOverflow)
-                        throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
-
-                    // Set our internal fallback interesting things.
-                    fallbackBuffer.InternalInitialize(chars, pEnd, encoder, true);
-                }
-            }
-
-            for (;;)
-            {
-                // SLOWLOOP: does all range checks, handles all special cases, but it is slow
-
-                if (pSrc >= pEnd)
-                {
-                    if (ch == 0)
-                    {
-                        // Check if there's anything left to get out of the fallback buffer
-                        ch = fallbackBuffer != null ? fallbackBuffer.InternalGetNextChar() : 0;
-                        if (ch > 0)
-                        {
-                            goto ProcessChar;
-                        }
-                    }
-                    else
-                    {
-                        // Case of leftover surrogates in the fallback buffer
-                        if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
-                        {
-                            Debug.Assert(ch >= 0xD800 && ch <= 0xDBFF,
-                                "[UTF8Encoding.GetBytes]expected high surrogate, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
-
-                            int cha = ch;
-
-                            ch = fallbackBuffer.InternalGetNextChar();
-
-                            if (InRange(ch, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
-                            {
-                                ch = ch + (cha << 10) + (0x10000 - CharUnicodeInfo.LOW_SURROGATE_START - (CharUnicodeInfo.HIGH_SURROGATE_START << 10));
-                                goto EncodeChar;
-                            }
-                            else if (ch > 0)
-                            {
-                                goto ProcessChar;
-                            }
-                            else
-                            {
-                                break;
-                            }
-                        }
-                    }
-
-                    // attempt to encode the partial surrogate (will fail or ignore)
-                    if (ch > 0 && (encoder == null || encoder.MustFlush))
-                        goto EncodeChar;
-
-                    // We're done
-                    break;
-                }
-
-                if (ch > 0)
-                {
-                    // We have a high surrogate left over from a previous loop.
-                    Debug.Assert(ch >= 0xD800 && ch <= 0xDBFF,
-                        "[UTF8Encoding.GetBytes]expected high surrogate, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));
-
-                    // use separate helper variables for local contexts so that the jit optimizations
-                    // won't get confused about the variable lifetimes
-                    int cha = *pSrc;
-
-                    // In previous byte, we encountered a high surrogate, so we are expecting a low surrogate here.
-                    // if (IsLowSurrogate(cha)) {
-                    if (InRange(cha, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
-                    {
-                        ch = cha + (ch << 10) +
-                            (0x10000
-                            - CharUnicodeInfo.LOW_SURROGATE_START
-                            - (CharUnicodeInfo.HIGH_SURROGATE_START << 10));
-
-                        pSrc++;
-                    }
-                    // else ch is still high surrogate and encoding will fail
-
-                    // attempt to encode the surrogate or partial surrogate
-                    goto EncodeChar;
-                }
-
-                // If we've used a fallback, then we have to check for it
-                if (fallbackBuffer != null)
-                {
-                    ch = fallbackBuffer.InternalGetNextChar();
-                    if (ch > 0) goto ProcessChar;
-                }
-
-                // read next char. The JIT optimization seems to be getting confused when
-                // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
-                ch = *pSrc;
-                pSrc++;
-
-            ProcessChar:
-                // if (IsHighSurrogate(ch)) {
-                if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.HIGH_SURROGATE_END))
-                {
-                    continue;
-                }
-            // either good char or partial surrogate
-
-            EncodeChar:
-                // throw exception on partial surrogate if necessary
-                // if (IsLowSurrogate(ch) || IsHighSurrogate(ch))
-                if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
-                {
-                    // Lone surrogates aren't allowed, we have to do fallback for them
-                    // Have to make a fallback buffer if we don't have one
-                    if (fallbackBuffer == null)
-                    {
-                        // wait on fallbacks if we can
-                        // For fallback we may need a fallback buffer
-                        if (baseEncoder == null)
-                            fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
-                        else
-                            fallbackBuffer = baseEncoder.FallbackBuffer;
-
-                        // Set our internal fallback interesting things.
-                        fallbackBuffer.InternalInitialize(chars, pEnd, baseEncoder, true);
-                    }
-
-                    // Do our fallback.  Actually we already know its a mixed up surrogate,
-                    // so the ref pSrc isn't gonna do anything.
-                    pSrcForFallback = pSrc; // Avoid passing pSrc by reference to allow it to be en-registered
-                    fallbackBuffer.InternalFallback(unchecked((char)ch), ref pSrcForFallback);
-                    pSrc = pSrcForFallback;
-
-                    // Ignore it if we don't throw
-                    ch = 0;
-                    continue;
-                }
-
-                // Count bytes needed
-                int bytesNeeded = 1;
-                if (ch > 0x7F)
-                {
-                    if (ch > 0x7FF)
-                    {
-                        if (ch > 0xFFFF)
-                        {
-                            bytesNeeded++;  // 4 bytes (surrogate pair)
-                        }
-                        bytesNeeded++;      // 3 bytes (800-FFFF)
-                    }
-                    bytesNeeded++;          // 2 bytes (80-7FF)
-                }
-
-                if (pTarget > pAllocatedBufferEnd - bytesNeeded)
-                {
-                    // Left over surrogate from last time will cause pSrc == chars, so we'll throw
-                    if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
-                    {
-                        fallbackBuffer.MovePrevious();              // Didn't use this fallback char
-                        if (ch > 0xFFFF)
-                            fallbackBuffer.MovePrevious();          // Was surrogate, didn't use 2nd part either
-                    }
-                    else
-                    {
-                        pSrc--;                                     // Didn't use this char
-                        if (ch > 0xFFFF)
-                            pSrc--;                                 // Was surrogate, didn't use 2nd part either
-                    }
-                    Debug.Assert(pSrc >= chars || pTarget == bytes,
-                        "[UTF8Encoding.GetBytes]Expected pSrc to be within buffer or to throw with insufficient room.");
-                    ThrowBytesOverflow(encoder, pTarget == bytes);  // Throw if we must
-                    ch = 0;                                         // Nothing left over (we backed up to start of pair if supplementary)
-                    break;
-                }
-
-                if (ch <= 0x7F)
-                {
-                    *pTarget = (byte)ch;
-                }
-                else
-                {
-                    // use separate helper variables for local contexts so that the jit optimizations
-                    // won't get confused about the variable lifetimes
-                    int chb;
-                    if (ch <= 0x7FF)
-                    {
-                        // 2 byte encoding
-                        chb = (byte)(unchecked((sbyte)0xC0) | (ch >> 6));
-                    }
-                    else
-                    {
-                        if (ch <= 0xFFFF)
-                        {
-                            chb = (byte)(unchecked((sbyte)0xE0) | (ch >> 12));
-                        }
-                        else
-                        {
-                            *pTarget = (byte)(unchecked((sbyte)0xF0) | (ch >> 18));
-                            pTarget++;
-
-                            chb = unchecked((sbyte)0x80) | (ch >> 12) & 0x3F;
-                        }
-                        *pTarget = (byte)chb;
-                        pTarget++;
-
-                        chb = unchecked((sbyte)0x80) | (ch >> 6) & 0x3F;
-                    }
-                    *pTarget = (byte)chb;
-                    pTarget++;
-
-                    *pTarget = (byte)(unchecked((sbyte)0x80) | ch & 0x3F);
-                }
-                pTarget++;
-
-
-#if FASTLOOP
-                // If still have fallback don't do fast loop
-                if (fallbackBuffer != null && (ch = fallbackBuffer.InternalGetNextChar()) != 0)
-                    goto ProcessChar;
-
-                int availableChars = PtrDiff(pEnd, pSrc);
-                int availableBytes = PtrDiff(pAllocatedBufferEnd, pTarget);
-
-                // don't fall into the fast decoding loop if we don't have enough characters
-                // Note that if we don't have enough bytes, pStop will prevent us from entering the fast loop.
-                if (availableChars <= 13)
-                {
-                    // we are hoping for 1 byte per char
-                    if (availableBytes < availableChars)
-                    {
-                        // not enough output room.  no pending bits at this point
-                        ch = 0;
-                        continue;
-                    }
-
-                    // try to get over the remainder of the ascii characters fast though
-                    char* pLocalEnd = pEnd; // hint to get pLocalEnd en-registered
-                    while (pSrc < pLocalEnd)
-                    {
-                        ch = *pSrc;
-                        pSrc++;
-
-                        // Not ASCII, need more than 1 byte per char
-                        if (ch > 0x7F)
-                            goto ProcessChar;
-
-                        *pTarget = (byte)ch;
-                        pTarget++;
-                    }
-                    // we are done, let ch be 0 to clear encoder
-                    ch = 0;
-                    break;
-                }
-
-                // we need at least 1 byte per character, but Convert might allow us to convert
-                // only part of the input, so try as much as we can.  Reduce charCount if necessary
-                if (availableBytes < availableChars)
-                {
-                    availableChars = availableBytes;
-                }
-
-                // FASTLOOP:
-                // - optimistic range checks
-                // - fallbacks to the slow loop for all special cases, exception throwing, etc.
+                // Don't care about the exact OperationStatus, just how much of the payload we were able
+                // to process.
 
-                // To compute the upper bound, assume that all characters are ASCII characters at this point,
-                //  the boundary will be decreased for every non-ASCII character we encounter
-                // Also, we need 5 chars reserve for the unrolled ansi decoding loop and for decoding of surrogates
-                // If there aren't enough bytes for the output, then pStop will be <= pSrc and will bypass the loop.
-                char* pStop = pSrc + availableChars - 5;
+                Utf8.ToUtf16(bytes, chars, out int bytesRead, out int charsWritten, replaceInvalidSequences: true, isFinalBlock: decoder is null || decoder.MustFlush);
 
-                while (pSrc < pStop)
-                {
-                    ch = *pSrc;
-                    pSrc++;
-
-                    if (ch > 0x7F)
-                    {
-                        goto LongCode;
-                    }
-                    *pTarget = (byte)ch;
-                    pTarget++;
-
-                    // get pSrc aligned
-                    if ((unchecked((int)pSrc) & 0x2) != 0)
-                    {
-                        ch = *pSrc;
-                        pSrc++;
-                        if (ch > 0x7F)
-                        {
-                            goto LongCode;
-                        }
-                        *pTarget = (byte)ch;
-                        pTarget++;
-                    }
-
-                    // Run 4 characters at a time!
-                    while (pSrc < pStop)
-                    {
-                        ch = *(int*)pSrc;
-                        int chc = *(int*)(pSrc + 2);
-                        if (((ch | chc) & unchecked((int)0xFF80FF80)) != 0)
-                        {
-                            goto LongCodeWithMask;
-                        }
-
-                        // Unfortunately, this is endianess sensitive
-                        if (BitConverter.IsLittleEndian)
-                        {
-                            *pTarget = (byte)ch;
-                            *(pTarget + 1) = (byte)(ch >> 16);
-                            pSrc += 4;
-                            *(pTarget + 2) = (byte)chc;
-                            *(pTarget + 3) = (byte)(chc >> 16);
-                            pTarget += 4;
-                        }
-                        else
-                        {
-                            *pTarget = (byte)(ch>>16);
-                            *(pTarget+1) = (byte)ch;
-                            pSrc += 4;
-                            *(pTarget+2) = (byte)(chc>>16);
-                            *(pTarget+3) = (byte)chc;
-                            pTarget += 4;
-                        }
-                    }
-                    continue;
-
-                LongCodeWithMask:
-                    if (BitConverter.IsLittleEndian)
-                    {
-                        ch = (char)ch;
-                    }
-                    else
-                    {
-                        // be careful about the sign extension
-                        ch = (int)(((uint)ch) >> 16);
-                    }
-                    pSrc++;
-
-                    if (ch > 0x7F)
-                    {
-                        goto LongCode;
-                    }
-                    *pTarget = (byte)ch;
-                    pTarget++;
-                    continue;
-
-                LongCode:
-                    // use separate helper variables for slow and fast loop so that the jit optimizations
-                    // won't get confused about the variable lifetimes
-                    int chd;
-                    if (ch <= 0x7FF)
-                    {
-                        // 2 byte encoding
-                        chd = unchecked((sbyte)0xC0) | (ch >> 6);
-                    }
-                    else
-                    {
-                        // if (!IsLowSurrogate(ch) && !IsHighSurrogate(ch))
-                        if (!InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
-                        {
-                            // 3 byte encoding
-                            chd = unchecked((sbyte)0xE0) | (ch >> 12);
-                        }
-                        else
-                        {
-                            // 4 byte encoding - high surrogate + low surrogate
-                            // if (!IsHighSurrogate(ch))
-                            if (ch > CharUnicodeInfo.HIGH_SURROGATE_END)
-                            {
-                                // low without high -> bad, try again in slow loop
-                                pSrc -= 1;
-                                break;
-                            }
-
-                            chd = *pSrc;
-                            pSrc++;
-
-                            // if (!IsLowSurrogate(chd)) {
-                            if (!InRange(chd, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
-                            {
-                                // high not followed by low -> bad, try again in slow loop
-                                pSrc -= 2;
-                                break;
-                            }
-
-                            ch = chd + (ch << 10) +
-                                (0x10000
-                                - CharUnicodeInfo.LOW_SURROGATE_START
-                                - (CharUnicodeInfo.HIGH_SURROGATE_START << 10));
-
-                            *pTarget = (byte)(unchecked((sbyte)0xF0) | (ch >> 18));
-                            // pStop - this byte is compensated by the second surrogate character
-                            // 2 input chars require 4 output bytes.  2 have been anticipated already
-                            // and 2 more will be accounted for by the 2 pStop-- calls below.
-                            pTarget++;
-
-                            chd = unchecked((sbyte)0x80) | (ch >> 12) & 0x3F;
-                        }
-                        *pTarget = (byte)chd;
-                        pStop--;                    // 3 byte sequence for 1 char, so need pStop-- and the one below too.
-                        pTarget++;
-
-                        chd = unchecked((sbyte)0x80) | (ch >> 6) & 0x3F;
-                    }
-                    *pTarget = (byte)chd;
-                    pStop--;                        // 2 byte sequence for 1 char so need pStop--.
-                    pTarget++;
-
-                    *pTarget = (byte)(unchecked((sbyte)0x80) | ch & 0x3F);
-                    // pStop - this byte is already included
-                    pTarget++;
-                }
+                // Slice off how much we consumed / wrote.
 
-                Debug.Assert(pTarget <= pAllocatedBufferEnd, "[UTF8Encoding.GetBytes]pTarget <= pAllocatedBufferEnd");
+                bytes = bytes.Slice(bytesRead);
+                chars = chars.Slice(charsWritten);
+            }
 
-#endif // FASTLOOP
+            // If we couldn't go through our fast fallback mechanism, or if we still have leftover
+            // data because we couldn't consume everything in the loop above, we need to go down the
+            // slow fallback path.
 
-                // no pending char at this point
-                ch = 0;
+            if (bytes.IsEmpty)
+            {
+                return originalCharsLength - chars.Length; // total number of chars written
             }
-
-            // Do we have to set the encoder bytes?
-            if (encoder != null)
+            else
             {
-                Debug.Assert(!encoder.MustFlush || ch == 0,
-                    "[UTF8Encoding.GetBytes] Expected no mustflush or 0 leftover ch " + ch.ToString("X2", CultureInfo.InvariantCulture));
-
-                encoder.surrogateChar = ch;
-                encoder._charsUsed = (int)(pSrc - chars);
+                return base.GetCharsWithFallback(bytes, originalBytesLength, chars, originalCharsLength, decoder);
             }
-
-            Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0 ||
-                baseEncoder == null || !baseEncoder._throwOnOverflow,
-                "[UTF8Encoding.GetBytes]Expected empty fallback buffer if not converting");
-
-            return (int)(pTarget - bytes);
         }
 
-
-        // These are bitmasks used to maintain the state in the decoder. They occupy the higher bits
-        // while the actual character is being built in the lower bits. They are shifted together
-        // with the actual bits of the character.
-
-        // bits 30 & 31 are used for pending bits fixup
-        private const int FinalByte = 1 << 29;
-        private const int SupplimentarySeq = 1 << 28;
-        private const int ThreeByteSeq = 1 << 27;
-
-        // Note:  We throw exceptions on individually encoded surrogates and other non-shortest forms.
-        //        If exceptions aren't turned on, then we drop all non-shortest &individual surrogates.
+        // Returns a string containing the decoded representation of a range of
+        // bytes in a byte array.
         //
-        // To simplify maintenance, the structure of GetCharCount and GetChars should be
-        // kept the same as much as possible
-        internal sealed override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
-        {
-            Debug.Assert(count >= 0, "[UTF8Encoding.GetCharCount]count >=0");
-            Debug.Assert(bytes != null, "[UTF8Encoding.GetCharCount]bytes!=null");
-
-            // Initialize stuff
-            byte* pSrc = bytes;
-            byte* pEnd = pSrc + count;
+        // Every public Encoding that doesn't use EncodingNLS, as well as EncodingNLS itself, must have
+        // this override.  So if you fix this one, fix the others too.  Currently those include:
+        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+        // (The parent method is safe.)
 
-            // Start by assuming we have as many as count, charCount always includes the adjustment
-            // for the character being decoded
-            int charCount = count;
-            int ch = 0;
-            DecoderFallbackBuffer fallback = null;
+        public override unsafe string GetString(byte[] bytes, int index, int count)
+        {
+            // Validate Parameters
 
-            if (baseDecoder != null)
+            if (bytes is null)
             {
-                UTF8Decoder decoder = (UTF8Decoder)baseDecoder;
-                ch = decoder.bits;
-                charCount -= (ch >> 30);        // Adjust char count for # of expected bytes and expected output chars.
-
-                // Shouldn't have anything in fallback buffer for GetCharCount
-                // (don't have to check _throwOnOverflow for count)
-                Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
-                    "[UTF8Encoding.GetCharCount]Expected empty fallback buffer at start");
+                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.bytes, ExceptionResource.ArgumentNull_Array);
             }
 
-            for (;;)
+            if ((index | count) < 0)
             {
-                // SLOWLOOP: does all range checks, handles all special cases, but it is slow
-
-                if (pSrc >= pEnd)
-                {
-                    break;
-                }
-
-                if (ch == 0)
-                {
-                    // no pending bits
-                    goto ReadChar;
-                }
-
-                // read next byte. The JIT optimization seems to be getting confused when
-                // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
-                int cha = *pSrc;
-                pSrc++;
-
-                // we are expecting to see trailing bytes like 10vvvvvv
-                if ((cha & unchecked((sbyte)0xC0)) != 0x80)
-                {
-                    // This can be a valid starting byte for another UTF8 byte sequence, so let's put
-                    // the current byte back, and try to see if this is a valid byte for another UTF8 byte sequence
-                    pSrc--;
-                    charCount += (ch >> 30);
-                    goto InvalidByteSequence;
-                }
-
-                // fold in the new byte
-                ch = (ch << 6) | (cha & 0x3F);
-
-                if ((ch & FinalByte) == 0)
-                {
-                    Debug.Assert((ch & (SupplimentarySeq | ThreeByteSeq)) != 0,
-                        "[UTF8Encoding.GetChars]Invariant volation");
-
-                    if ((ch & SupplimentarySeq) != 0)
-                    {
-                        if ((ch & (FinalByte >> 6)) != 0)
-                        {
-                            // this is 3rd byte (of 4 byte supplementary) - nothing to do
-                            continue;
-                        }
-
-                        // 2nd byte, check for non-shortest form of supplementary char and the valid
-                        // supplementary characters in range 0x010000 - 0x10FFFF at the same time
-                        if (!InRange(ch & 0x1F0, 0x10, 0x100))
-                        {
-                            goto InvalidByteSequence;
-                        }
-                    }
-                    else
-                    {
-                        // Must be 2nd byte of a 3-byte sequence
-                        // check for non-shortest form of 3 byte seq
-                        if ((ch & (0x1F << 5)) == 0 ||                  // non-shortest form
-                            (ch & (0xF800 >> 6)) == (0xD800 >> 6))     // illegal individually encoded surrogate
-                        {
-                            goto InvalidByteSequence;
-                        }
-                    }
-                    continue;
-                }
-
-                // ready to punch
-
-                // adjust for surrogates in non-shortest form
-                if ((ch & (SupplimentarySeq | 0x1F0000)) == SupplimentarySeq)
-                {
-                    charCount--;
-                }
-                goto EncodeChar;
-
-            InvalidByteSequence:
-                // this code fragment should be close to the goto referencing it
-                // Have to do fallback for invalid bytes
-                if (fallback == null)
-                {
-                    if (baseDecoder == null)
-                        fallback = this.decoderFallback.CreateFallbackBuffer();
-                    else
-                        fallback = baseDecoder.FallbackBuffer;
-                    fallback.InternalInitialize(bytes, null);
-                }
-                charCount += FallbackInvalidByteSequence(pSrc, ch, fallback);
-
-                ch = 0;
-                continue;
-
-            ReadChar:
-                ch = *pSrc;
-                pSrc++;
-
-            ProcessChar:
-                if (ch > 0x7F)
-                {
-                    // If its > 0x7F, its start of a new multi-byte sequence
-
-                    // Long sequence, so unreserve our char.
-                    charCount--;
-
-                    // bit 6 has to be non-zero for start of multibyte chars.
-                    if ((ch & 0x40) == 0)
-                    {
-                        // Unexpected trail byte
-                        goto InvalidByteSequence;
-                    }
-
-                    // start a new long code
-                    if ((ch & 0x20) != 0)
-                    {
-                        if ((ch & 0x10) != 0)
-                        {
-                            // 4 byte encoding - supplimentary character (2 surrogates)
-
-                            ch &= 0x0F;
-
-                            // check that bit 4 is zero and the valid supplimentary character
-                            // range 0x000000 - 0x10FFFF at the same time
-                            if (ch > 0x04)
-                            {
-                                ch |= 0xf0;
-                                goto InvalidByteSequence;
-                            }
-
-                            // Add bit flags so that when we check new characters & rotate we'll be flagged correctly.
-                            // Final byte flag, count fix if we don't make final byte & supplimentary sequence flag.
-                            ch |= (FinalByte >> 3 * 6) |  // Final byte is 3 more bytes from now
-                                  (1 << 30) |           // If it dies on next byte we'll need an extra char
-                                  (3 << (30 - 2 * 6)) |     // If it dies on last byte we'll need to subtract a char
-                                (SupplimentarySeq) | (SupplimentarySeq >> 6) |
-                                (SupplimentarySeq >> 2 * 6) | (SupplimentarySeq >> 3 * 6);
-
-                            // Our character count will be 2 characters for these 4 bytes, so subtract another char
-                            charCount--;
-                        }
-                        else
-                        {
-                            // 3 byte encoding
-                            // Add bit flags so that when we check new characters & rotate we'll be flagged correctly.
-                            ch = (ch & 0x0F) | ((FinalByte >> 2 * 6) | (1 << 30) |
-                                (ThreeByteSeq) | (ThreeByteSeq >> 6) | (ThreeByteSeq >> 2 * 6));
-
-                            // We'll expect 1 character for these 3 bytes, so subtract another char.
-                            charCount--;
-                        }
-                    }
-                    else
-                    {
-                        // 2 byte encoding
-
-                        ch &= 0x1F;
-
-                        // check for non-shortest form
-                        if (ch <= 1)
-                        {
-                            ch |= 0xc0;
-                            goto InvalidByteSequence;
-                        }
-
-                        // Add bit flags so we'll be flagged correctly
-                        ch |= (FinalByte >> 6);
-                    }
-                    continue;
-                }
-
-            EncodeChar:
-
-#if FASTLOOP
-                int availableBytes = PtrDiff(pEnd, pSrc);
-
-                // don't fall into the fast decoding loop if we don't have enough bytes
-                if (availableBytes <= 13)
-                {
-                    // try to get over the remainder of the ascii characters fast though
-                    byte* pLocalEnd = pEnd; // hint to get pLocalEnd en-registered
-                    while (pSrc < pLocalEnd)
-                    {
-                        ch = *pSrc;
-                        pSrc++;
-
-                        if (ch > 0x7F)
-                            goto ProcessChar;
-                    }
-                    // we are done
-                    ch = 0;
-                    break;
-                }
-
-                // To compute the upper bound, assume that all characters are ASCII characters at this point,
-                //  the boundary will be decreased for every non-ASCII character we encounter
-                // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
-                byte* pStop = pSrc + availableBytes - 7;
-
-                while (pSrc < pStop)
-                {
-                    ch = *pSrc;
-                    pSrc++;
-
-                    if (ch > 0x7F)
-                    {
-                        goto LongCode;
-                    }
-
-                    // get pSrc 2-byte aligned
-                    if ((unchecked((int)pSrc) & 0x1) != 0)
-                    {
-                        ch = *pSrc;
-                        pSrc++;
-                        if (ch > 0x7F)
-                        {
-                            goto LongCode;
-                        }
-                    }
-
-                    // get pSrc 4-byte aligned
-                    if ((unchecked((int)pSrc) & 0x2) != 0)
-                    {
-                        ch = *(ushort*)pSrc;
-                        if ((ch & 0x8080) != 0)
-                        {
-                            goto LongCodeWithMask16;
-                        }
-                        pSrc += 2;
-                    }
-
-                    // Run 8 + 8 characters at a time!
-                    while (pSrc < pStop)
-                    {
-                        ch = *(int*)pSrc;
-                        int chb = *(int*)(pSrc + 4);
-                        if (((ch | chb) & unchecked((int)0x80808080)) != 0)
-                        {
-                            goto LongCodeWithMask32;
-                        }
-                        pSrc += 8;
-
-                        // This is a really small loop - unroll it
-                        if (pSrc >= pStop)
-                            break;
-
-                        ch = *(int*)pSrc;
-                        chb = *(int*)(pSrc + 4);
-                        if (((ch | chb) & unchecked((int)0x80808080)) != 0)
-                        {
-                            goto LongCodeWithMask32;
-                        }
-                        pSrc += 8;
-                    }
-                    break;
-
-                LongCodeWithMask32:
-                    if (BitConverter.IsLittleEndian)
-                    {
-                        ch &= 0xFF;
-                    }
-                    else
-                    {
-                        // be careful about the sign extension
-                        ch = (int)(((uint)ch) >> 16);
-                    }
-                LongCodeWithMask16:
-                    if (BitConverter.IsLittleEndian)
-                    {
-                        ch &= 0xFF;
-                    }
-                    else
-                    {
-                        ch = (int)(((uint)ch) >> 8);
-                    }
-
-                    pSrc++;
-                    if (ch <= 0x7F)
-                    {
-                        continue;
-                    }
-
-                LongCode:
-                    int chc = *pSrc;
-                    pSrc++;
-
-                    if (
-                        // bit 6 has to be zero
-                        (ch & 0x40) == 0 ||
-                        // we are expecting to see trailing bytes like 10vvvvvv
-                        (chc & unchecked((sbyte)0xC0)) != 0x80)
-                    {
-                        goto BadLongCode;
-                    }
-
-                    chc &= 0x3F;
-
-                    // start a new long code
-                    if ((ch & 0x20) != 0)
-                    {
-                        // fold the first two bytes together
-                        chc |= (ch & 0x0F) << 6;
-
-                        if ((ch & 0x10) != 0)
-                        {
-                            // 4 byte encoding - surrogate
-                            ch = *pSrc;
-                            if (
-                                // check that bit 4 is zero, the non-shortest form of surrogate
-                                // and the valid surrogate range 0x000000 - 0x10FFFF at the same time
-                                !InRange(chc >> 4, 0x01, 0x10) ||
-                                // we are expecting to see trailing bytes like 10vvvvvv
-                                (ch & unchecked((sbyte)0xC0)) != 0x80)
-                            {
-                                goto BadLongCode;
-                            }
-
-                            chc = (chc << 6) | (ch & 0x3F);
-
-                            ch = *(pSrc + 1);
-                            // we are expecting to see trailing bytes like 10vvvvvv
-                            if ((ch & unchecked((sbyte)0xC0)) != 0x80)
-                            {
-                                goto BadLongCode;
-                            }
-                            pSrc += 2;
-
-                            // extra byte
-                            charCount--;
-                        }
-                        else
-                        {
-                            // 3 byte encoding
-                            ch = *pSrc;
-                            if (
-                                // check for non-shortest form of 3 byte seq
-                                (chc & (0x1F << 5)) == 0 ||
-                                // Can't have surrogates here.
-                                (chc & (0xF800 >> 6)) == (0xD800 >> 6) ||
-                                // we are expecting to see trailing bytes like 10vvvvvv
-                                (ch & unchecked((sbyte)0xC0)) != 0x80)
-                            {
-                                goto BadLongCode;
-                            }
-                            pSrc++;
-
-                            // extra byte
-                            charCount--;
-                        }
-                    }
-                    else
-                    {
-                        // 2 byte encoding
-
-                        // check for non-shortest form
-                        if ((ch & 0x1E) == 0)
-                        {
-                            goto BadLongCode;
-                        }
-                    }
-
-                    // extra byte
-                    charCount--;
-                }
-#endif // FASTLOOP
-
-                // no pending bits at this point
-                ch = 0;
-                continue;
-
-            BadLongCode:
-                pSrc -= 2;
-                ch = 0;
-                continue;
+                ThrowHelper.ThrowArgumentOutOfRangeException(
+                    argument: (index < 0) ? ExceptionArgument.index : ExceptionArgument.count,
+                    resource: ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
             }
 
-            // May have a problem if we have to flush
-            if (ch != 0)
+            if (bytes.Length - index < count)
             {
-                // We were already adjusting for these, so need to un-adjust
-                charCount += (ch >> 30);
-                if (baseDecoder == null || baseDecoder.MustFlush)
-                {
-                    // Have to do fallback for invalid bytes
-                    if (fallback == null)
-                    {
-                        if (baseDecoder == null)
-                            fallback = this.decoderFallback.CreateFallbackBuffer();
-                        else
-                            fallback = baseDecoder.FallbackBuffer;
-                        fallback.InternalInitialize(bytes, null);
-                    }
-                    charCount += FallbackInvalidByteSequence(pSrc, ch, fallback);
-                }
+                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.bytes, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer);
             }
 
-            // Shouldn't have anything in fallback buffer for GetCharCount
-            // (don't have to check _throwOnOverflow for count)
-            Debug.Assert(fallback == null || fallback.Remaining == 0,
-                "[UTF8Encoding.GetCharCount]Expected empty fallback buffer at end");
+            // Avoid problems with empty input buffer
+            if (count == 0)
+                return string.Empty;
 
-            return charCount;
+            fixed (byte* pBytes = bytes)
+            {
+                return string.CreateStringFromEncoding(pBytes + index, count, this);
+            }
         }
 
-        // WARNING:  If we throw an error, then System.Resources.ResourceReader calls this method.
-        //           So if we're really broken, then that could also throw an error... recursively.
-        //           So try to make sure GetChars can at least process all uses by
-        //           System.Resources.ResourceReader!
         //
-        // Note:  We throw exceptions on individually encoded surrogates and other non-shortest forms.
-        //        If exceptions aren't turned on, then we drop all non-shortest &individual surrogates.
+        // End of standard methods copied from EncodingNLS.cs
         //
-        // To simplify maintenance, the structure of GetCharCount and GetChars should be
-        // kept the same as much as possible
-        internal sealed override unsafe int GetChars(
-            byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
-        {
-            Debug.Assert(chars != null, "[UTF8Encoding.GetChars]chars!=null");
-            Debug.Assert(byteCount >= 0, "[UTF8Encoding.GetChars]count >=0");
-            Debug.Assert(charCount >= 0, "[UTF8Encoding.GetChars]charCount >=0");
-            Debug.Assert(bytes != null, "[UTF8Encoding.GetChars]bytes!=null");
-
-            byte* pSrc = bytes;
-            char* pTarget = chars;
-
-            byte* pEnd = pSrc + byteCount;
-            char* pAllocatedBufferEnd = pTarget + charCount;
-
-            int ch = 0;
-
-            DecoderFallbackBuffer fallback = null;
-            byte* pSrcForFallback;
-            char* pTargetForFallback;
-            if (baseDecoder != null)
-            {
-                UTF8Decoder decoder = (UTF8Decoder)baseDecoder;
-                ch = decoder.bits;
-
-                // Shouldn't have anything in fallback buffer for GetChars
-                // (don't have to check _throwOnOverflow for chars, we always use all or none so always should be empty)
-                Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
-                    "[UTF8Encoding.GetChars]Expected empty fallback buffer at start");
-            }
 
-            for (;;)
-            {
-                // SLOWLOOP: does all range checks, handles all special cases, but it is slow
-
-                if (pSrc >= pEnd)
-                {
-                    break;
-                }
-
-                if (ch == 0)
-                {
-                    // no pending bits
-                    goto ReadChar;
-                }
-
-                // read next byte. The JIT optimization seems to be getting confused when
-                // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
-                int cha = *pSrc;
-                pSrc++;
-
-                // we are expecting to see trailing bytes like 10vvvvvv
-                if ((cha & unchecked((sbyte)0xC0)) != 0x80)
-                {
-                    // This can be a valid starting byte for another UTF8 byte sequence, so let's put
-                    // the current byte back, and try to see if this is a valid byte for another UTF8 byte sequence
-                    pSrc--;
-                    goto InvalidByteSequence;
-                }
-
-                // fold in the new byte
-                ch = (ch << 6) | (cha & 0x3F);
-
-                if ((ch & FinalByte) == 0)
-                {
-                    // Not at last byte yet
-                    Debug.Assert((ch & (SupplimentarySeq | ThreeByteSeq)) != 0,
-                        "[UTF8Encoding.GetChars]Invariant volation");
-
-                    if ((ch & SupplimentarySeq) != 0)
-                    {
-                        // Its a 4-byte supplimentary sequence
-                        if ((ch & (FinalByte >> 6)) != 0)
-                        {
-                            // this is 3rd byte of 4 byte sequence - nothing to do
-                            continue;
-                        }
-
-                        // 2nd byte of 4 bytes
-                        // check for non-shortest form of surrogate and the valid surrogate
-                        // range 0x000000 - 0x10FFFF at the same time
-                        if (!InRange(ch & 0x1F0, 0x10, 0x100))
-                        {
-                            goto InvalidByteSequence;
-                        }
-                    }
-                    else
-                    {
-                        // Must be 2nd byte of a 3-byte sequence
-                        // check for non-shortest form of 3 byte seq
-                        if ((ch & (0x1F << 5)) == 0 ||                  // non-shortest form
-                            (ch & (0xF800 >> 6)) == (0xD800 >> 6))     // illegal individually encoded surrogate
-                        {
-                            goto InvalidByteSequence;
-                        }
-                    }
-                    continue;
-                }
-
-                // ready to punch
-
-                // surrogate in shortest form?
-                // Might be possible to get rid of this?  Already did non-shortest check for 4-byte sequence when reading 2nd byte?
-                if ((ch & (SupplimentarySeq | 0x1F0000)) > SupplimentarySeq)
-                {
-                    // let the range check for the second char throw the exception
-                    if (pTarget < pAllocatedBufferEnd)
-                    {
-                        *pTarget = (char)(((ch >> 10) & 0x7FF) +
-                            unchecked((short)((CharUnicodeInfo.HIGH_SURROGATE_START - (0x10000 >> 10)))));
-                        pTarget++;
-
-                        ch = (ch & 0x3FF) +
-                            unchecked((int)(CharUnicodeInfo.LOW_SURROGATE_START));
-                    }
-                }
-
-                goto EncodeChar;
-
-            InvalidByteSequence:
-                // this code fragment should be close to the gotos referencing it
-                // Have to do fallback for invalid bytes
-                if (fallback == null)
-                {
-                    if (baseDecoder == null)
-                        fallback = this.decoderFallback.CreateFallbackBuffer();
-                    else
-                        fallback = baseDecoder.FallbackBuffer;
-                    fallback.InternalInitialize(bytes, pAllocatedBufferEnd);
-                }
-                // That'll back us up the appropriate # of bytes if we didn't get anywhere
-                pSrcForFallback = pSrc; // Avoid passing pSrc by reference to allow it to be en-registered
-                pTargetForFallback = pTarget; // Avoid passing pTarget by reference to allow it to be en-registered
-                bool fallbackResult = FallbackInvalidByteSequence(ref pSrcForFallback, ch, fallback, ref pTargetForFallback);
-                pSrc = pSrcForFallback;
-                pTarget = pTargetForFallback;
-
-                if (!fallbackResult)
-                {
-                    // Ran out of buffer space
-                    // Need to throw an exception?
-                    Debug.Assert(pSrc >= bytes || pTarget == chars,
-                        "[UTF8Encoding.GetChars]Expected to throw or remain in byte buffer after fallback");
-                    fallback.InternalReset();
-                    ThrowCharsOverflow(baseDecoder, pTarget == chars);
-                    ch = 0;
-                    break;
-                }
-                Debug.Assert(pSrc >= bytes,
-                    "[UTF8Encoding.GetChars]Expected invalid byte sequence to have remained within the byte array");
-                ch = 0;
-                continue;
-
-            ReadChar:
-                ch = *pSrc;
-                pSrc++;
-
-            ProcessChar:
-                if (ch > 0x7F)
-                {
-                    // If its > 0x7F, its start of a new multi-byte sequence
-
-                    // bit 6 has to be non-zero
-                    if ((ch & 0x40) == 0)
-                    {
-                        goto InvalidByteSequence;
-                    }
-
-                    // start a new long code
-                    if ((ch & 0x20) != 0)
-                    {
-                        if ((ch & 0x10) != 0)
-                        {
-                            // 4 byte encoding - supplimentary character (2 surrogates)
-
-                            ch &= 0x0F;
-
-                            // check that bit 4 is zero and the valid supplimentary character
-                            // range 0x000000 - 0x10FFFF at the same time
-                            if (ch > 0x04)
-                            {
-                                ch |= 0xf0;
-                                goto InvalidByteSequence;
-                            }
-
-                            ch |= (FinalByte >> 3 * 6) | (1 << 30) | (3 << (30 - 2 * 6)) |
-                                (SupplimentarySeq) | (SupplimentarySeq >> 6) |
-                                (SupplimentarySeq >> 2 * 6) | (SupplimentarySeq >> 3 * 6);
-                        }
-                        else
-                        {
-                            // 3 byte encoding
-                            ch = (ch & 0x0F) | ((FinalByte >> 2 * 6) | (1 << 30) |
-                                (ThreeByteSeq) | (ThreeByteSeq >> 6) | (ThreeByteSeq >> 2 * 6));
-                        }
-                    }
-                    else
-                    {
-                        // 2 byte encoding
-
-                        ch &= 0x1F;
-
-                        // check for non-shortest form
-                        if (ch <= 1)
-                        {
-                            ch |= 0xc0;
-                            goto InvalidByteSequence;
-                        }
-
-                        ch |= (FinalByte >> 6);
-                    }
-                    continue;
-                }
-
-            EncodeChar:
-                // write the pending character
-                if (pTarget >= pAllocatedBufferEnd)
-                {
-                    // Fix chars so we make sure to throw if we didn't output anything
-                    ch &= 0x1fffff;
-                    if (ch > 0x7f)
-                    {
-                        if (ch > 0x7ff)
-                        {
-                            if (ch >= CharUnicodeInfo.LOW_SURROGATE_START &&
-                                ch <= CharUnicodeInfo.LOW_SURROGATE_END)
-                            {
-                                pSrc--;     // It was 4 bytes
-                                pTarget--;  // 1 was stored already, but we can't remember 1/2, so back up
-                            }
-                            else if (ch > 0xffff)
-                            {
-                                pSrc--;     // It was 4 bytes, nothing was stored
-                            }
-                            pSrc--;         // It was at least 3 bytes
-                        }
-                        pSrc--;             // It was at least 2 bytes
-                    }
-                    pSrc--;
-
-                    // Throw that we don't have enough room (pSrc could be < chars if we had started to process
-                    // a 4 byte sequence already)
-                    Debug.Assert(pSrc >= bytes || pTarget == chars,
-                        "[UTF8Encoding.GetChars]Expected pSrc to be within input buffer or throw due to no output]");
-                    ThrowCharsOverflow(baseDecoder, pTarget == chars);
-
-                    // Don't store ch in decoder, we already backed up to its start
-                    ch = 0;
-
-                    // Didn't throw, just use this buffer size.
-                    break;
-                }
-                *pTarget = (char)ch;
-                pTarget++;
-
-#if FASTLOOP
-                int availableChars = PtrDiff(pAllocatedBufferEnd, pTarget);
-                int availableBytes = PtrDiff(pEnd, pSrc);
-
-                // don't fall into the fast decoding loop if we don't have enough bytes
-                // Test for availableChars is done because pStop would be <= pTarget.
-                if (availableBytes <= 13)
-                {
-                    // we may need as many as 1 character per byte
-                    if (availableChars < availableBytes)
-                    {
-                        // not enough output room.  no pending bits at this point
-                        ch = 0;
-                        continue;
-                    }
-
-                    // try to get over the remainder of the ascii characters fast though
-                    byte* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
-                    while (pSrc < pLocalEnd)
-                    {
-                        ch = *pSrc;
-                        pSrc++;
-
-                        if (ch > 0x7F)
-                            goto ProcessChar;
-
-                        *pTarget = (char)ch;
-                        pTarget++;
-                    }
-                    // we are done
-                    ch = 0;
-                    break;
-                }
-
-                // we may need as many as 1 character per byte, so reduce the byte count if necessary.
-                // If availableChars is too small, pStop will be before pTarget and we won't do fast loop.
-                if (availableChars < availableBytes)
-                {
-                    availableBytes = availableChars;
-                }
-
-                // To compute the upper bound, assume that all characters are ASCII characters at this point,
-                //  the boundary will be decreased for every non-ASCII character we encounter
-                // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
-                char* pStop = pTarget + availableBytes - 7;
-
-                while (pTarget < pStop)
-                {
-                    ch = *pSrc;
-                    pSrc++;
-
-                    if (ch > 0x7F)
-                    {
-                        goto LongCode;
-                    }
-                    *pTarget = (char)ch;
-                    pTarget++;
-
-                    // get pSrc to be 2-byte aligned
-                    if ((unchecked((int)pSrc) & 0x1) != 0)
-                    {
-                        ch = *pSrc;
-                        pSrc++;
-                        if (ch > 0x7F)
-                        {
-                            goto LongCode;
-                        }
-                        *pTarget = (char)ch;
-                        pTarget++;
-                    }
-
-                    // get pSrc to be 4-byte aligned
-                    if ((unchecked((int)pSrc) & 0x2) != 0)
-                    {
-                        ch = *(ushort*)pSrc;
-                        if ((ch & 0x8080) != 0)
-                        {
-                            goto LongCodeWithMask16;
-                        }
-
-                        // Unfortunately, this is endianess sensitive
-                        if (BitConverter.IsLittleEndian)
-                        {
-                            *pTarget = (char)(ch & 0x7F);
-                            pSrc += 2;
-                            *(pTarget + 1) = (char)((ch >> 8) & 0x7F);
-                            pTarget += 2;
-                        }
-                        else
-                        {
-                            *pTarget = (char)((ch >> 8) & 0x7F);
-                            pSrc += 2;
-                            *(pTarget+1) = (char)(ch & 0x7F);
-                            pTarget += 2;
-                        }
-                    }
-
-                    // Run 8 characters at a time!
-                    while (pTarget < pStop)
-                    {
-                        ch = *(int*)pSrc;
-                        int chb = *(int*)(pSrc + 4);
-                        if (((ch | chb) & unchecked((int)0x80808080)) != 0)
-                        {
-                            goto LongCodeWithMask32;
-                        }
-
-                        // Unfortunately, this is endianess sensitive
-                        if (BitConverter.IsLittleEndian)
-                        {
-                            *pTarget = (char)(ch & 0x7F);
-                            *(pTarget + 1) = (char)((ch >> 8) & 0x7F);
-                            *(pTarget + 2) = (char)((ch >> 16) & 0x7F);
-                            *(pTarget + 3) = (char)((ch >> 24) & 0x7F);
-                            pSrc += 8;
-                            *(pTarget + 4) = (char)(chb & 0x7F);
-                            *(pTarget + 5) = (char)((chb >> 8) & 0x7F);
-                            *(pTarget + 6) = (char)((chb >> 16) & 0x7F);
-                            *(pTarget + 7) = (char)((chb >> 24) & 0x7F);
-                            pTarget += 8;
-                        }
-                        else
-                        {
-                            *pTarget = (char)((ch >> 24) & 0x7F);
-                            *(pTarget+1) = (char)((ch >> 16) & 0x7F);
-                            *(pTarget+2) = (char)((ch >> 8) & 0x7F);
-                            *(pTarget+3) = (char)(ch & 0x7F);
-                            pSrc += 8;
-                            *(pTarget+4) = (char)((chb >> 24) & 0x7F);
-                            *(pTarget+5) = (char)((chb >> 16) & 0x7F);
-                            *(pTarget+6) = (char)((chb >> 8) & 0x7F);
-                            *(pTarget+7) = (char)(chb & 0x7F);
-                            pTarget += 8;
-                        }
-                    }
-                    break;
-
-                LongCodeWithMask32:
-                    if (BitConverter.IsLittleEndian)
-                    {
-                        ch &= 0xFF;
-                    }
-                    else
-                    {
-                        // be careful about the sign extension
-                        ch = (int)(((uint)ch) >> 16);
-                    }
-                LongCodeWithMask16:
-                    if (BitConverter.IsLittleEndian)
-                    {
-                        ch &= 0xFF;
-                    }
-                    else
-                    {
-                        ch = (int)(((uint)ch) >> 8);
-                    }
-                    pSrc++;
-                    if (ch <= 0x7F)
-                    {
-                        *pTarget = (char)ch;
-                        pTarget++;
-                        continue;
-                    }
-
-                LongCode:
-                    int chc = *pSrc;
-                    pSrc++;
-
-                    if (
-                        // bit 6 has to be zero
-                        (ch & 0x40) == 0 ||
-                        // we are expecting to see trailing bytes like 10vvvvvv
-                        (chc & unchecked((sbyte)0xC0)) != 0x80)
-                    {
-                        goto BadLongCode;
-                    }
-
-                    chc &= 0x3F;
-
-                    // start a new long code
-                    if ((ch & 0x20) != 0)
-                    {
-                        // fold the first two bytes together
-                        chc |= (ch & 0x0F) << 6;
-
-                        if ((ch & 0x10) != 0)
-                        {
-                            // 4 byte encoding - surrogate
-                            ch = *pSrc;
-                            if (
-                                // check that bit 4 is zero, the non-shortest form of surrogate
-                                // and the valid surrogate range 0x000000 - 0x10FFFF at the same time
-                                !InRange(chc >> 4, 0x01, 0x10) ||
-                                // we are expecting to see trailing bytes like 10vvvvvv
-                                (ch & unchecked((sbyte)0xC0)) != 0x80)
-                            {
-                                goto BadLongCode;
-                            }
-
-                            chc = (chc << 6) | (ch & 0x3F);
-
-                            ch = *(pSrc + 1);
-                            // we are expecting to see trailing bytes like 10vvvvvv
-                            if ((ch & unchecked((sbyte)0xC0)) != 0x80)
-                            {
-                                goto BadLongCode;
-                            }
-                            pSrc += 2;
-
-                            ch = (chc << 6) | (ch & 0x3F);
-
-                            *pTarget = (char)(((ch >> 10) & 0x7FF) +
-                                unchecked((short)(CharUnicodeInfo.HIGH_SURROGATE_START - (0x10000 >> 10))));
-                            pTarget++;
-
-                            ch = (ch & 0x3FF) +
-                                unchecked((short)(CharUnicodeInfo.LOW_SURROGATE_START));
-
-                            // extra byte, we're already planning 2 chars for 2 of these bytes,
-                            // but the big loop is testing the target against pStop, so we need
-                            // to subtract 2 more or we risk overrunning the input.  Subtract
-                            // one here and one below.
-                            pStop--;
-                        }
-                        else
-                        {
-                            // 3 byte encoding
-                            ch = *pSrc;
-                            if (
-                                // check for non-shortest form of 3 byte seq
-                                (chc & (0x1F << 5)) == 0 ||
-                                // Can't have surrogates here.
-                                (chc & (0xF800 >> 6)) == (0xD800 >> 6) ||
-                                // we are expecting to see trailing bytes like 10vvvvvv
-                                (ch & unchecked((sbyte)0xC0)) != 0x80)
-                            {
-                                goto BadLongCode;
-                            }
-                            pSrc++;
-
-                            ch = (chc << 6) | (ch & 0x3F);
-
-                            // extra byte, we're only expecting 1 char for each of these 3 bytes,
-                            // but the loop is testing the target (not source) against pStop, so
-                            // we need to subtract 2 more or we risk overrunning the input.
-                            // Subtract 1 here and one more below
-                            pStop--;
-                        }
-                    }
-                    else
-                    {
-                        // 2 byte encoding
-
-                        ch &= 0x1F;
-
-                        // check for non-shortest form
-                        if (ch <= 1)
-                        {
-                            goto BadLongCode;
-                        }
-                        ch = (ch << 6) | chc;
-                    }
-
-                    *pTarget = (char)ch;
-                    pTarget++;
-
-                    // extra byte, we're only expecting 1 char for each of these 2 bytes,
-                    // but the loop is testing the target (not source) against pStop.
-                    // subtract an extra count from pStop so that we don't overrun the input.
-                    pStop--;
-                }
-#endif // FASTLOOP
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private unsafe int GetCharCountCommon(byte* pBytes, int byteCount)
+        {
+            // Common helper method for all non-DecoderNLS entry points to GetCharCount.
+            // A modification of this method should be copied into each of the supported encodings: ASCII, UTF8, UTF16, UTF32.
 
-                Debug.Assert(pTarget <= pAllocatedBufferEnd, "[UTF8Encoding.GetChars]pTarget <= pAllocatedBufferEnd");
+            Debug.Assert(byteCount >= 0, "Caller should't specify negative length buffer.");
+            Debug.Assert(pBytes != null || byteCount == 0, "Input pointer shouldn't be null if non-zero length specified.");
 
-                // no pending bits at this point
-                ch = 0;
-                continue;
+            // First call into the fast path.
+            // Don't bother providing a fallback mechanism; our fast path doesn't use it.
 
-            BadLongCode:
-                pSrc -= 2;
-                ch = 0;
-                continue;
-            }
+            int totalCharCount = GetCharCountFast(pBytes, byteCount, fallback: null, out int bytesConsumed);
 
-            if (ch != 0 && (baseDecoder == null || baseDecoder.MustFlush))
+            if (bytesConsumed != byteCount)
             {
-                // Have to do fallback for invalid bytes
-                if (fallback == null)
-                {
-                    if (baseDecoder == null)
-                        fallback = this.decoderFallback.CreateFallbackBuffer();
-                    else
-                        fallback = baseDecoder.FallbackBuffer;
-                    fallback.InternalInitialize(bytes, pAllocatedBufferEnd);
-                }
-
-                // That'll back us up the appropriate # of bytes if we didn't get anywhere
-                pSrcForFallback = pSrc; // Avoid passing pSrc by reference to allow it to be en-registered
-                pTargetForFallback = pTarget; // Avoid passing pTarget by reference to allow it to be en-registered
-                bool fallbackResult = FallbackInvalidByteSequence(ref pSrcForFallback, ch, fallback, ref pTargetForFallback);
-                pSrc = pSrcForFallback;
-                pTarget = pTargetForFallback;
+                // If there's still data remaining in the source buffer, go down the fallback path.
+                // We need to check for integer overflow since the fallback could change the required
+                // output count in unexpected ways.
 
-                if (!fallbackResult)
+                totalCharCount += GetCharCountWithFallback(pBytes, byteCount, bytesConsumed);
+                if (totalCharCount < 0)
                 {
-                    Debug.Assert(pSrc >= bytes || pTarget == chars,
-                        "[UTF8Encoding.GetChars]Expected to throw or remain in byte buffer while flushing");
-
-                    // Ran out of buffer space
-                    // Need to throw an exception?
-                    fallback.InternalReset();
-                    ThrowCharsOverflow(baseDecoder, pTarget == chars);
+                    ThrowConversionOverflow();
                 }
-                Debug.Assert(pSrc >= bytes,
-                    "[UTF8Encoding.GetChars]Expected flushing invalid byte sequence to have remained within the byte array");
-                ch = 0;
             }
 
-            if (baseDecoder != null)
-            {
-                UTF8Decoder decoder = (UTF8Decoder)baseDecoder;
+            return totalCharCount;
+        }
 
-                // If we're storing flush data we expect all bits to be used or else
-                // we're stuck in the middle of a conversion
-                Debug.Assert(!baseDecoder.MustFlush || ch == 0 || !baseDecoder._throwOnOverflow,
-                    "[UTF8Encoding.GetChars]Expected no must flush or no left over bits or no throw on overflow.");
+        [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetCharCountCommon
+        private protected sealed override unsafe int GetCharCountFast(byte* pBytes, int bytesLength, DecoderFallback fallback, out int bytesConsumed)
+        {
+            // The number of UTF-16 code units will never exceed the number of UTF-8 code units,
+            // so the addition at the end of this method will not overflow.
 
-                // Remember our leftover bits.
-                decoder.bits = ch;
+            byte* ptrToFirstInvalidByte = Utf8Utility.GetPointerToFirstInvalidByte(pBytes, bytesLength, out int utf16CodeUnitCountAdjustment, out _);
 
-                baseDecoder._bytesUsed = (int)(pSrc - bytes);
-            }
+            int tempBytesConsumed = (int)(ptrToFirstInvalidByte - pBytes);
+            bytesConsumed = tempBytesConsumed;
 
-            // Shouldn't have anything in fallback buffer for GetChars
-            // (don't have to check _throwOnOverflow for chars)
-            Debug.Assert(fallback == null || fallback.Remaining == 0,
-                "[UTF8Encoding.GetChars]Expected empty fallback buffer at end");
-
-            return PtrDiff(pTarget, chars);
+            return tempBytesConsumed + utf16CodeUnitCountAdjustment;
         }
 
-        // During GetChars we had an invalid byte sequence
-        // pSrc is backed up to the start of the bad sequence if we didn't have room to
-        // fall it back.  Otherwise pSrc remains where it is.
-        private unsafe bool FallbackInvalidByteSequence(
-            ref byte* pSrc, int ch, DecoderFallbackBuffer fallback, ref char* pTarget)
+        public override Decoder GetDecoder()
         {
-            // Get our byte[]
-            byte* pStart = pSrc;
-            byte[] bytesUnknown = GetBytesUnknown(ref pStart, ch);
-
-            // Do the actual fallback
-            if (!fallback.InternalFallback(bytesUnknown, pSrc, ref pTarget))
-            {
-                // Oops, it failed, back up to pStart
-                pSrc = pStart;
-                return false;
-            }
-
-            // It worked
-            return true;
+            return new DecoderNLS(this);
         }
 
-        // During GetCharCount we had an invalid byte sequence
-        // pSrc is used to find the index that points to the invalid bytes,
-        // however the byte[] contains the fallback bytes (in case the index is -1)
-        private unsafe int FallbackInvalidByteSequence(
-            byte* pSrc, int ch, DecoderFallbackBuffer fallback)
+
+        public override Encoder GetEncoder()
         {
-            // Calling GetBytesUnknown can adjust the pSrc pointer but we need to pass the pointer before the adjustment
-            // to fallback.InternalFallback. The input pSrc to fallback.InternalFallback will only be used to calculate the
-            // index inside bytesUnknown and if we pass the adjusted pointer we can end up with negative index values.
-            // We store the original pSrc in pOriginalSrc and then pass pOriginalSrc to fallback.InternalFallback.
-            byte* pOriginalSrc = pSrc;
-
-            // Get our byte[]
-            byte[] bytesUnknown = GetBytesUnknown(ref pSrc, ch);
-
-            // Do the actual fallback
-            int count = fallback.InternalFallback(bytesUnknown, pOriginalSrc);
-
-            // # of fallback chars expected.
-            // Note that we only get here for "long" sequences, and have already unreserved
-            // the count that we prereserved for the input bytes
-            return count;
+            return new EncoderNLS(this);
         }
 
-        // Note that some of these bytes may have come from a previous fallback, so we cannot
-        // just decrement the pointer and use the values we read.  In those cases we have
-        // to regenerate the original values.
-        private unsafe byte[] GetBytesUnknown(ref byte* pSrc, int ch)
-        {
-            // Get our byte[]
-            byte[] bytesUnknown = null;
+        //
+        // Beginning of methods used by shared fallback logic.
+        //
 
-            // See if it was a plain char
-            // (have to check >= 0 because we have all sorts of wierd bit flags)
-            if (ch < 0x100 && ch >= 0)
-            {
-                pSrc--;
-                bytesUnknown = new byte[] { unchecked((byte)ch) };
-            }
-            // See if its an unfinished 2 byte sequence
-            else if ((ch & (SupplimentarySeq | ThreeByteSeq)) == 0)
-            {
-                pSrc--;
-                bytesUnknown = new byte[] { unchecked((byte)((ch & 0x1F) | 0xc0)) };
-            }
-            // So now we're either 2nd byte of 3 or 4 byte sequence or
-            // we hit a non-trail byte or we ran out of space for 3rd byte of 4 byte sequence
-            // 1st check if its a 4 byte sequence
-            else if ((ch & SupplimentarySeq) != 0)
-            {
-                //  3rd byte of 4 byte sequence?
-                if ((ch & (FinalByte >> 6)) != 0)
-                {
-                    // 3rd byte of 4 byte sequence
-                    pSrc -= 3;
-                    bytesUnknown = new byte[] {
-                        unchecked((byte)(((ch >> 12) & 0x07) | 0xF0)),
-                        unchecked((byte)(((ch >> 6) & 0x3F) | 0x80)),
-                        unchecked((byte)(((ch) & 0x3F) | 0x80)) };
-                }
-                else if ((ch & (FinalByte >> 12)) != 0)
-                {
-                    // 2nd byte of a 4 byte sequence
-                    pSrc -= 2;
-                    bytesUnknown = new byte[] {
-                        unchecked((byte)(((ch >> 6) & 0x07) | 0xF0)),
-                        unchecked((byte)(((ch) & 0x3F) | 0x80)) };
-                }
-                else
-                {
-                    // 4th byte of a 4 byte sequence
-                    pSrc--;
-                    bytesUnknown = new byte[] { unchecked((byte)(((ch) & 0x07) | 0xF0)) };
-                }
-            }
-            else
-            {
-                // 2nd byte of 3 byte sequence?
-                if ((ch & (FinalByte >> 6)) != 0)
-                {
-                    // So its 2nd byte of a 3 byte sequence
-                    pSrc -= 2;
-                    bytesUnknown = new byte[] {
-                        unchecked((byte)(((ch >> 6) & 0x0F) | 0xE0)), unchecked ((byte)(((ch) & 0x3F) | 0x80)) };
-                }
-                else
-                {
-                    // 1st byte of a 3 byte sequence
-                    pSrc--;
-                    bytesUnknown = new byte[] { unchecked((byte)(((ch) & 0x0F) | 0xE0)) };
-                }
-            }
+        internal sealed override bool TryGetByteCount(Rune value, out int byteCount)
+        {
+            // All well-formed Rune instances can be converted to 1..4 UTF-8 code units.
 
-            return bytesUnknown;
+            byteCount = value.Utf8SequenceLength;
+            return true;
         }
 
-
-        public override Decoder GetDecoder()
+        internal sealed override OperationStatus EncodeRune(Rune value, Span<byte> bytes, out int bytesWritten)
         {
-            return new UTF8Decoder(this);
-        }
+            // All well-formed Rune instances can be encoded as 1..4 UTF-8 code units.
+            // If there's an error, it's because the destination was too small.
 
+            return value.TryEncodeToUtf8(bytes, out bytesWritten) ? OperationStatus.Done : OperationStatus.DestinationTooSmall;
+        }
 
-        public override Encoder GetEncoder()
+        internal sealed override OperationStatus DecodeFirstRune(ReadOnlySpan<byte> bytes, out Rune value, out int bytesConsumed)
         {
-            return new UTF8Encoder(this);
+            return Rune.DecodeFromUtf8(bytes, out value, out bytesConsumed);
         }
 
+        //
+        // End of methods used by shared fallback logic.
+        //
 
         public override int GetMaxByteCount(int charCount)
         {
@@ -2571,62 +858,5 @@ namespace System.Text
             return this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode() +
                    UTF8_CODEPAGE + (_emitUTF8Identifier ? 1 : 0);
         }
-
-        private sealed class UTF8Encoder : EncoderNLS
-        {
-            // We must save a high surrogate value until the next call, looking
-            // for a low surrogate value.
-            internal int surrogateChar;
-
-            public UTF8Encoder(UTF8Encoding encoding) : base(encoding)
-            {
-                // base calls reset
-            }
-
-            public override void Reset()
-
-            {
-                this.surrogateChar = 0;
-                if (_fallbackBuffer != null)
-                    _fallbackBuffer.Reset();
-            }
-
-            // Anything left in our encoder?
-            internal override bool HasState
-            {
-                get
-                {
-                    return (this.surrogateChar != 0);
-                }
-            }
-        }
-
-        private sealed class UTF8Decoder : DecoderNLS
-        {
-            // We'll need to remember the previous information. See the comments around definition
-            // of FinalByte for details.
-            internal int bits;
-
-            public UTF8Decoder(UTF8Encoding encoding) : base(encoding)
-            {
-                // base calls reset
-            }
-
-            public override void Reset()
-            {
-                this.bits = 0;
-                if (_fallbackBuffer != null)
-                    _fallbackBuffer.Reset();
-            }
-
-            // Anything left in our decoder?
-            internal override bool HasState
-            {
-                get
-                {
-                    return (this.bits != 0);
-                }
-            }
-        }
     }
 }
diff --git a/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.Validation.cs b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.Validation.cs
new file mode 100644
index 0000000..83f87f9
--- /dev/null
+++ b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.Validation.cs
@@ -0,0 +1,361 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Numerics;
+using Internal.Runtime.CompilerServices;
+
+#if BIT64
+using nint = System.Int64;
+using nuint = System.UInt64;
+#else // BIT64
+using nint = System.Int32;
+using nuint = System.UInt32;
+#endif // BIT64
+
+namespace System.Text.Unicode
+{
+    internal static unsafe partial class Utf16Utility
+    {
+        /// <summary>
+        /// Given an input buffer <paramref name="pInputBuffer"/> of char length <paramref name="inputLength"/>,
+        /// returns a pointer to where the first invalid data appears in <paramref name="pInputBuffer"/>.
+        /// </summary>
+        /// <param name="pInputBuffer">Pointer to the first UTF-16 code unit to validate.</param>
+        /// <param name="inputLength">Number of chars available at <paramref name="pInputBuffer"/>.</param>
+        /// <param name="utf8CodeUnitCountAdjustment">When this method returns, the value to add to the number of chars consumed to get the number of UTF-8 code units needed to encode them.</param>
+        /// <param name="scalarCountAdjustment">When this method returns, the value to add to the number of chars consumed to get the number of Unicode scalars they represent.</param>
+        /// <returns>A pointer to the first invalid char, or to the end of <paramref name="pInputBuffer"/> if the buffer is well-formed.</returns>
+        public static char* GetPointerToFirstInvalidChar(char* pInputBuffer, int inputLength, out long utf8CodeUnitCountAdjustment, out int scalarCountAdjustment)
+        {
+            // First, we'll handle the common case of all-ASCII. If this is able to
+            // consume the entire buffer, we'll skip the remainder of this method's logic.
+
+            int numAsciiCharsConsumedJustNow = (int)ASCIIUtility.GetIndexOfFirstNonAsciiChar(pInputBuffer, (uint)inputLength);
+            Debug.Assert(0 <= numAsciiCharsConsumedJustNow && numAsciiCharsConsumedJustNow <= inputLength);
+
+            pInputBuffer += (uint)numAsciiCharsConsumedJustNow;
+            inputLength -= numAsciiCharsConsumedJustNow;
+
+            if (inputLength == 0)
+            {
+                utf8CodeUnitCountAdjustment = 0;
+                scalarCountAdjustment = 0;
+                return pInputBuffer;
+            }
+
+            // If we got here, it means we saw some non-ASCII data, so within our
+            // vectorized code paths below we'll handle all non-surrogate UTF-16
+            // code points branchlessly. We'll only branch if we see surrogates.
+            //
+            // We still optimistically assume the data is mostly ASCII. This means that the
+            // number of UTF-8 code units and the number of scalars almost matches the number
+            // of UTF-16 code units. As we go through the input and find non-ASCII
+            // characters, we'll keep track of these "adjustment" fixups. To get the
+            // total number of UTF-8 code units required to encode the input data, add
+            // the UTF-8 code unit count adjustment to the number of UTF-16 code units
+            // seen.  To get the total number of scalars present in the input data,
+            // add the scalar count adjustment to the number of UTF-16 code units seen.
+
+            long tempUtf8CodeUnitCountAdjustment = 0;
+            int tempScalarCountAdjustment = 0;
+
+            if (Sse41.IsSupported)
+            {
+                if (inputLength >= Vector128<ushort>.Count)
+                {
+                    Vector128<ushort> vector0080 = Vector128.Create((ushort)0x80);
+                    Vector128<ushort> vector0800 = Sse2.ShiftLeftLogical(vector0080, 4); // = 0x0800
+                    Vector128<ushort> vectorA800 = Vector128.Create((ushort)0xA800);
+                    Vector128<short> vector8800 = Vector128.Create(unchecked((short)0x8800));
+
+                    do
+                    {
+                        Vector128<ushort> utf16Data = Sse2.LoadVector128((ushort*)pInputBuffer);
+
+                        uint mask = (uint)Sse2.MoveMask(
+                            Sse2.Or(
+                                Sse2.ShiftLeftLogical(Sse41.Min(utf16Data, vector0080), 8),
+                                Sse2.ShiftRightLogical(Sse41.Min(utf16Data, vector0800), 4)).AsByte());
+
+                        // Each odd bit of mask will be 1 only if the char was >= 0x0080,
+                        // and each even bit of mask will be 1 only if the char was >= 0x0800.
+                        //
+                        // Example for UTF-16 input "[ 0123 ] [ 1234 ] ...":
+                        //
+                        //            ,-- set if char[1] is non-ASCII
+                        //            |   ,-- set if char[0] is non-ASCII
+                        //            v   v
+                        // mask = ... 1 1 1 0
+                        //              ^   ^-- set if char[0] is >= 0x800
+                        //              `-- set if char[1] is >= 0x800
+                        //
+                        // This means we can popcnt the number of set bits, and the result is the
+                        // number of *additional* UTF-8 bytes that each UTF-16 code unit requires as
+                        // it expands. This results in the wrong count for UTF-16 surrogate code
+                        // units (we just counted that each individual code unit expands to 3 bytes,
+                        // but in reality a well-formed UTF-16 surrogate pair expands to 4 bytes).
+                        // We'll handle this in just a moment.
+                        //
+                        // For now, compute the popcnt but squirrel it away. We'll fold it in to the
+                        // cumulative UTF-8 adjustment factor once we know this vector has no unpaired
+                        // surrogates (which would invalidate our computed result and force us to discard it).
+
+                        uint popcnt = (uint)BitOperations.PopCount(mask);
+
+                        // Surrogates need to be special-cased for two reasons: (a) we need
+                        // to account for the fact that we over-counted in the addition above;
+                        // and (b) they require separate validation.
+
+                        utf16Data = Sse2.Add(utf16Data, vectorA800);
+                        mask = (uint)Sse2.MoveMask(Sse2.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte());
+
+                        if (mask != 0)
+                        {
+                            // There's at least one UTF-16 surrogate code unit present.
+                            // Since we performed a pmovmskb operation on the result of a 16-bit pcmpgtw,
+                            // the resulting bits of 'mask' will occur in pairs:
+                            // - 00 if the corresponding UTF-16 char was not a surrogate code unit;
+                            // - 11 if the corresponding UTF-16 char was a surrogate code unit.
+                            //
+                            // A UTF-16 high/low surrogate code unit has the bit pattern [ 11011q## ######## ],
+                            // where # is any bit; q = 0 represents a high surrogate, and q = 1 represents
+                            // a low surrogate. Since we added 0xA800 in the vectorized operation above,
+                            // our surrogate code units will now have the bit pattern [ 10000q## ######## ].
+                            // If we logical right-shift each word by 3, we'll end up with the bit pattern
+                            // [ 00010000 q####### ], which means that we can immediately use pmovmskb to
+                            // determine whether a given char was a high or a low surrogate.
+                            //
+                            // Therefore the resulting bits of 'mask2' will occur in pairs:
+                            // - 00 if the corresponding UTF-16 char was a high surrogate code unit;
+                            // - 01 if the corresponding UTF-16 char was a low surrogate code unit;
+                            // - ## (garbage) if the corresponding UTF-16 char was not a surrogate code unit.
+
+                            uint mask2 = (uint)Sse2.MoveMask(Sse2.ShiftRightLogical(utf16Data, 3).AsByte());
+
+                            uint lowSurrogatesMask = mask2 & mask; // 01 only if was a low surrogate char, else 00 ('& mask' clears the garbage pairs)
+                            uint highSurrogatesMask = (mask2 ^ 0x5555u) & mask; // flip even bits (00 <-> 01), then clear the garbage pairs: 01 only if was a high surrogate char, else 00
+
+                            // Now check that each high surrogate is followed by a low surrogate and that each low
+                            // surrogate follows a high surrogate. We make an exception for the case where the final
+                            // char of the vector is a high surrogate, since we can't perform validation on it until
+                            // the next iteration of the loop when we hope to consume the matching low surrogate.
+
+                            highSurrogatesMask <<= 2;
+                            if ((ushort)highSurrogatesMask != lowSurrogatesMask)
+                            {
+                                goto NonVectorizedLoop; // error: mismatched surrogate pair; break out of vectorized logic
+                            }
+
+                            if (highSurrogatesMask > ushort.MaxValue)
+                            {
+                                // There was a standalone high surrogate at the end of the vector.
+                                // We'll adjust our counters so that we don't consider this char consumed.
+
+                                highSurrogatesMask = (ushort)highSurrogatesMask; // don't allow stray high surrogate to be consumed by popcnt
+                                popcnt -= 2; // this char's two bits (0xC000) in the first mask were already counted by popcnt; back them out since the char isn't consumed
+                                pInputBuffer--;
+                                inputLength++;
+                            }
+
+                            int surrogatePairsCount = BitOperations.PopCount(highSurrogatesMask);
+
+                            // 2 UTF-16 chars become 1 Unicode scalar
+
+                            tempScalarCountAdjustment -= surrogatePairsCount;
+
+                            // Since each surrogate code unit was >= 0x0800, we eagerly assumed it'd be
+                            // encoded as 3 UTF-8 code units, so our earlier popcnt computation assumes that
+                            // the pair is encoded as 6 UTF-8 code units. Since each pair is in reality only
+                            // encoded as 4 UTF-8 code units, we need to subtract 2 per pair now.
+
+                            nint surrogatePairsCountNint = (nint)(nuint)(uint)surrogatePairsCount; // zero-extend to native int size
+                            tempUtf8CodeUnitCountAdjustment -= 2 * surrogatePairsCountNint;
+                        }
+
+                        tempUtf8CodeUnitCountAdjustment += popcnt;
+                        pInputBuffer += Vector128<ushort>.Count;
+                        inputLength -= Vector128<ushort>.Count;
+                    } while (inputLength >= Vector128<ushort>.Count);
+                }
+            }
+            else if (Vector.IsHardwareAccelerated)
+            {
+                if (inputLength >= Vector<ushort>.Count)
+                {
+                    Vector<ushort> vector0080 = new Vector<ushort>(0x0080);
+                    Vector<ushort> vector0400 = new Vector<ushort>(0x0400);
+                    Vector<ushort> vector0800 = new Vector<ushort>(0x0800);
+                    Vector<ushort> vectorD800 = new Vector<ushort>(0xD800);
+
+                    do
+                    {
+                        // The 'twoOrMoreUtf8Bytes' and 'threeOrMoreUtf8Bytes' vectors will contain
+                        // elements whose values are 0xFFFF (-1 as signed word) iff the corresponding
+                        // UTF-16 code unit was >= 0x0080 and >= 0x0800, respectively. By summing these
+                        // vectors, each element of the sum will contain one of three values:
+                        //
+                        // 0x0000 ( 0) = original char was 0000..007F
+                        // 0xFFFF (-1) = original char was 0080..07FF
+                        // 0xFFFE (-2) = original char was 0800..FFFF
+                        //
+                        // We'll negate them to produce a value 0..2 for each element, then sum all the
+                        // elements together to produce the number of *additional* UTF-8 code units
+                        // required to represent this UTF-16 data. This is similar to the popcnt step
+                        // performed by the SSE41 code path. This will overcount surrogates, but we'll
+                        // handle that shortly.
+
+                        Vector<ushort> utf16Data = Unsafe.ReadUnaligned<Vector<ushort>>(pInputBuffer);
+                        Vector<ushort> twoOrMoreUtf8Bytes = Vector.GreaterThanOrEqual(utf16Data, vector0080);
+                        Vector<ushort> threeOrMoreUtf8Bytes = Vector.GreaterThanOrEqual(utf16Data, vector0800);
+                        Vector<nuint> sumVector = (Vector<nuint>)(-Vector.Add(twoOrMoreUtf8Bytes, threeOrMoreUtf8Bytes));
+
+                        // We'll try summing by a natural word (rather than a 16-bit word) at a time,
+                        // which should halve the number of operations we must perform.
+
+                        nuint popcnt = 0;
+                        for (int i = 0; i < Vector<nuint>.Count; i++)
+                        {
+                            popcnt += sumVector[i];
+                        }
+
+                        uint popcnt32 = (uint)popcnt;
+                        if (IntPtr.Size == 8)
+                        {
+                            popcnt32 += (uint)(popcnt >> 32);
+                        }
+
+                        // As in the SSE4.1 paths, compute popcnt but don't fold it in until we
+                        // know there aren't any unpaired surrogates in the input data.
+
+                        popcnt32 = (ushort)popcnt32 + (popcnt32 >> 16);
+
+                        // Now check for surrogates.
+
+                        utf16Data -= vectorD800;
+                        Vector<ushort> surrogateChars = Vector.LessThan(utf16Data, vector0800);
+                        if (surrogateChars != Vector<ushort>.Zero)
+                        {
+                            // There's at least one surrogate (high or low) UTF-16 code unit in
+                            // the vector. We'll build up additional vectors: 'highSurrogateChars'
+                            // and 'lowSurrogateChars', where the elements are 0xFFFF iff the original
+                            // UTF-16 code unit was a high or low surrogate, respectively.
+
+                            Vector<ushort> highSurrogateChars = Vector.LessThan(utf16Data, vector0400);
+                            Vector<ushort> lowSurrogateChars = Vector.AndNot(surrogateChars, highSurrogateChars);
+
+                            // We want to make sure that each high surrogate code unit is followed by
+                            // a low surrogate code unit and each low surrogate code unit follows a
+                            // high surrogate code unit. Since we don't have an equivalent of pmovmskb
+                            // or palignr available to us, we'll do this as a loop. We won't look at
+                            // the very last high surrogate char element since we don't yet know if
+                            // the next vector read will have a low surrogate char element.
+
+                            if (lowSurrogateChars[0] != 0)
+                            {
+                                goto Error; // error: standalone low surrogate in element 0 (a trailing high surrogate is always re-read as element 0 of the next vector, and the loop below never inspects lowSurrogateChars[0])
+                            }
+
+                            ushort surrogatePairsCount = 0;
+                            for (int i = 0; i < Vector<ushort>.Count - 1; i++)
+                            {
+                                surrogatePairsCount -= highSurrogateChars[i]; // element is 0xFFFF or 0x0000, so this wraps to +1 or +0
+                                if (highSurrogateChars[i] != lowSurrogateChars[i + 1])
+                                {
+                                    goto NonVectorizedLoop; // error: mismatched surrogate pair; break out of vectorized logic
+                                }
+                            }
+
+                            if (highSurrogateChars[Vector<ushort>.Count - 1] != 0)
+                            {
+                                // There was a standalone high surrogate at the end of the vector.
+                                // We'll adjust our counters so that we don't consider this char consumed.
+
+                                pInputBuffer--;
+                                inputLength++;
+                                popcnt32 -= 2; // un-count this char's 2 extra UTF-8 code units; the scalar count needs no fixup since the loop above never counted this char as a pair
+                            }
+
+                            nint surrogatePairsCountNint = (nint)surrogatePairsCount; // zero-extend to native int size
+
+                            // 2 UTF-16 chars become 1 Unicode scalar
+
+                            tempScalarCountAdjustment -= (int)surrogatePairsCountNint;
+
+                            // Since each surrogate code unit was >= 0x0800, we eagerly assumed
+                            // it'd be encoded as 3 UTF-8 code units. Each surrogate half is only
+                            // encoded as 2 UTF-8 code units (for 4 UTF-8 code units total),
+                            // so we'll adjust this now.
+
+                            tempUtf8CodeUnitCountAdjustment -= 2 * surrogatePairsCountNint;
+                        }
+
+                        tempUtf8CodeUnitCountAdjustment += popcnt32;
+                        pInputBuffer += Vector<ushort>.Count;
+                        inputLength -= Vector<ushort>.Count;
+                    } while (inputLength >= Vector<ushort>.Count);
+                }
+            }
+
+        NonVectorizedLoop:
+
+            // Vectorization isn't supported on our current platform, or the input was too small to benefit
+            // from vectorization, or we saw invalid UTF-16 data in the vectorized code paths and need to
+            // drain remaining valid chars before we report failure.
+
+            for (; inputLength > 0; pInputBuffer++, inputLength--)
+            {
+                uint thisChar = pInputBuffer[0];
+                if (thisChar <= 0x7F)
+                {
+                    continue;
+                }
+
+                // Bump adjustment by +1 for U+0080..U+07FF; by +2 for U+0800..U+FFFF.
+                // This optimistically assumes no surrogates, which we'll handle shortly.
+
+                tempUtf8CodeUnitCountAdjustment += (thisChar + 0x0001_F800u) >> 16;
+
+                if (!UnicodeUtility.IsSurrogateCodePoint(thisChar))
+                {
+                    continue;
+                }
+
+                // Found a surrogate char. Back out the adjustment we made above, then
+                // try to consume the entire surrogate pair all at once. We won't bother
+                // trying to interpret the surrogate pair as a scalar value; we'll only
+                // validate that its bit pattern matches what's expected for a surrogate pair.
+
+                tempUtf8CodeUnitCountAdjustment -= 2;
+
+                if (inputLength == 1)
+                {
+                    goto Error; // input buffer too small to read a surrogate pair
+                }
+
+                thisChar = Unsafe.ReadUnaligned<uint>(pInputBuffer);
+                if (((thisChar - (BitConverter.IsLittleEndian ? 0xDC00_D800u : 0xD800_DC00u)) & 0xFC00_FC00u) != 0)
+                {
+                    goto Error; // not a well-formed surrogate pair
+                }
+
+                tempScalarCountAdjustment--; // 2 UTF-16 code units -> 1 scalar
+                tempUtf8CodeUnitCountAdjustment += 2; // 2 UTF-16 code units -> 4 UTF-8 code units
+
+                pInputBuffer++; // consumed one extra char
+                inputLength--;
+            }
+
+        Error:
+
+            // Also used for normal return.
+
+            utf8CodeUnitCountAdjustment = tempUtf8CodeUnitCountAdjustment;
+            scalarCountAdjustment = tempScalarCountAdjustment;
+            return pInputBuffer;
+        }
+    }
+}
diff --git a/src/System.Private.CoreLib/shared/System/Text/Utf16Utility.cs b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.cs
similarity index 99%
rename from src/System.Private.CoreLib/shared/System/Text/Utf16Utility.cs
rename to src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.cs
index bed3905..828776b 100644
--- a/src/System.Private.CoreLib/shared/System/Text/Utf16Utility.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf16Utility.cs
@@ -5,7 +5,7 @@
 using System.Runtime.CompilerServices;
 using System.Diagnostics;
 
-namespace System.Text
+namespace System.Text.Unicode
 {
     internal static partial class Utf16Utility
     {
diff --git a/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8.cs b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8.cs
index 657dc17..b4cae37 100644
--- a/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8.cs
@@ -39,7 +39,7 @@ namespace System.Text.Unicode
         /// in <paramref name="source"/> will be replaced with U+FFFD in <paramref name="destination"/>, and
         /// this method will not return <see cref="OperationStatus.InvalidData"/>.
         /// </remarks>
-        public static unsafe OperationStatus FromUtf16(ReadOnlySpan<char> source, Span<byte> destination, out int numCharsRead, out int numBytesWritten, bool replaceInvalidSequences = true, bool isFinalBlock = true)
+        public static unsafe OperationStatus FromUtf16(ReadOnlySpan<char> source, Span<byte> destination, out int charsRead, out int bytesWritten, bool replaceInvalidSequences = true, bool isFinalBlock = true)
         {
             // Throwaway span accesses - workaround for https://github.com/dotnet/coreclr/issues/23437
 
@@ -116,8 +116,8 @@ namespace System.Text.Unicode
 
                 // Not possible to make any further progress - report to our caller how far we got.
 
-                numCharsRead = (int)(pInputBufferRemaining - pOriginalSource);
-                numBytesWritten = (int)(pOutputBufferRemaining - pOriginalDestination);
+                charsRead = (int)(pInputBufferRemaining - pOriginalSource);
+                bytesWritten = (int)(pOutputBufferRemaining - pOriginalDestination);
                 return operationStatus;
             }
         }
diff --git a/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.Transcoding.cs b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.Transcoding.cs
index c9ae2d9..0008d0e 100644
--- a/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.Transcoding.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.Transcoding.cs
@@ -928,7 +928,7 @@ namespace System.Text.Unicode
 
                     if (BitConverter.IsLittleEndian && Bmi2.X64.IsSupported)
                     {
-                        const ulong PEXT_MASK = 0x007F007F_007F007Ful;
+                        const ulong PEXT_MASK = 0x00FF00FF_00FF00FFul;
 
                         // Try reading and writing 8 elements per iteration.
                         uint maxIters = minElementsRemaining / 8;
diff --git a/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.Validation.cs b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.Validation.cs
index 68cd054..cf6c9bc 100644
--- a/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.Validation.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.Validation.cs
@@ -40,14 +40,13 @@ namespace System.Text.Unicode
                 // Quick check - did we just end up consuming the entire input buffer?
                 // If so, short-circuit the remainder of the method.
 
-                if ((int)numAsciiBytesCounted == inputLength)
+                inputLength -= (int)numAsciiBytesCounted;
+                if (inputLength == 0)
                 {
                     utf16CodeUnitCountAdjustment = 0;
                     scalarCountAdjustment = 0;
                     return pInputBuffer;
                 }
-
-                inputLength -= (int)numAsciiBytesCounted;
             }
 
 #if DEBUG
@@ -604,9 +603,9 @@ namespace System.Text.Unicode
             Debug.Assert(inputBufferRemainingBytes < 4);
             while (inputBufferRemainingBytes > 0)
             {
-                byte firstByte = pInputBuffer[0];
+                uint firstByte = pInputBuffer[0];
 
-                if (firstByte < 0x80u)
+                if ((byte)firstByte < 0x80u)
                 {
                     // 1-byte (ASCII) case
                     pInputBuffer++;
@@ -616,10 +615,10 @@ namespace System.Text.Unicode
                 else if (inputBufferRemainingBytes >= 2)
                 {
                     uint secondByte = pInputBuffer[1]; // typed as 32-bit since we perform arithmetic (not just comparisons) on this value
-                    if (firstByte < 0xE0u)
+                    if ((byte)firstByte < 0xE0u)
                     {
                         // 2-byte case
-                        if (firstByte >= 0xC2u && IsLowByteUtf8ContinuationByte(secondByte))
+                        if ((byte)firstByte >= 0xC2u && IsLowByteUtf8ContinuationByte(secondByte))
                         {
                             pInputBuffer += 2;
                             tempUtf16CodeUnitCountAdjustment--; // 2 UTF-8 bytes -> 1 UTF-16 code unit (and 1 scalar)
@@ -629,16 +628,16 @@ namespace System.Text.Unicode
                     }
                     else if (inputBufferRemainingBytes >= 3)
                     {
-                        if (firstByte <= 0xF0u)
+                        if ((byte)firstByte < 0xF0u)
                         {
-                            if (firstByte == 0xE0u)
+                            if ((byte)firstByte == 0xE0u)
                             {
                                 if (!UnicodeUtility.IsInRangeInclusive(secondByte, 0xA0u, 0xBFu))
                                 {
                                     goto Error; // overlong encoding
                                 }
                             }
-                            else if (firstByte == 0xEDu)
+                            else if ((byte)firstByte == 0xEDu)
                             {
                                 if (!UnicodeUtility.IsInRangeInclusive(secondByte, 0x80u, 0x9Fu))
                                 {
diff --git a/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.cs b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.cs
index 218e79d..d24f766 100644
--- a/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.cs
@@ -39,7 +39,7 @@ namespace System.Text.Unicode
                 int index = (int)(void*)Unsafe.ByteOffset(ref *pUtf8Data, ref *pFirstInvalidByte);
 
                 isAscii = (utf16CodeUnitCountAdjustment == 0); // If UTF-16 char count == UTF-8 byte count, it's ASCII.
-                return (index <= utf8Data.Length) ? index : -1;
+                return (index < utf8Data.Length) ? index : -1;
             }
         }
 
-- 
2.7.4