Use Sse2 intrinsics to make NeedsEscaping check faster for large JSON strings (dotne...
authorAhson Khan <ahson_ahmedk@yahoo.com>
Tue, 22 Oct 2019 07:59:19 +0000 (00:59 -0700)
committerGitHub <noreply@github.com>
Tue, 22 Oct 2019 07:59:19 +0000 (00:59 -0700)
* Use Sse2 intrinsics to make the NeedsEscaping check faster for large
strings.

* Update the UTF-8 bytes NeedsEscaping overload and add tests.

* Remove unnecessary bitwise OR and add more tests

* Add more tests around surrogates, invalid strings, and characters >
short.MaxValue.

Commit migrated from https://github.com/dotnet/corefx/commit/7cae92b39d2fed2e4e88e900f2e7d787ed9f6cfa

src/libraries/System.Text.Json/src/System.Text.Json.csproj
src/libraries/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs
src/libraries/System.Text.Json/tests/Utf8JsonWriterTests.cs

index 5eec31c..45cb66c 100644 (file)
     <Reference Include="System.Resources.ResourceManager" />
     <Reference Include="System.Runtime" />
     <Reference Include="System.Runtime.Extensions" />
+    <Reference Include="System.Runtime.Intrinsics" />
     <Reference Include="System.Text.Encoding.Extensions" />
   </ItemGroup>
   <ItemGroup>
index a41035b..5c6c27f 100644 (file)
@@ -5,10 +5,16 @@
 using System.Buffers;
 using System.Buffers.Text;
 using System.Diagnostics;
+using System.Numerics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Text.Encodings.Web;
 
+#if BUILDING_INBOX_LIBRARY
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+
 namespace System.Text.Json
 {
     // TODO: Replace the escaping logic with publicly shipping APIs from https://github.com/dotnet/corefx/issues/33509
@@ -55,57 +61,202 @@ namespace System.Text.Json
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static bool NeedsEscaping(char value) => value > LastAsciiCharacter || AllowList[value] == 0;
 
-        public static int NeedsEscaping(ReadOnlySpan<byte> value, JavaScriptEncoder encoder)
+#if BUILDING_INBOX_LIBRARY
+        private static readonly Vector128<short> s_mask_UInt16_0x20 = Vector128.Create((short)0x20); // Space ' '
+
+        private static readonly Vector128<short> s_mask_UInt16_0x22 = Vector128.Create((short)0x22); // Quotation Mark '"'
+        private static readonly Vector128<short> s_mask_UInt16_0x26 = Vector128.Create((short)0x26); // Ampersand '&'
+        private static readonly Vector128<short> s_mask_UInt16_0x27 = Vector128.Create((short)0x27); // Apostrophe '''
+        private static readonly Vector128<short> s_mask_UInt16_0x2B = Vector128.Create((short)0x2B); // Plus sign '+'
+        private static readonly Vector128<short> s_mask_UInt16_0x3C = Vector128.Create((short)0x3C); // Less Than Sign '<'
+        private static readonly Vector128<short> s_mask_UInt16_0x3E = Vector128.Create((short)0x3E); // Greater Than Sign '>'
+        private static readonly Vector128<short> s_mask_UInt16_0x5C = Vector128.Create((short)0x5C); // Reverse Solidus '\'
+        private static readonly Vector128<short> s_mask_UInt16_0x60 = Vector128.Create((short)0x60); // Grave Access '`'
+
+        private static readonly Vector128<short> s_mask_UInt16_0x7E = Vector128.Create((short)0x7E); // Tilde '~'
+
+        private static readonly Vector128<sbyte> s_mask_SByte_0x20 = Vector128.Create((sbyte)0x20); // Space ' '
+
+        private static readonly Vector128<sbyte> s_mask_SByte_0x22 = Vector128.Create((sbyte)0x22); // Quotation Mark '"'
+        private static readonly Vector128<sbyte> s_mask_SByte_0x26 = Vector128.Create((sbyte)0x26); // Ampersand '&'
+        private static readonly Vector128<sbyte> s_mask_SByte_0x27 = Vector128.Create((sbyte)0x27); // Apostrophe '''
+        private static readonly Vector128<sbyte> s_mask_SByte_0x2B = Vector128.Create((sbyte)0x2B); // Plus sign '+'
+        private static readonly Vector128<sbyte> s_mask_SByte_0x3C = Vector128.Create((sbyte)0x3C); // Less Than Sign '<'
+        private static readonly Vector128<sbyte> s_mask_SByte_0x3E = Vector128.Create((sbyte)0x3E); // Greater Than Sign '>'
+        private static readonly Vector128<sbyte> s_mask_SByte_0x5C = Vector128.Create((sbyte)0x5C); // Reverse Solidus '\'
+        private static readonly Vector128<sbyte> s_mask_SByte_0x60 = Vector128.Create((sbyte)0x60); // Grave Access '`'
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static Vector128<short> CreateEscapingMask(Vector128<short> sourceValue)
         {
-            int idx;
+            Debug.Assert(Sse2.IsSupported);
 
-            if (encoder != null)
-            {
-                idx = encoder.FindFirstCharacterToEncodeUtf8(value);
-                goto Return;
-            }
+            Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_mask_UInt16_0x20); // Space ' ', anything in the control characters range
+
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x22)); // Quotation Mark '"'
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x26)); // Ampersand '&'
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x27)); // Apostrophe '''
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x2B)); // Plus sign '+'
+
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3C)); // Less Than Sign '<'
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3E)); // Greater Than Sign '>'
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x5C)); // Reverse Solidus '\'
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x60)); // Grave Access '`'
+
+            mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_mask_UInt16_0x7E)); // Tilde '~', anything above the ASCII range
+
+            return mask;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static Vector128<sbyte> CreateEscapingMask(Vector128<sbyte> sourceValue)
+        {
+            Debug.Assert(Sse2.IsSupported);
 
-            for (idx = 0; idx < value.Length; idx++)
+            Vector128<sbyte> mask = Sse2.CompareLessThan(sourceValue, s_mask_SByte_0x20); // Control characters, and any byte >= 0x80, which is negative when reinterpreted as sbyte (sbyte.MaxValue is 0x7F)
+
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x22)); // Quotation Mark "
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x26)); // Ampersand &
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x27)); // Apostrophe '
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x2B)); // Plus sign +
+
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3C)); // Less Than Sign <
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3E)); // Greater Than Sign >
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x5C)); // Reverse Solidus \
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x60)); // Grave Access `
+
+            return mask;
+        }
+#endif
+
+        public static unsafe int NeedsEscaping(ReadOnlySpan<byte> value, JavaScriptEncoder encoder)
+        {
+            fixed (byte* ptr = value)
             {
-                if (NeedsEscaping(value[idx]))
+                int idx = 0;
+
+                if (encoder != null)
                 {
+                    idx = encoder.FindFirstCharacterToEncodeUtf8(value);
                     goto Return;
                 }
-            }
 
-            idx = -1; // all characters allowed
+#if BUILDING_INBOX_LIBRARY
+                if (Sse2.IsSupported)
+                {
+                    sbyte* startingAddress = (sbyte*)ptr;
+                    while (value.Length - 16 >= idx)
+                    {
+                        Debug.Assert(startingAddress >= ptr && startingAddress <= (ptr + value.Length - 16));
+
+                        // Load the next 16 bytes.
+                        Vector128<sbyte> sourceValue = Sse2.LoadVector128(startingAddress);
+
+                        // Check if any of the 16 bytes need to be escaped.
+                        Vector128<sbyte> mask = CreateEscapingMask(sourceValue);
+
+                        int index = Sse2.MoveMask(mask.AsByte());
+                        // If index == 0, that means none of the 16 bytes needed to be escaped.
+                        // TrailingZeroCount is relatively expensive, avoid it if possible.
+                        if (index != 0)
+                        {
+                            // Found at least one byte that needs to be escaped, figure out the index of
+                            // the first one found that needed to be escaped within the 16 bytes.
+                            Debug.Assert(index > 0 && index <= 65_535);
+                            int tzc = BitOperations.TrailingZeroCount(index);
+                            Debug.Assert(tzc >= 0 && tzc <= 16);
+                            idx += tzc;
+                            goto Return;
+                        }
+                        idx += 16;
+                        startingAddress += 16;
+                    }
+
+                    // Process the remaining characters.
+                    Debug.Assert(value.Length - idx < 16);
+                }
+#endif
+
+                for (; idx < value.Length; idx++)
+                {
+                    Debug.Assert((ptr + idx) <= (ptr + value.Length));
+                    if (NeedsEscaping(*(ptr + idx)))
+                    {
+                        goto Return;
+                    }
+                }
 
-        Return:
-            return idx;
+                idx = -1; // all characters allowed
+
+            Return:
+                return idx;
+            }
         }
 
         public static unsafe int NeedsEscaping(ReadOnlySpan<char> value, JavaScriptEncoder encoder)
         {
-            int idx;
-
-            // Some implementations of JavascriptEncoder.FindFirstCharacterToEncode may not accept
-            // null pointers and gaurd against that. Hence, check up-front and fall down to return -1.
-            if (encoder != null && !value.IsEmpty)
+            fixed (char* ptr = value)
             {
-                fixed (char* ptr = value)
+                int idx = 0;
+
+                // Some implementations of JavaScriptEncoder.FindFirstCharacterToEncode may not accept
+                // null pointers and guard against that. Hence, check up-front and fall down to return -1.
+                if (encoder != null && !value.IsEmpty)
                 {
                     idx = encoder.FindFirstCharacterToEncode(ptr, value.Length);
+                    goto Return;
                 }
-                goto Return;
-            }
 
-            for (idx = 0; idx < value.Length; idx++)
-            {
-                if (NeedsEscaping(value[idx]))
+#if BUILDING_INBOX_LIBRARY
+                if (Sse2.IsSupported)
                 {
-                    goto Return;
+                    short* startingAddress = (short*)ptr;
+                    while (value.Length - 8 >= idx)
+                    {
+                        Debug.Assert(startingAddress >= ptr && startingAddress <= (ptr + value.Length - 8));
+
+                        // Load the next 8 characters.
+                        Vector128<short> sourceValue = Sse2.LoadVector128(startingAddress);
+
+                        // Check if any of the 8 characters need to be escaped.
+                        Vector128<short> mask = CreateEscapingMask(sourceValue);
+
+                        int index = Sse2.MoveMask(mask.AsByte());
+                        // If index == 0, that means none of the 8 characters needed to be escaped.
+                        // TrailingZeroCount is relatively expensive, avoid it if possible.
+                        if (index != 0)
+                        {
+                            // Found at least one character that needs to be escaped, figure out the index of
+                            // the first one found that needed to be escaped within the 8 characters.
+                            Debug.Assert(index > 0 && index <= 65_535);
+                            int tzc = BitOperations.TrailingZeroCount(index);
+                            Debug.Assert(tzc % 2 == 0 && tzc >= 0 && tzc <= 16);
+                            idx += tzc >> 1;
+                            goto Return;
+                        }
+                        idx += 8;
+                        startingAddress += 8;
+                    }
+
+                    // Process the remaining characters.
+                    Debug.Assert(value.Length - idx < 8);
+                }
+#endif
+
+                for (; idx < value.Length; idx++)
+                {
+                    Debug.Assert((ptr + idx) <= (ptr + value.Length));
+                    if (NeedsEscaping(*(ptr + idx)))
+                    {
+                        goto Return;
+                    }
                 }
-            }
 
-            idx = -1; // all characters allowed
+                idx = -1; // All characters are allowed.
 
-        Return:
-            return idx;
+            Return:
+                return idx;
+            }
         }
 
         public static int GetMaxEscapedLength(int textLength, int firstIndexToEscape)
index f8b625f..1456e0b 100644 (file)
@@ -178,6 +178,396 @@ namespace System.Text.Json.Tests
             JsonTestHelper.AssertContents("\"\u2020\\\"\"", output);
         }
 
+        [Theory]
+        [MemberData(nameof(EscapingTestData))]
+        public void EscapingTestWhileWriting(char replacementChar, JavaScriptEncoder encoder, bool requiresEscaping)
+        {
+            var writerOptions = new JsonWriterOptions { Encoder = encoder };
+
+            {
+                ReadOnlyMemory<byte> written = WriteStringHelper(writerOptions, null);
+                Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+
+                written = WriteUtf8StringHelper(writerOptions, null);
+                Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+
+                written = WriteStringHelper(writerOptions, string.Empty);
+                Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+
+                written = WriteUtf8StringHelper(writerOptions, Array.Empty<byte>());
+                Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+            }
+
+            var random = new Random(42);
+            for (int dataLength = 0; dataLength < 50; dataLength++)
+            {
+                char[] str = new char[dataLength];
+                for (int i = 0; i < dataLength; i++)
+                {
+                    str[i] = (char)random.Next(97, 123);
+                }
+                string baseStr = new string(str);
+                byte[] sourceUtf8 = Encoding.UTF8.GetBytes(baseStr);
+
+                ReadOnlyMemory<byte> written = WriteStringHelper(writerOptions, baseStr);
+                Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+
+                written = WriteUtf8StringHelper(writerOptions, sourceUtf8);
+                Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+
+                for (int i = 0; i < dataLength; i++)
+                {
+                    char[] changed = baseStr.ToCharArray();
+                    changed[i] = replacementChar;
+                    string newStr = new string(changed);
+                    sourceUtf8 = Encoding.UTF8.GetBytes(newStr);
+
+                    written = WriteStringHelper(writerOptions, newStr);
+                    int escapedIndex = written.Span.IndexOf((byte)'\\');
+                    Assert.Equal(requiresEscaping ? (i + 1) : -1, escapedIndex);  // Account for the start quote
+
+                    written = WriteUtf8StringHelper(writerOptions, sourceUtf8);
+                    escapedIndex = written.Span.IndexOf((byte)'\\');
+                    Assert.Equal(requiresEscaping ? (i + 1) : -1, escapedIndex);  // Account for the start quote
+                }
+
+                if (dataLength != 0)
+                {
+                    char[] changed = baseStr.ToCharArray();
+                    changed.AsSpan().Fill(replacementChar);
+                    string newStr = new string(changed);
+                    sourceUtf8 = Encoding.UTF8.GetBytes(newStr);
+
+                    written = WriteStringHelper(writerOptions, newStr);
+                    int escapedIndex = written.Span.IndexOf((byte)'\\');
+                    Assert.Equal(requiresEscaping ? 1 : -1, escapedIndex);  // Account for the start quote
+
+                    written = WriteUtf8StringHelper(writerOptions, sourceUtf8);
+                    escapedIndex = written.Span.IndexOf((byte)'\\');
+                    Assert.Equal(requiresEscaping ? 1 : -1, escapedIndex);  // Account for the start quote
+                }
+            }
+        }
+
+        public static IEnumerable<object[]> EscapingTestData
+        {
+            get
+            {
+                return new List<object[]>
+                {
+                    new object[] { 'a', null, false },              // ASCII not escaped
+                    new object[] { '\u001F', null, true },          // control character within single byte range
+                    new object[] { '\u2000', null, true },          // space character outside single byte range
+                    new object[] { '\u00A2', null, true },          // non-ASCII but < 255
+                    new object[] { '\uA686', null, true },          // non-ASCII above short.MaxValue
+                    new object[] { '\u6C49', null, true },          // non-ASCII from chinese alphabet - multibyte
+                    new object[] { '"', null, true },               // ASCII but must always be escaped in JSON
+                    new object[] { '\\', null, true },              // ASCII but must always be escaped in JSON
+                    new object[] { '<', null, true },               // ASCII but escaped by default
+                    new object[] { '>', null, true },               // ASCII but escaped by default
+                    new object[] { '&', null, true },               // ASCII but escaped by default
+                    new object[] { '`', null, true },               // ASCII but escaped by default
+                    new object[] { '\'', null, true },              // ASCII but escaped by default
+                    new object[] { '+', null, true },               // ASCII but escaped by default
+
+                    new object[] { 'a', JavaScriptEncoder.Default, false },
+                    new object[] { '\u001F', JavaScriptEncoder.Default, true },
+                    new object[] { '\u2000', JavaScriptEncoder.Default, true },
+                    new object[] { '\u00A2', JavaScriptEncoder.Default, true },
+                    new object[] { '\uA686', JavaScriptEncoder.Default, true },
+                    new object[] { '\u6C49', JavaScriptEncoder.Default, true },
+                    new object[] { '"', JavaScriptEncoder.Default, true },
+                    new object[] { '\\', JavaScriptEncoder.Default, true },
+                    new object[] { '<', JavaScriptEncoder.Default, true },
+                    new object[] { '>', JavaScriptEncoder.Default, true },
+                    new object[] { '&', JavaScriptEncoder.Default, true },
+                    new object[] { '`', JavaScriptEncoder.Default, true },
+                    new object[] { '\'', JavaScriptEncoder.Default, true },
+                    new object[] { '+', JavaScriptEncoder.Default, true },
+
+                    new object[] { 'a', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), false },
+                    new object[] { '\u001F', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+                    new object[] { '\u2000', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+                    new object[] { '\u00A2', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+                    new object[] { '\uA686', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+                    new object[] { '\u6C49', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+                    new object[] { '"', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+                    new object[] { '\\', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+                    new object[] { '<', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+                    new object[] { '>', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+                    new object[] { '&', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+                    new object[] { '`', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+                    new object[] { '\'', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+                    new object[] { '+', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+
+                    new object[] { 'a', JavaScriptEncoder.Create(UnicodeRanges.All), false },
+                    new object[] { '\u001F', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '\u2000', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '\u00A2', JavaScriptEncoder.Create(UnicodeRanges.All), false },
+                    new object[] { '\uA686', JavaScriptEncoder.Create(UnicodeRanges.All), false },
+                    new object[] { '\u6C49', JavaScriptEncoder.Create(UnicodeRanges.All), false },
+                    new object[] { '"', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '\\', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '<', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '>', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '&', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '`', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '\'', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '+', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+
+                    new object[] { 'a', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                    new object[] { '\u001F', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
+                    new object[] { '\u2000', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
+                    new object[] { '\u00A2', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                    new object[] { '\uA686', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                    new object[] { '\u6C49', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                    new object[] { '"', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
+                    new object[] { '\\', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
+                    new object[] { '<', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                    new object[] { '>', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                    new object[] { '&', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                    new object[] { '`', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                    new object[] { '\'', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                    new object[] { '+', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                };
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(EscapingTestData_NonAscii))]
+        public unsafe void WriteString_NonAscii(char replacementChar, JavaScriptEncoder encoder, bool requiresEscaping)
+        {
+            var writerOptions = new JsonWriterOptions { Encoder = encoder };
+            var random = new Random(42);
+            for (int dataLength = 1; dataLength < 50; dataLength++)
+            {
+                char[] str = new char[dataLength];
+                for (int i = 0; i < dataLength; i++)
+                {
+                    str[i] = (char)random.Next(0x2E9B, 0x2EF4); // CJK Radicals Supplement characters
+                }
+                string baseStr = new string(str);
+                byte[] sourceUtf8 = Encoding.UTF8.GetBytes(baseStr);
+
+                ReadOnlyMemory<byte> written = WriteStringHelper(writerOptions, baseStr);
+                Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+
+                written = WriteUtf8StringHelper(writerOptions, sourceUtf8);
+                Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+
+                for (int i = 0; i < dataLength; i++)
+                {
+                    string source = baseStr.Insert(i, new string(replacementChar, 1));
+                    sourceUtf8 = Encoding.UTF8.GetBytes(source);
+
+                    written = WriteStringHelper(writerOptions, source);
+                    int escapedIndex = written.Span.IndexOf((byte)'\\');
+                    // Each CJK character expands to 3 utf-8 bytes.
+                    Assert.Equal(requiresEscaping ? ((i * 3) + 1) : -1, escapedIndex);  // Account for the start quote
+
+                    written = WriteUtf8StringHelper(writerOptions, sourceUtf8);
+                    escapedIndex = written.Span.IndexOf((byte)'\\');
+                    // Each CJK character expands to 3 utf-8 bytes.
+                    Assert.Equal(requiresEscaping ? ((i * 3) + 1) : -1, escapedIndex);  // Account for the start quote
+                }
+            }
+        }
+
+        public static IEnumerable<object[]> EscapingTestData_NonAscii
+        {
+            get
+            {
+                return new List<object[]>
+                {
+                    new object[] { 'a', JavaScriptEncoder.Create(UnicodeRanges.All), false },
+                    new object[] { '\u001F', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '\u2000', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '\u00A2', JavaScriptEncoder.Create(UnicodeRanges.All), false },
+                    new object[] { '\uA686', JavaScriptEncoder.Create(UnicodeRanges.All), false },
+                    new object[] { '\u6C49', JavaScriptEncoder.Create(UnicodeRanges.All), false },
+                    new object[] { '"', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '\\', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '<', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '>', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '&', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '`', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '\'', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '+', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+
+                    new object[] { 'a', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                    new object[] { '\u001F', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
+                    new object[] { '\u2000', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
+                    new object[] { '\u00A2', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                    new object[] { '\uA686', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                    new object[] { '\u6C49', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                    new object[] { '"', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
+                    new object[] { '\\', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
+                    new object[] { '<', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                    new object[] { '>', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                    new object[] { '&', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                    new object[] { '`', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                    new object[] { '\'', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                    new object[] { '+', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+                };
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(JavaScriptEncoders))]
+        public void EscapingTestWhileWritingSurrogate(JavaScriptEncoder encoder)
+        {
+            char highSurrogate = '\uD801';
+            char lowSurrogate = '\uDC37';
+            var writerOptions = new JsonWriterOptions { Encoder = encoder };
+            var random = new Random(42);
+            for (int dataLength = 2; dataLength < 50; dataLength++)
+            {
+                char[] str = new char[dataLength];
+                for (int i = 0; i < dataLength; i++)
+                {
+                    str[i] = (char)random.Next(97, 123);
+                }
+                string baseStr = new string(str);
+                byte[] sourceUtf8 = Encoding.UTF8.GetBytes(baseStr);
+
+                ReadOnlyMemory<byte> written = WriteStringHelper(writerOptions, baseStr);
+                Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+
+                written = WriteUtf8StringHelper(writerOptions, sourceUtf8);
+                Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+
+                for (int i = 0; i < dataLength - 1; i++)
+                {
+                    char[] changed = baseStr.ToCharArray();
+                    changed[i] = highSurrogate;
+                    changed[i + 1] = lowSurrogate;
+                    string newStr = new string(changed);
+                    sourceUtf8 = Encoding.UTF8.GetBytes(newStr);
+
+                    written = WriteStringHelper(writerOptions, newStr);
+                    int escapedIndex = written.Span.IndexOf((byte)'\\');
+                    Assert.Equal(i + 1, escapedIndex);  // Account for the start quote
+
+                    written = WriteUtf8StringHelper(writerOptions, sourceUtf8);
+                    escapedIndex = written.Span.IndexOf((byte)'\\');
+                    Assert.Equal(i + 1, escapedIndex);  // Account for the start quote
+                }
+
+                {
+                    char[] changed = baseStr.ToCharArray();
+
+                    for (int i = 0; i < changed.Length - 1; i += 2)
+                    {
+                        changed[i] = highSurrogate;
+                        changed[i + 1] = lowSurrogate;
+                    }
+
+                    string newStr = new string(changed);
+                    sourceUtf8 = Encoding.UTF8.GetBytes(newStr);
+
+                    written = WriteStringHelper(writerOptions, newStr);
+                    int escapedIndex = written.Span.IndexOf((byte)'\\');
+                    Assert.Equal(1, escapedIndex);  // Account for the start quote
+
+                    written = WriteUtf8StringHelper(writerOptions, sourceUtf8);
+                    escapedIndex = written.Span.IndexOf((byte)'\\');
+                    Assert.Equal(1, escapedIndex);  // Account for the start quote
+                }
+            }
+        }
+
+        /// <summary>
+        /// Member data with one row per encoder configuration: no encoder (<c>null</c>),
+        /// the default encoder, encoders created for the BasicLatin and All Unicode ranges,
+        /// and the relaxed JSON encoder. Each row holds a single <see cref="JavaScriptEncoder"/>.
+        /// </summary>
+        public static IEnumerable<object[]> JavaScriptEncoders
+        {
+            get
+            {
+                JavaScriptEncoder[] encoders =
+                {
+                    null,
+                    JavaScriptEncoder.Default,
+                    JavaScriptEncoder.Create(UnicodeRanges.BasicLatin),
+                    JavaScriptEncoder.Create(UnicodeRanges.All),
+                    JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
+                };
+
+                // Wrap every encoder in its own single-element argument row.
+                var rows = new List<object[]>();
+                foreach (JavaScriptEncoder encoder in encoders)
+                {
+                    rows.Add(new object[] { encoder });
+                }
+
+                return rows;
+            }
+        }
+
+        /// <summary>
+        /// Verifies where the first escape sequence appears when a string contains invalid data.
+        /// For every length from 0 to 46 and every position in a random lowercase ASCII string,
+        /// one character is replaced with <paramref name="replacementChar"/> (UTF-16 input) and
+        /// one byte is replaced with 0xC3 (UTF-8 input); both are written as a JSON string value.
+        /// </summary>
+        /// <param name="replacementChar">The lone surrogate placed into the UTF-16 input.</param>
+        /// <param name="encoder">The encoder assigned to <see cref="JsonWriterOptions.Encoder"/>.</param>
+        /// <param name="requiresEscaping">
+        /// <c>true</c> if a backslash is expected at the replaced position; <c>false</c> if the
+        /// output is expected to contain no backslash at all.
+        /// </param>
+        [Theory]
+        [MemberData(nameof(InvalidEscapingTestData))]
+        public void WriteStringInvalidCharacter(char replacementChar, JavaScriptEncoder encoder, bool requiresEscaping)
+        {
+            var writerOptions = new JsonWriterOptions { Encoder = encoder };
+            // Fixed seed keeps the generated strings identical from run to run.
+            var random = new Random(42);
+            for (int dataLength = 0; dataLength < 47; dataLength++)
+            {
+                // Build a base string of lowercase ASCII letters ('a'..'z'), none of which need escaping.
+                char[] str = new char[dataLength];
+                for (int i = 0; i < dataLength; i++)
+                {
+                    str[i] = (char)random.Next(97, 123);
+                }
+                string baseStr = new string(str);
+                byte[] baseStrUtf8 = Encoding.UTF8.GetBytes(baseStr);
+
+                for (int i = 0; i < dataLength; i++)
+                {
+                    char[] changed = baseStr.ToCharArray();
+                    changed[i] = replacementChar;
+                    string source = new string(changed);
+                    byte[] sourceUtf8 = new byte[baseStrUtf8.Length];
+                    baseStrUtf8.AsSpan().CopyTo(sourceUtf8);
+                    sourceUtf8[i] = 0xC3;   // Invalid, first byte of a 2-byte utf-8 character
+
+                    // Some encoders don't escape replacement character, in which case no backslash is expected.
+                    // Otherwise the escape starts at the replaced position, shifted by one for the start quote.
+                    int expectedIndex = requiresEscaping ? i + 1 : -1;
+
+                    ReadOnlyMemory<byte> written = WriteStringHelper(writerOptions, source);
+                    Assert.Equal(expectedIndex, written.Span.IndexOf((byte)'\\'));
+
+                    written = WriteUtf8StringHelper(writerOptions, sourceUtf8);
+                    Assert.Equal(expectedIndex, written.Span.IndexOf((byte)'\\'));
+                }
+            }
+        }
+
+        /// <summary>
+        /// Member data for <see cref="WriteStringInvalidCharacter"/>. Each row is
+        /// { lone surrogate char, encoder, whether an escape is expected }.
+        /// </summary>
+        public static IEnumerable<object[]> InvalidEscapingTestData
+        {
+            get
+            {
+                const char LoneHighSurrogate = '\uD801';    // Invalid, high surrogate alone
+                const char LoneLowSurrogate = '\uDC01';     // Invalid, low surrogate alone
+
+                var rows = new List<object[]>();
+
+                rows.Add(new object[] { LoneHighSurrogate, JavaScriptEncoder.Default, true });
+                rows.Add(new object[] { LoneLowSurrogate, JavaScriptEncoder.Default, true });
+
+                rows.Add(new object[] { LoneHighSurrogate, JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false });
+                rows.Add(new object[] { LoneLowSurrogate, JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false });
+
+                rows.Add(new object[] { LoneHighSurrogate, JavaScriptEncoder.Create(UnicodeRanges.All), false });
+                rows.Add(new object[] { LoneLowSurrogate, JavaScriptEncoder.Create(UnicodeRanges.All), false });
+
+                rows.Add(new object[] { LoneHighSurrogate, JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true });
+                rows.Add(new object[] { LoneLowSurrogate, JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true });
+
+                return rows;
+            }
+        }
+
+        /// <summary>
+        /// Writes <paramref name="str"/> as a single JSON string value using a new
+        /// <see cref="Utf8JsonWriter"/> configured with <paramref name="writerOptions"/>.
+        /// </summary>
+        /// <returns>The memory written to the backing buffer once the writer has been disposed.</returns>
+        private static ReadOnlyMemory<byte> WriteStringHelper(JsonWriterOptions writerOptions, string str)
+        {
+            ArrayBufferWriter<byte> buffer = new ArrayBufferWriter<byte>();
+
+            // The writer is disposed before the buffer is read.
+            using (Utf8JsonWriter jsonWriter = new Utf8JsonWriter(buffer, writerOptions))
+            {
+                jsonWriter.WriteStringValue(str);
+            }
+
+            ReadOnlyMemory<byte> result = buffer.WrittenMemory;
+            return result;
+        }
+
+        /// <summary>
+        /// Writes the bytes in <paramref name="utf8str"/> as a single JSON string value using a
+        /// new <see cref="Utf8JsonWriter"/> configured with <paramref name="writerOptions"/>.
+        /// </summary>
+        /// <returns>The memory written to the backing buffer once the writer has been disposed.</returns>
+        private static ReadOnlyMemory<byte> WriteUtf8StringHelper(JsonWriterOptions writerOptions, byte[] utf8str)
+        {
+            ArrayBufferWriter<byte> buffer = new ArrayBufferWriter<byte>();
+
+            // The writer is disposed before the buffer is read.
+            using (Utf8JsonWriter jsonWriter = new Utf8JsonWriter(buffer, writerOptions))
+            {
+                jsonWriter.WriteStringValue(utf8str);
+            }
+
+            ReadOnlyMemory<byte> result = buffer.WrittenMemory;
+            return result;
+        }
+
         [Fact]
         public void WriteJsonWritesToIBWOnDemand_Dispose()
         {