Add tests for custom JavaScriptEncoder to cover the virtual code paths in TextEncoder...
authorAhson Khan <ahson_ahmedk@yahoo.com>
Wed, 30 Oct 2019 18:05:12 +0000 (11:05 -0700)
committerGitHub <noreply@github.com>
Wed, 30 Oct 2019 18:05:12 +0000 (11:05 -0700)
* Move using directive within ifdef to make it clear when its used.

* Add more tests for custom text encoder case.

* Fix typo in comment gaurd -> guard

* Fix up the using directives that got removed during merge conflict
resolution.

* Address feedback - fix 0x7F case, rename vectors to be self-documenting.

Commit migrated from https://github.com/dotnet/corefx/commit/62e6ccfa09322db02d6c043226bd382debc915c4

src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/DefaultJavaScriptEncoder.cs
src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/Sse2Helper.cs
src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/TextEncoder.cs
src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/UnsafeRelaxedJavaScriptEncoder.cs
src/libraries/System.Text.Encodings.Web/tests/JavaScriptStringEncoderTests.cs
src/libraries/System.Text.Json/src/System/Text/Json/Writer/JsonWriterHelper.Escaping.cs

index ad6ffb9..3fd3c30 100644 (file)
@@ -98,8 +98,9 @@ namespace System.Text.Encodings.Web
                         // Load the next 16 bytes.
                         Vector128<sbyte> sourceValue = Sse2.LoadVector128(startingAddress);
 
-                        Vector128<sbyte> mask = Sse2Helper.CreateAsciiMask(sourceValue);
-                        int index = Sse2.MoveMask(mask);
+                        // Check for ASCII text. Any byte that's not in the ASCII range will already be negative when
+                        // casted to signed byte.
+                        int index = Sse2.MoveMask(sourceValue);
 
                         if (index != 0)
                         {
@@ -196,6 +197,7 @@ namespace System.Text.Encodings.Web
                         idx += utf8BytesConsumedForScalar;
                     }
                 }
+                Debug.Assert(idx == utf8Text.Length);
 
                 idx = -1; // All bytes are allowed.
 
index 6caebd3..f36cce2 100644 (file)
@@ -17,11 +17,17 @@ namespace System.Text.Encodings.Web
         {
             Debug.Assert(Sse2.IsSupported);
 
-            // Space ' ', anything in the control characters range, and anything above short.MaxValue but less than or equal char.MaxValue
-            Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_mask_UInt16_0x20);
+            // Anything in the control characters range, and anything above short.MaxValue but less than or equal char.MaxValue
+            // That's because anything between 32768 and 65535 (inclusive) will overflow and become negative.
+            Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_spaceMaskInt16);
 
-            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x22)); // Quotation Mark '"'
-            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x5C)); // Reverse Solidus '\'
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_quotationMarkMaskInt16));
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_reverseSolidusMaskInt16));
+
+            // Anything above the ASCII range, and also including the leftover control character in the ASCII range - 0x7F
+            // When this method is called with only ASCII data, 0x7F is the only value that would meet this comparison.
+            // However, when called from "Default", the source could contain characters outside the ASCII range.
+            mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_tildeMaskInt16));
 
             return mask;
         }
@@ -31,10 +37,16 @@ namespace System.Text.Encodings.Web
         {
             Debug.Assert(Sse2.IsSupported);
 
-            Vector128<sbyte> mask = Sse2.CompareLessThan(sourceValue, s_mask_SByte_0x20); // Control characters, and anything above 0x7E since sbyte.MaxValue is 0x7E
+            // Anything in the control characters range (except 0x7F), and anything above sbyte.MaxValue but less than or equal byte.MaxValue
+            // That's because anything between 128 and 255 (inclusive) will overflow and become negative.
+            Vector128<sbyte> mask = Sse2.CompareLessThan(sourceValue, s_spaceMaskSByte);
+
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_quotationMarkMaskSByte));
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_reverseSolidusMaskSByte));
 
-            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x22)); // Quotation Mark "
-            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x5C)); // Reverse Solidus \
+            // Leftover control character in the ASCII range - 0x7F
+            // Since we are dealing with sbytes, 0x7F is the only value that would meet this comparison.
+            mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_tildeMaskSByte));
 
             return mask;
         }
@@ -46,14 +58,12 @@ namespace System.Text.Encodings.Web
 
             Vector128<short> mask = CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(sourceValue);
 
-            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x26)); // Ampersand '&'
-            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x27)); // Apostrophe '''
-            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x2B)); // Plus sign '+'
-            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3C)); // Less Than Sign '<'
-            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3E)); // Greater Than Sign '>'
-            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x60)); // Grave Access '`'
-
-            mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_mask_UInt16_0x7E)); // Tilde '~', anything above the ASCII range
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_ampersandMaskInt16));
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_apostropheMaskInt16));
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_plusSignMaskInt16));
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_lessThanSignMaskInt16));
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_greaterThanSignMaskInt16));
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_graveAccentMaskInt16));
 
             return mask;
         }
@@ -65,12 +75,12 @@ namespace System.Text.Encodings.Web
 
             Vector128<sbyte> mask = CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(sourceValue);
 
-            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x26)); // Ampersand &
-            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x27)); // Apostrophe '
-            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x2B)); // Plus sign +
-            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3C)); // Less Than Sign <
-            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3E)); // Greater Than Sign >
-            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x60)); // Grave Access `
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_ampersandMaskSByte));
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_apostropheMaskSByte));
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_plusSignMaskSByte));
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_lessThanSignMaskSByte));
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_greaterThanSignMaskSByte));
+            mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_graveAccentMaskSByte));
 
             return mask;
         }
@@ -80,48 +90,38 @@ namespace System.Text.Encodings.Web
         {
             Debug.Assert(Sse2.IsSupported);
 
-            Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_mask_UInt16_0x00); // Null, anything above short.MaxValue but less than or equal char.MaxValue
-            mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_mask_UInt16_0x7E)); // Tilde '~', anything above the ASCII range
+            // Anything above short.MaxValue but less than or equal char.MaxValue
+            // That's because anything between 32768 and 65535 (inclusive) will overflow and become negative.
+            Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_nullMaskInt16);
 
-            return mask;
-        }
+            // Anything above the ASCII range
+            mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_maxAsciiCharacterMaskInt16));
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static Vector128<sbyte> CreateAsciiMask(Vector128<sbyte> sourceValue)
-        {
-            Debug.Assert(Sse2.IsSupported);
-
-            // Null, anything above sbyte.MaxValue but less than or equal byte.MaxValue (i.e. anything above the ASCII range)
-            Vector128<sbyte> mask = Sse2.CompareLessThan(sourceValue, s_mask_SByte_0x00);
             return mask;
         }
 
-        private static readonly Vector128<short> s_mask_UInt16_0x00 = Vector128<short>.Zero; // Null
-
-        private static readonly Vector128<short> s_mask_UInt16_0x20 = Vector128.Create((short)0x20); // Space ' '
-
-        private static readonly Vector128<short> s_mask_UInt16_0x22 = Vector128.Create((short)0x22); // Quotation Mark '"'
-        private static readonly Vector128<short> s_mask_UInt16_0x26 = Vector128.Create((short)0x26); // Ampersand '&'
-        private static readonly Vector128<short> s_mask_UInt16_0x27 = Vector128.Create((short)0x27); // Apostrophe '''
-        private static readonly Vector128<short> s_mask_UInt16_0x2B = Vector128.Create((short)0x2B); // Plus sign '+'
-        private static readonly Vector128<short> s_mask_UInt16_0x3C = Vector128.Create((short)0x3C); // Less Than Sign '<'
-        private static readonly Vector128<short> s_mask_UInt16_0x3E = Vector128.Create((short)0x3E); // Greater Than Sign '>'
-        private static readonly Vector128<short> s_mask_UInt16_0x5C = Vector128.Create((short)0x5C); // Reverse Solidus '\'
-        private static readonly Vector128<short> s_mask_UInt16_0x60 = Vector128.Create((short)0x60); // Grave Access '`'
-
-        private static readonly Vector128<short> s_mask_UInt16_0x7E = Vector128.Create((short)0x7E); // Tilde '~'
-
-        private static readonly Vector128<sbyte> s_mask_SByte_0x00 = Vector128<sbyte>.Zero; // Null
-
-        private static readonly Vector128<sbyte> s_mask_SByte_0x20 = Vector128.Create((sbyte)0x20); // Space ' '
-
-        private static readonly Vector128<sbyte> s_mask_SByte_0x22 = Vector128.Create((sbyte)0x22); // Quotation Mark '"'
-        private static readonly Vector128<sbyte> s_mask_SByte_0x26 = Vector128.Create((sbyte)0x26); // Ampersand '&'
-        private static readonly Vector128<sbyte> s_mask_SByte_0x27 = Vector128.Create((sbyte)0x27); // Apostrophe '''
-        private static readonly Vector128<sbyte> s_mask_SByte_0x2B = Vector128.Create((sbyte)0x2B); // Plus sign '+'
-        private static readonly Vector128<sbyte> s_mask_SByte_0x3C = Vector128.Create((sbyte)0x3C); // Less Than Sign '<'
-        private static readonly Vector128<sbyte> s_mask_SByte_0x3E = Vector128.Create((sbyte)0x3E); // Greater Than Sign '>'
-        private static readonly Vector128<sbyte> s_mask_SByte_0x5C = Vector128.Create((sbyte)0x5C); // Reverse Solidus '\'
-        private static readonly Vector128<sbyte> s_mask_SByte_0x60 = Vector128.Create((sbyte)0x60); // Grave Access '`'
+        private static readonly Vector128<short> s_nullMaskInt16 = Vector128<short>.Zero;
+        private static readonly Vector128<short> s_spaceMaskInt16 = Vector128.Create((short)' ');
+        private static readonly Vector128<short> s_quotationMarkMaskInt16 = Vector128.Create((short)'"');
+        private static readonly Vector128<short> s_ampersandMaskInt16 = Vector128.Create((short)'&');
+        private static readonly Vector128<short> s_apostropheMaskInt16 = Vector128.Create((short)'\'');
+        private static readonly Vector128<short> s_plusSignMaskInt16 = Vector128.Create((short)'+');
+        private static readonly Vector128<short> s_lessThanSignMaskInt16 = Vector128.Create((short)'<');
+        private static readonly Vector128<short> s_greaterThanSignMaskInt16 = Vector128.Create((short)'>');
+        private static readonly Vector128<short> s_reverseSolidusMaskInt16 = Vector128.Create((short)'\\');
+        private static readonly Vector128<short> s_graveAccentMaskInt16 = Vector128.Create((short)'`');
+        private static readonly Vector128<short> s_tildeMaskInt16 = Vector128.Create((short)'~');
+        private static readonly Vector128<short> s_maxAsciiCharacterMaskInt16 = Vector128.Create((short)0x7F); // Delete control character
+
+        private static readonly Vector128<sbyte> s_spaceMaskSByte = Vector128.Create((sbyte)' ');
+        private static readonly Vector128<sbyte> s_quotationMarkMaskSByte = Vector128.Create((sbyte)'"');
+        private static readonly Vector128<sbyte> s_ampersandMaskSByte = Vector128.Create((sbyte)'&');
+        private static readonly Vector128<sbyte> s_apostropheMaskSByte = Vector128.Create((sbyte)'\'');
+        private static readonly Vector128<sbyte> s_plusSignMaskSByte = Vector128.Create((sbyte)'+');
+        private static readonly Vector128<sbyte> s_lessThanSignMaskSByte = Vector128.Create((sbyte)'<');
+        private static readonly Vector128<sbyte> s_greaterThanSignMaskSByte = Vector128.Create((sbyte)'>');
+        private static readonly Vector128<sbyte> s_reverseSolidusMaskSByte = Vector128.Create((sbyte)'\\');
+        private static readonly Vector128<sbyte> s_graveAccentMaskSByte = Vector128.Create((sbyte)'`');
+        private static readonly Vector128<sbyte> s_tildeMaskSByte = Vector128.Create((sbyte)'~');
     }
 }
index b1350c1..f2de4c5 100644 (file)
@@ -714,8 +714,9 @@ namespace System.Text.Encodings.Web
                         // Load the next 16 bytes.
                         Vector128<sbyte> sourceValue = Sse2.LoadVector128(startingAddress);
 
-                        Vector128<sbyte> mask = Sse2Helper.CreateAsciiMask(sourceValue);
-                        int index = Sse2.MoveMask(mask);
+                        // Check for ASCII text. Any byte that's not in the ASCII range will already be negative when
+                        // casted to signed byte.
+                        int index = Sse2.MoveMask(sourceValue);
 
                         if (index != 0)
                         {
@@ -812,6 +813,7 @@ namespace System.Text.Encodings.Web
                         idx += utf8BytesConsumedForScalar;
                     }
                 }
+                Debug.Assert(idx == utf8Text.Length);
 
                 idx = -1; // All bytes are allowed.
 
index 3258bc0..424ee54 100644 (file)
@@ -153,8 +153,9 @@ namespace System.Text.Encodings.Web
                         // Load the next 16 bytes.
                         Vector128<sbyte> sourceValue = Sse2.LoadVector128(startingAddress);
 
-                        Vector128<sbyte> mask = Sse2Helper.CreateAsciiMask(sourceValue);
-                        int index = Sse2.MoveMask(mask);
+                        // Check for ASCII text. Any byte that's not in the ASCII range will already be negative when
+                        // casted to signed byte.
+                        int index = Sse2.MoveMask(sourceValue);
 
                         if (index != 0)
                         {
@@ -194,7 +195,7 @@ namespace System.Text.Encodings.Web
                         else
                         {
                             // Check if any of the 16 bytes need to be escaped.
-                            mask = Sse2Helper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(sourceValue);
+                             Vector128<sbyte> mask = Sse2Helper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(sourceValue);
 
                             index = Sse2.MoveMask(mask);
                             // If index == 0, that means none of the 16 bytes needed to be escaped.
@@ -245,6 +246,7 @@ namespace System.Text.Encodings.Web
                         idx += utf8BytesConsumedForScalar;
                     }
                 }
+                Debug.Assert(idx == utf8Text.Length);
 
                 idx = -1; // All bytes are allowed.
 
index c577aad..00ddd50 100644 (file)
@@ -5,9 +5,12 @@
 using System;
 using System.Buffers;
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.Globalization;
 using System.IO;
 using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Text.Internal;
 using System.Text.Unicode;
 using Xunit;
 
@@ -32,6 +35,7 @@ namespace System.Text.Encodings.Web.Tests
 
         [Theory]
         [MemberData(nameof(EscapingTestData))]
+        [MemberData(nameof(EscapingTestData_NonAscii))]
         public unsafe void FindFirstCharacterToEncode(char replacementChar, JavaScriptEncoder encoder, bool requiresEscaping)
         {
             Assert.Equal(-1, encoder.FindFirstCharacterToEncodeUtf8(default));
@@ -95,6 +99,7 @@ namespace System.Text.Encodings.Web.Tests
                 {
                     new object[] { 'a', JavaScriptEncoder.Default, false },              // ASCII not escaped
                     new object[] { '\u001F', JavaScriptEncoder.Default, true },          // control character within single byte range
+                    new object[] { '\u007F', JavaScriptEncoder.Default, true },          // control character, sbyte.MaxValue
                     new object[] { '\u2000', JavaScriptEncoder.Default, true },          // space character outside single byte range
                     new object[] { '\u00A2', JavaScriptEncoder.Default, true },          // non-ASCII but < 255
                     new object[] { '\uA686', JavaScriptEncoder.Default, true },          // non-ASCII above short.MaxValue
@@ -111,6 +116,7 @@ namespace System.Text.Encodings.Web.Tests
 
                     new object[] { 'a', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), false },
                     new object[] { '\u001F', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+                    new object[] { '\u007F', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
                     new object[] { '\u2000', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
                     new object[] { '\u00A2', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
                     new object[] { '\uA686', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
@@ -124,38 +130,6 @@ namespace System.Text.Encodings.Web.Tests
                     new object[] { '\'', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
                     new object[] { '+', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
                     new object[] { '\uFFFD', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
-
-                    new object[] { 'a', JavaScriptEncoder.Create(UnicodeRanges.All), false },
-                    new object[] { '\u001F', JavaScriptEncoder.Create(UnicodeRanges.All), true },
-                    new object[] { '\u2000', JavaScriptEncoder.Create(UnicodeRanges.All), true },
-                    new object[] { '\u00A2', JavaScriptEncoder.Create(UnicodeRanges.All), false },
-                    new object[] { '\uA686', JavaScriptEncoder.Create(UnicodeRanges.All), false },
-                    new object[] { '\u6C49', JavaScriptEncoder.Create(UnicodeRanges.All), false },
-                    new object[] { '"', JavaScriptEncoder.Create(UnicodeRanges.All), true },
-                    new object[] { '\\', JavaScriptEncoder.Create(UnicodeRanges.All), true },
-                    new object[] { '<', JavaScriptEncoder.Create(UnicodeRanges.All), true },
-                    new object[] { '>', JavaScriptEncoder.Create(UnicodeRanges.All), true },
-                    new object[] { '&', JavaScriptEncoder.Create(UnicodeRanges.All), true },
-                    new object[] { '`', JavaScriptEncoder.Create(UnicodeRanges.All), true },
-                    new object[] { '\'', JavaScriptEncoder.Create(UnicodeRanges.All), true },
-                    new object[] { '+', JavaScriptEncoder.Create(UnicodeRanges.All), true },
-                    new object[] { '\uFFFD', JavaScriptEncoder.Create(UnicodeRanges.All), false },
-
-                    new object[] { 'a', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
-                    new object[] { '\u001F', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
-                    new object[] { '\u2000', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
-                    new object[] { '\u00A2', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
-                    new object[] { '\uA686', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
-                    new object[] { '\u6C49', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
-                    new object[] { '"', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
-                    new object[] { '\\', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
-                    new object[] { '<', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
-                    new object[] { '>', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
-                    new object[] { '&', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
-                    new object[] { '`', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
-                    new object[] { '\'', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
-                    new object[] { '+', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
-                    new object[] { '\uFFFD', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
                 };
             }
         }
@@ -203,6 +177,7 @@ namespace System.Text.Encodings.Web.Tests
                 {
                     new object[] { 'a', JavaScriptEncoder.Create(UnicodeRanges.All), false },
                     new object[] { '\u001F', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+                    new object[] { '\u007F', JavaScriptEncoder.Create(UnicodeRanges.All), true },
                     new object[] { '\u2000', JavaScriptEncoder.Create(UnicodeRanges.All), true },
                     new object[] { '\u00A2', JavaScriptEncoder.Create(UnicodeRanges.All), false },
                     new object[] { '\uA686', JavaScriptEncoder.Create(UnicodeRanges.All), false },
@@ -219,6 +194,7 @@ namespace System.Text.Encodings.Web.Tests
 
                     new object[] { 'a', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
                     new object[] { '\u001F', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
+                    new object[] { '\u007F', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
                     new object[] { '\u2000', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
                     new object[] { '\u00A2', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
                     new object[] { '\uA686', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
@@ -232,6 +208,23 @@ namespace System.Text.Encodings.Web.Tests
                     new object[] { '\'', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
                     new object[] { '+', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
                     new object[] { '\uFFFD', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+
+                    new object[] { 'a', new MyCustomEncoder(UnicodeRanges.All), false },
+                    new object[] { '\u001F', new MyCustomEncoder(UnicodeRanges.All), true },
+                    new object[] { '\u007F', new MyCustomEncoder(UnicodeRanges.All), true },
+                    new object[] { '\u2000', new MyCustomEncoder(UnicodeRanges.All), true },
+                    new object[] { '\u00A2', new MyCustomEncoder(UnicodeRanges.All), false },
+                    new object[] { '\uA686', new MyCustomEncoder(UnicodeRanges.All), false },
+                    new object[] { '\u6C49', new MyCustomEncoder(UnicodeRanges.All), false },
+                    new object[] { '"', new MyCustomEncoder(UnicodeRanges.All), true },
+                    new object[] { '\\', new MyCustomEncoder(UnicodeRanges.All), true },
+                    new object[] { '<', new MyCustomEncoder(UnicodeRanges.All), true },
+                    new object[] { '>', new MyCustomEncoder(UnicodeRanges.All), true },
+                    new object[] { '&', new MyCustomEncoder(UnicodeRanges.All), true },
+                    new object[] { '`', new MyCustomEncoder(UnicodeRanges.All), true },
+                    new object[] { '\'', new MyCustomEncoder(UnicodeRanges.All), true },
+                    new object[] { '+', new MyCustomEncoder(UnicodeRanges.All), true },
+                    new object[] { '\uFFFD', new MyCustomEncoder(UnicodeRanges.All), false },
                 };
             }
         }
@@ -305,6 +298,7 @@ namespace System.Text.Encodings.Web.Tests
                     new object[] { JavaScriptEncoder.Create(UnicodeRanges.BasicLatin) },
                     new object[] { JavaScriptEncoder.Create(UnicodeRanges.All) },
                     new object[] { JavaScriptEncoder.UnsafeRelaxedJsonEscaping },
+                    new object[] { new MyCustomEncoder(UnicodeRanges.BasicLatin) },
                 };
             }
         }
@@ -359,10 +353,89 @@ namespace System.Text.Encodings.Web.Tests
 
                     new object[] { '\uD801', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin) },
                     new object[] { '\uDC01', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin) },
+
+                    new object[] { '\uD801', new MyCustomEncoder(UnicodeRanges.BasicLatin) },
+                    new object[] { '\uDC01', new MyCustomEncoder(UnicodeRanges.BasicLatin) },
                 };
             }
         }
 
+        internal sealed class MyCustomEncoder : JavaScriptEncoder
+        {
+            private readonly AllowedCharactersBitmap _allowedCharacters;
+
+            public MyCustomEncoder(TextEncoderSettings filter)
+            {
+                if (filter == null)
+                {
+                    throw new ArgumentNullException(nameof(filter));
+                }
+
+                _allowedCharacters = filter.GetAllowedCharacters();
+
+                // Forbid codepoints which aren't mapped to characters or which are otherwise always disallowed
+                // (includes categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp)
+                _allowedCharacters.ForbidUndefinedCharacters();
+
+                // Forbid characters that are special in HTML.
+                // Even though this is a not HTML encoder,
+                // it's unfortunately common for developers to
+                // forget to HTML-encode a string once it has been JS-encoded,
+                // so this offers extra protection.
+                ForbidHtmlCharacters(_allowedCharacters);
+
+                // '\' (U+005C REVERSE SOLIDUS) must always be escaped in Javascript / ECMAScript / JSON.
+                // '/' (U+002F SOLIDUS) is not Javascript / ECMAScript / JSON-sensitive so doesn't need to be escaped.
+                _allowedCharacters.ForbidCharacter('\\');
+
+                // '`' (U+0060 GRAVE ACCENT) is ECMAScript-sensitive (see ECMA-262).
+                _allowedCharacters.ForbidCharacter('`');
+            }
+
+            internal static void ForbidHtmlCharacters(AllowedCharactersBitmap allowedCharacters)
+            {
+                allowedCharacters.ForbidCharacter('<');
+                allowedCharacters.ForbidCharacter('>');
+                allowedCharacters.ForbidCharacter('&');
+                allowedCharacters.ForbidCharacter('\''); // can be used to escape attributes
+                allowedCharacters.ForbidCharacter('\"'); // can be used to escape attributes
+                allowedCharacters.ForbidCharacter('+'); // technically not HTML-specific, but can be used to perform UTF7-based attacks
+            }
+
+            public MyCustomEncoder(params UnicodeRange[] allowedRanges) : this(new TextEncoderSettings(allowedRanges))
+            { }
+
+            public override int MaxOutputCharactersPerInputCharacter => 12; // "\uFFFF\uFFFF" is the longest encoded form
+
+            public override unsafe bool TryEncodeUnicodeScalar(int unicodeScalar, char* buffer, int bufferLength, out int numberOfCharactersWritten)
+            {
+                throw new NotImplementedException();
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public override unsafe int FindFirstCharacterToEncode(char* text, int textLength)
+            {
+                if (text == null)
+                {
+                    throw new ArgumentNullException(nameof(text));
+                }
+
+                return _allowedCharacters.FindFirstCharacterToEncode(text, textLength);
+            }
+
+            public override bool WillEncode(int unicodeScalar)
+            {
+                if (UnicodeHelpers.IsSupplementaryCodePoint(unicodeScalar))
+                {
+                    return true;
+                }
+
+                Debug.Assert(unicodeScalar >= char.MinValue && unicodeScalar <= char.MaxValue);
+
+                return !_allowedCharacters.IsUnicodeScalarAllowed(unicodeScalar);
+            }
+        }
+
         [Fact]
         public void TestSurrogate()
         {
index 2a51013..a6e0c8c 100644 (file)
@@ -5,9 +5,12 @@
 using System.Buffers;
 using System.Buffers.Text;
 using System.Diagnostics;
-using System.Runtime.CompilerServices;  // Do not remove. Needed for Int32LsbToHexDigit when !BUILDING_INBOX_LIBRARY
 using System.Text.Encodings.Web;
 
+#if !BUILDING_INBOX_LIBRARY
+using System.Runtime.CompilerServices;
+#endif
+
 namespace System.Text.Json
 {
     // TODO: Replace the escaping logic with publicly shipping APIs from https://github.com/dotnet/corefx/issues/33509
@@ -59,7 +62,7 @@ namespace System.Text.Json
         public static unsafe int NeedsEscaping(ReadOnlySpan<char> value, JavaScriptEncoder encoder)
         {
             // Some implementations of JavaScriptEncoder.FindFirstCharacterToEncode may not accept
-            // null pointers and gaurd against that. Hence, check up-front to return -1.
+            // null pointers and guard against that. Hence, check up-front to return -1.
             if (value.IsEmpty)
             {
                 return -1;