using System.Buffers;
using System.Buffers.Text;
using System.Diagnostics;
+using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text.Encodings.Web;
+#if BUILDING_INBOX_LIBRARY
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
+
namespace System.Text.Json
{
// TODO: Replace the escaping logic with publicly shipping APIs from https://github.com/dotnet/corefx/issues/33509
// Returns true when value lies above LastAsciiCharacter or when its AllowList entry is 0.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool NeedsEscaping(char value) => value > LastAsciiCharacter || AllowList[value] == 0;
- public static int NeedsEscaping(ReadOnlySpan<byte> value, JavaScriptEncoder encoder)
+#if BUILDING_INBOX_LIBRARY
+ private static readonly Vector128<short> s_mask_UInt16_0x20 = Vector128.Create((short)0x20); // Space ' ' (lanes are signed 16-bit despite the "UInt16" in these field names)
+
+ private static readonly Vector128<short> s_mask_UInt16_0x22 = Vector128.Create((short)0x22); // Quotation Mark '"'
+ private static readonly Vector128<short> s_mask_UInt16_0x26 = Vector128.Create((short)0x26); // Ampersand '&'
+ private static readonly Vector128<short> s_mask_UInt16_0x27 = Vector128.Create((short)0x27); // Apostrophe '''
+ private static readonly Vector128<short> s_mask_UInt16_0x2B = Vector128.Create((short)0x2B); // Plus sign '+'
+ private static readonly Vector128<short> s_mask_UInt16_0x3C = Vector128.Create((short)0x3C); // Less Than Sign '<'
+ private static readonly Vector128<short> s_mask_UInt16_0x3E = Vector128.Create((short)0x3E); // Greater Than Sign '>'
+ private static readonly Vector128<short> s_mask_UInt16_0x5C = Vector128.Create((short)0x5C); // Reverse Solidus '\'
+ private static readonly Vector128<short> s_mask_UInt16_0x60 = Vector128.Create((short)0x60); // Grave Accent '`'
+
+ private static readonly Vector128<short> s_mask_UInt16_0x7E = Vector128.Create((short)0x7E); // Tilde '~'
+ // 8-bit counterparts of the constants above, consumed by the Vector128<sbyte> overload of CreateEscapingMask.
+ private static readonly Vector128<sbyte> s_mask_SByte_0x20 = Vector128.Create((sbyte)0x20); // Space ' '
+
+ private static readonly Vector128<sbyte> s_mask_SByte_0x22 = Vector128.Create((sbyte)0x22); // Quotation Mark '"'
+ private static readonly Vector128<sbyte> s_mask_SByte_0x26 = Vector128.Create((sbyte)0x26); // Ampersand '&'
+ private static readonly Vector128<sbyte> s_mask_SByte_0x27 = Vector128.Create((sbyte)0x27); // Apostrophe '''
+ private static readonly Vector128<sbyte> s_mask_SByte_0x2B = Vector128.Create((sbyte)0x2B); // Plus sign '+'
+ private static readonly Vector128<sbyte> s_mask_SByte_0x3C = Vector128.Create((sbyte)0x3C); // Less Than Sign '<'
+ private static readonly Vector128<sbyte> s_mask_SByte_0x3E = Vector128.Create((sbyte)0x3E); // Greater Than Sign '>'
+ private static readonly Vector128<sbyte> s_mask_SByte_0x5C = Vector128.Create((sbyte)0x5C); // Reverse Solidus '\'
+ private static readonly Vector128<sbyte> s_mask_SByte_0x60 = Vector128.Create((sbyte)0x60); // Grave Accent '`'
+ // Returns a vector whose 16-bit lanes are set wherever the corresponding UTF-16 code unit of sourceValue needs escaping, and zero elsewhere.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static Vector128<short> CreateEscapingMask(Vector128<short> sourceValue)
{
- int idx;
+ Debug.Assert(Sse2.IsSupported);
- if (encoder != null)
- {
- idx = encoder.FindFirstCharacterToEncodeUtf8(value);
- goto Return;
- }
+ Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_mask_UInt16_0x20); // Signed compare: control characters below Space ' ', plus code units >= 0x8000, which are negative as shorts
+
+ mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x22)); // Quotation Mark '"'
+ mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x26)); // Ampersand '&'
+ mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x27)); // Apostrophe '''
+ mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x2B)); // Plus sign '+'
+
+ mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3C)); // Less Than Sign '<'
+ mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3E)); // Greater Than Sign '>'
+ mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x5C)); // Reverse Solidus '\'
+ mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x60)); // Grave Accent '`'
+
+ mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_mask_UInt16_0x7E)); // Anything above Tilde '~' up to 0x7FFF; larger code units are negative as shorts and are caught by the first comparison
+
+ return mask;
+ }
+
+ private static readonly Vector128<sbyte> s_mask_SByte_0x7E = Vector128.Create((sbyte)0x7E); // Tilde '~'
+
+ // Returns a vector whose 8-bit lanes are set wherever the corresponding UTF-8 byte of sourceValue
+ // needs escaping, and zero elsewhere. The set of flagged values is kept in agreement with the
+ // Vector128<short> overload: everything below Space, the listed punctuation, and everything above Tilde.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static Vector128<sbyte> CreateEscapingMask(Vector128<sbyte> sourceValue)
+ {
+ Debug.Assert(Sse2.IsSupported);
- for (idx = 0; idx < value.Length; idx++)
+ // Signed compare: flags the control characters below Space ' ' and every byte >= 0x80,
+ // because those bytes are negative when viewed as sbyte (sbyte.MaxValue is 0x7F).
+ Vector128<sbyte> mask = Sse2.CompareLessThan(sourceValue, s_mask_SByte_0x20);
+
+ mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x22)); // Quotation Mark "
+ mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x26)); // Ampersand &
+ mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x27)); // Apostrophe '
+ mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x2B)); // Plus sign +
+
+ mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3C)); // Less Than Sign <
+ mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3E)); // Greater Than Sign >
+ mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x5C)); // Reverse Solidus \
+ mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x60)); // Grave Accent `
+
+ // 0x7F (DEL) is the only sbyte value greater than 0x7E. It is neither negative nor below Space,
+ // so without this comparison it would slip through even though the 16-bit overload flags it.
+ mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_mask_SByte_0x7E));
+
+ return mask;
+ }
+#endif
+ // Returns the index of the first byte of value that needs escaping, or -1 when no byte does; with a non-null encoder the result of encoder.FindFirstCharacterToEncodeUtf8 is returned instead.
+ public static unsafe int NeedsEscaping(ReadOnlySpan<byte> value, JavaScriptEncoder encoder)
+ {
+ fixed (byte* ptr = value)
{
- if (NeedsEscaping(value[idx]))
+ int idx = 0;
+ // A caller-supplied encoder decides on its own; its result is returned unchanged.
+ if (encoder != null)
{
+ idx = encoder.FindFirstCharacterToEncodeUtf8(value);
goto Return;
}
- }
- idx = -1; // all characters allowed
+#if BUILDING_INBOX_LIBRARY
+ if (Sse2.IsSupported)
+ {
+ sbyte* startingAddress = (sbyte*)ptr;
+ while (value.Length - 16 >= idx)
+ {
+ Debug.Assert(startingAddress >= ptr && startingAddress <= (ptr + value.Length - 16));
+
+ // Load the next 16 bytes.
+ Vector128<sbyte> sourceValue = Sse2.LoadVector128(startingAddress);
+
+ // Check if any of the 16 bytes need to be escaped.
+ Vector128<sbyte> mask = CreateEscapingMask(sourceValue);
+
+ int index = Sse2.MoveMask(mask.AsByte()); // One bit per byte lane, lowest bit for the first byte.
+ // If index == 0, that means none of the 16 bytes needed to be escaped.
+ // TrailingZeroCount is relatively expensive, avoid it if possible.
+ if (index != 0)
+ {
+ // Found at least one byte that needs to be escaped, figure out the index of
+ // the first one found that needed to be escaped within the 16 bytes.
+ Debug.Assert(index > 0 && index <= 65_535);
+ int tzc = BitOperations.TrailingZeroCount(index);
+ Debug.Assert(tzc >= 0 && tzc <= 16);
+ idx += tzc;
+ goto Return;
+ }
+ idx += 16;
+ startingAddress += 16;
+ }
+
+ // Fewer than 16 bytes remain; the scalar loop below handles them.
+ Debug.Assert(value.Length - idx < 16);
+ }
+#endif
+
+ for (; idx < value.Length; idx++)
+ {
+ Debug.Assert((ptr + idx) <= (ptr + value.Length));
+ if (NeedsEscaping(*(ptr + idx)))
+ {
+ goto Return;
+ }
+ }
- Return:
- return idx;
+ idx = -1; // all characters allowed
+
+ Return:
+ return idx;
+ }
}
// Returns the index of the first char of value that needs escaping, or -1 when none does; a non-null encoder is consulted instead whenever value is non-empty.
public static unsafe int NeedsEscaping(ReadOnlySpan<char> value, JavaScriptEncoder encoder)
{
- int idx;
-
- // Some implementations of JavascriptEncoder.FindFirstCharacterToEncode may not accept
- // null pointers and gaurd against that. Hence, check up-front and fall down to return -1.
- if (encoder != null && !value.IsEmpty)
+ fixed (char* ptr = value)
{
- fixed (char* ptr = value)
+ int idx = 0;
+
+ // Some implementations of JavaScriptEncoder.FindFirstCharacterToEncode may not accept
+ // null pointers and guard against that. Hence, check up-front and fall through to return -1.
+ if (encoder != null && !value.IsEmpty)
{
idx = encoder.FindFirstCharacterToEncode(ptr, value.Length);
+ goto Return;
}
- goto Return;
- }
- for (idx = 0; idx < value.Length; idx++)
- {
- if (NeedsEscaping(value[idx]))
+#if BUILDING_INBOX_LIBRARY
+ if (Sse2.IsSupported)
{
- goto Return;
+ short* startingAddress = (short*)ptr;
+ while (value.Length - 8 >= idx)
+ {
+ Debug.Assert(startingAddress >= ptr && startingAddress <= (ptr + value.Length - 8));
+
+ // Load the next 8 characters.
+ Vector128<short> sourceValue = Sse2.LoadVector128(startingAddress);
+
+ // Check if any of the 8 characters need to be escaped.
+ Vector128<short> mask = CreateEscapingMask(sourceValue);
+
+ int index = Sse2.MoveMask(mask.AsByte()); // The mask is viewed as bytes, so each character contributes two adjacent bits.
+ // If index == 0, that means none of the 8 characters needed to be escaped.
+ // TrailingZeroCount is relatively expensive, avoid it if possible.
+ if (index != 0)
+ {
+ // Found at least one character that needs to be escaped, figure out the index of
+ // the first one found that needed to be escaped within the 8 characters.
+ Debug.Assert(index > 0 && index <= 65_535);
+ int tzc = BitOperations.TrailingZeroCount(index);
+ Debug.Assert(tzc % 2 == 0 && tzc >= 0 && tzc <= 16);
+ idx += tzc >> 1; // Halve the bit position to convert from byte lanes to characters.
+ goto Return;
+ }
+ idx += 8;
+ startingAddress += 8;
+ }
+
+ // Fewer than 8 characters remain; the scalar loop below handles them.
+ Debug.Assert(value.Length - idx < 8);
+ }
+#endif
+
+ for (; idx < value.Length; idx++)
+ {
+ Debug.Assert((ptr + idx) <= (ptr + value.Length));
+ if (NeedsEscaping(*(ptr + idx)))
+ {
+ goto Return;
+ }
}
- }
- idx = -1; // all characters allowed
+ idx = -1; // All characters are allowed.
- Return:
- return idx;
+ Return:
+ return idx;
+ }
}
public static int GetMaxEscapedLength(int textLength, int firstIndexToEscape)
JsonTestHelper.AssertContents("\"\u2020\\\"\"", output);
}
+ [Theory]
+ [MemberData(nameof(EscapingTestData))]
+ public void EscapingTestWhileWriting(char replacementChar, JavaScriptEncoder encoder, bool requiresEscaping)
+ {
+ var writerOptions = new JsonWriterOptions { Encoder = encoder };
+ // Writes inputs as both string and UTF-8 bytes, with one or all characters replaced by replacementChar, and checks where the first '\' appears in the output.
+ { // Null and empty inputs: no escape sequence is expected.
+ ReadOnlyMemory<byte> written = WriteStringHelper(writerOptions, null);
+ Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+
+ written = WriteUtf8StringHelper(writerOptions, null);
+ Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+
+ written = WriteStringHelper(writerOptions, string.Empty);
+ Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+
+ written = WriteUtf8StringHelper(writerOptions, Array.Empty<byte>());
+ Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+ }
+ // Fixed seed keeps the generated base strings identical across runs; lengths 0 through 49 are covered.
+ var random = new Random(42);
+ for (int dataLength = 0; dataLength < 50; dataLength++)
+ {
+ char[] str = new char[dataLength];
+ for (int i = 0; i < dataLength; i++)
+ {
+ str[i] = (char)random.Next(97, 123); // 'a'..'z'
+ }
+ string baseStr = new string(str);
+ byte[] sourceUtf8 = Encoding.UTF8.GetBytes(baseStr);
+
+ ReadOnlyMemory<byte> written = WriteStringHelper(writerOptions, baseStr);
+ Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+
+ written = WriteUtf8StringHelper(writerOptions, sourceUtf8);
+ Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+ // Replace one position at a time; when escaping is required the first '\' must land exactly on that position.
+ for (int i = 0; i < dataLength; i++)
+ {
+ char[] changed = baseStr.ToCharArray();
+ changed[i] = replacementChar;
+ string newStr = new string(changed);
+ sourceUtf8 = Encoding.UTF8.GetBytes(newStr);
+
+ written = WriteStringHelper(writerOptions, newStr);
+ int escapedIndex = written.Span.IndexOf((byte)'\\');
+ Assert.Equal(requiresEscaping ? (i + 1) : -1, escapedIndex); // Account for the start quote
+
+ written = WriteUtf8StringHelper(writerOptions, sourceUtf8);
+ escapedIndex = written.Span.IndexOf((byte)'\\');
+ Assert.Equal(requiresEscaping ? (i + 1) : -1, escapedIndex); // Account for the start quote
+ }
+ // Replace every character; when escaping is required the first '\' must follow the opening quote.
+ if (dataLength != 0)
+ {
+ char[] changed = baseStr.ToCharArray();
+ changed.AsSpan().Fill(replacementChar);
+ string newStr = new string(changed);
+ sourceUtf8 = Encoding.UTF8.GetBytes(newStr);
+
+ written = WriteStringHelper(writerOptions, newStr);
+ int escapedIndex = written.Span.IndexOf((byte)'\\');
+ Assert.Equal(requiresEscaping ? 1 : -1, escapedIndex); // Account for the start quote
+
+ written = WriteUtf8StringHelper(writerOptions, sourceUtf8);
+ escapedIndex = written.Span.IndexOf((byte)'\\');
+ Assert.Equal(requiresEscaping ? 1 : -1, escapedIndex); // Account for the start quote
+ }
+ }
+ }
+ // Rows are (replacementChar, encoder, requiresEscaping); a null encoder leaves JsonWriterOptions.Encoder unset.
+ public static IEnumerable<object[]> EscapingTestData
+ {
+ get
+ {
+ return new List<object[]>
+ {
+ new object[] { 'a', null, false }, // ASCII not escaped
+ new object[] { '\u001F', null, true }, // control character within single byte range
+ new object[] { '\u2000', null, true }, // space character outside single byte range
+ new object[] { '\u00A2', null, true }, // non-ASCII but < 255
+ new object[] { '\uA686', null, true }, // non-ASCII above short.MaxValue
+ new object[] { '\u6C49', null, true }, // non-ASCII CJK ideograph - multi-byte in UTF-8
+ new object[] { '"', null, true }, // ASCII but must always be escaped in JSON
+ new object[] { '\\', null, true }, // ASCII but must always be escaped in JSON
+ new object[] { '<', null, true }, // ASCII but escaped by default
+ new object[] { '>', null, true }, // ASCII but escaped by default
+ new object[] { '&', null, true }, // ASCII but escaped by default
+ new object[] { '`', null, true }, // ASCII but escaped by default
+ new object[] { '\'', null, true }, // ASCII but escaped by default
+ new object[] { '+', null, true }, // ASCII but escaped by default
+ // JavaScriptEncoder.Default: same expectations as the null-encoder rows above.
+ new object[] { 'a', JavaScriptEncoder.Default, false },
+ new object[] { '\u001F', JavaScriptEncoder.Default, true },
+ new object[] { '\u2000', JavaScriptEncoder.Default, true },
+ new object[] { '\u00A2', JavaScriptEncoder.Default, true },
+ new object[] { '\uA686', JavaScriptEncoder.Default, true },
+ new object[] { '\u6C49', JavaScriptEncoder.Default, true },
+ new object[] { '"', JavaScriptEncoder.Default, true },
+ new object[] { '\\', JavaScriptEncoder.Default, true },
+ new object[] { '<', JavaScriptEncoder.Default, true },
+ new object[] { '>', JavaScriptEncoder.Default, true },
+ new object[] { '&', JavaScriptEncoder.Default, true },
+ new object[] { '`', JavaScriptEncoder.Default, true },
+ new object[] { '\'', JavaScriptEncoder.Default, true },
+ new object[] { '+', JavaScriptEncoder.Default, true },
+ // UnicodeRanges.BasicLatin: same expectations as JavaScriptEncoder.Default.
+ new object[] { 'a', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), false },
+ new object[] { '\u001F', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+ new object[] { '\u2000', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+ new object[] { '\u00A2', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+ new object[] { '\uA686', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+ new object[] { '\u6C49', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+ new object[] { '"', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+ new object[] { '\\', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+ new object[] { '<', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+ new object[] { '>', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+ new object[] { '&', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+ new object[] { '`', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+ new object[] { '\'', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+ new object[] { '+', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+ // UnicodeRanges.All: U+00A2, U+A686 and U+6C49 are expected to pass through unescaped; U+2000 is still expected to be escaped.
+ new object[] { 'a', JavaScriptEncoder.Create(UnicodeRanges.All), false },
+ new object[] { '\u001F', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ new object[] { '\u2000', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ new object[] { '\u00A2', JavaScriptEncoder.Create(UnicodeRanges.All), false },
+ new object[] { '\uA686', JavaScriptEncoder.Create(UnicodeRanges.All), false },
+ new object[] { '\u6C49', JavaScriptEncoder.Create(UnicodeRanges.All), false },
+ new object[] { '"', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ new object[] { '\\', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ new object[] { '<', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ new object[] { '>', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ new object[] { '&', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ new object[] { '`', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ new object[] { '\'', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ new object[] { '+', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ // UnsafeRelaxedJsonEscaping: only U+001F, U+2000, the quotation mark and the reverse solidus are expected to be escaped.
+ new object[] { 'a', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '\u001F', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
+ new object[] { '\u2000', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
+ new object[] { '\u00A2', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '\uA686', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '\u6C49', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '"', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
+ new object[] { '\\', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
+ new object[] { '<', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '>', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '&', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '`', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '\'', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '+', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ };
+ }
+ }
+ // Inserts replacementChar at every position of a CJK-only base string and checks the byte offset of the first '\' in the written output.
+ [Theory]
+ [MemberData(nameof(EscapingTestData_NonAscii))]
+ public unsafe void WriteString_NonAscii(char replacementChar, JavaScriptEncoder encoder, bool requiresEscaping)
+ {
+ var writerOptions = new JsonWriterOptions { Encoder = encoder };
+ var random = new Random(42);
+ for (int dataLength = 1; dataLength < 50; dataLength++)
+ {
+ char[] str = new char[dataLength];
+ for (int i = 0; i < dataLength; i++)
+ {
+ str[i] = (char)random.Next(0x2E9B, 0x2EF4); // CJK Radicals Supplement characters
+ }
+ string baseStr = new string(str);
+ byte[] sourceUtf8 = Encoding.UTF8.GetBytes(baseStr);
+
+ ReadOnlyMemory<byte> written = WriteStringHelper(writerOptions, baseStr);
+ Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+
+ written = WriteUtf8StringHelper(writerOptions, sourceUtf8);
+ Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+ // Insert (rather than overwrite) so that exactly i CJK characters precede replacementChar.
+ for (int i = 0; i < dataLength; i++)
+ {
+ string source = baseStr.Insert(i, new string(replacementChar, 1));
+ sourceUtf8 = Encoding.UTF8.GetBytes(source);
+
+ written = WriteStringHelper(writerOptions, source);
+ int escapedIndex = written.Span.IndexOf((byte)'\\');
+ // Each CJK character expands to 3 utf-8 bytes.
+ Assert.Equal(requiresEscaping ? ((i * 3) + 1) : -1, escapedIndex); // Account for the start quote
+
+ written = WriteUtf8StringHelper(writerOptions, sourceUtf8);
+ escapedIndex = written.Span.IndexOf((byte)'\\');
+ // Each CJK character expands to 3 utf-8 bytes.
+ Assert.Equal(requiresEscaping ? ((i * 3) + 1) : -1, escapedIndex); // Account for the start quote
+ }
+ }
+ }
+ // Rows are (replacementChar, encoder, requiresEscaping), limited to the two encoders whose rows expect non-ASCII letters to stay unescaped.
+ public static IEnumerable<object[]> EscapingTestData_NonAscii
+ {
+ get
+ {
+ return new List<object[]>
+ {
+ new object[] { 'a', JavaScriptEncoder.Create(UnicodeRanges.All), false },
+ new object[] { '\u001F', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ new object[] { '\u2000', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ new object[] { '\u00A2', JavaScriptEncoder.Create(UnicodeRanges.All), false },
+ new object[] { '\uA686', JavaScriptEncoder.Create(UnicodeRanges.All), false },
+ new object[] { '\u6C49', JavaScriptEncoder.Create(UnicodeRanges.All), false },
+ new object[] { '"', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ new object[] { '\\', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ new object[] { '<', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ new object[] { '>', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ new object[] { '&', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ new object[] { '`', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ new object[] { '\'', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ new object[] { '+', JavaScriptEncoder.Create(UnicodeRanges.All), true },
+ // UnsafeRelaxedJsonEscaping: the HTML-sensitive ASCII characters below are additionally expected to stay unescaped.
+ new object[] { 'a', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '\u001F', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
+ new object[] { '\u2000', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
+ new object[] { '\u00A2', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '\uA686', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '\u6C49', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '"', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
+ new object[] { '\\', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, true },
+ new object[] { '<', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '>', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '&', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '`', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '\'', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '+', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ };
+ }
+ }
+ // Overwrites adjacent characters with the surrogate pair U+D801 U+DC37 and expects, for every encoder in JavaScriptEncoders, the first '\' to start at the pair's position.
+ [Theory]
+ [MemberData(nameof(JavaScriptEncoders))]
+ public void EscapingTestWhileWritingSurrogate(JavaScriptEncoder encoder)
+ {
+ char highSurrogate = '\uD801';
+ char lowSurrogate = '\uDC37';
+ var writerOptions = new JsonWriterOptions { Encoder = encoder };
+ var random = new Random(42);
+ for (int dataLength = 2; dataLength < 50; dataLength++)
+ {
+ char[] str = new char[dataLength];
+ for (int i = 0; i < dataLength; i++)
+ {
+ str[i] = (char)random.Next(97, 123); // 'a'..'z'
+ }
+ string baseStr = new string(str);
+ byte[] sourceUtf8 = Encoding.UTF8.GetBytes(baseStr);
+
+ ReadOnlyMemory<byte> written = WriteStringHelper(writerOptions, baseStr);
+ Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+
+ written = WriteUtf8StringHelper(writerOptions, sourceUtf8);
+ Assert.Equal(-1, written.Span.IndexOf((byte)'\\'));
+ // Place a single surrogate pair at each position i (occupying i and i + 1).
+ for (int i = 0; i < dataLength - 1; i++)
+ {
+ char[] changed = baseStr.ToCharArray();
+ changed[i] = highSurrogate;
+ changed[i + 1] = lowSurrogate;
+ string newStr = new string(changed);
+ sourceUtf8 = Encoding.UTF8.GetBytes(newStr);
+
+ written = WriteStringHelper(writerOptions, newStr);
+ int escapedIndex = written.Span.IndexOf((byte)'\\');
+ Assert.Equal(i + 1, escapedIndex); // Account for the start quote
+
+ written = WriteUtf8StringHelper(writerOptions, sourceUtf8);
+ escapedIndex = written.Span.IndexOf((byte)'\\');
+ Assert.Equal(i + 1, escapedIndex); // Account for the start quote
+ }
+ // Fill the string with consecutive surrogate pairs; the first '\' must then follow the opening quote.
+ {
+ char[] changed = baseStr.ToCharArray();
+
+ for (int i = 0; i < changed.Length - 1; i += 2)
+ {
+ changed[i] = highSurrogate;
+ changed[i + 1] = lowSurrogate;
+ }
+
+ string newStr = new string(changed);
+ sourceUtf8 = Encoding.UTF8.GetBytes(newStr);
+
+ written = WriteStringHelper(writerOptions, newStr);
+ int escapedIndex = written.Span.IndexOf((byte)'\\');
+ Assert.Equal(1, escapedIndex); // Account for the start quote
+
+ written = WriteUtf8StringHelper(writerOptions, sourceUtf8);
+ escapedIndex = written.Span.IndexOf((byte)'\\');
+ Assert.Equal(1, escapedIndex); // Account for the start quote
+ }
+ }
+ }
+ // One row per encoder configuration under test; the null row leaves JsonWriterOptions.Encoder unset.
+ public static IEnumerable<object[]> JavaScriptEncoders
+ {
+ get
+ {
+ return new List<object[]>
+ {
+ new object[] { null },
+ new object[] { JavaScriptEncoder.Default },
+ new object[] { JavaScriptEncoder.Create(UnicodeRanges.BasicLatin) },
+ new object[] { JavaScriptEncoder.Create(UnicodeRanges.All) },
+ new object[] { JavaScriptEncoder.UnsafeRelaxedJsonEscaping },
+ };
+ }
+ }
+ // Writes input containing a lone surrogate (string overload) or a lone 0xC3 lead byte (UTF-8 overload) at each position and checks whether and where the output contains a '\'.
+ [Theory]
+ [MemberData(nameof(InvalidEscapingTestData))]
+ public unsafe void WriteStringInvalidCharacter(char replacementChar, JavaScriptEncoder encoder, bool requiresEscaping)
+ {
+ var writerOptions = new JsonWriterOptions { Encoder = encoder };
+ var random = new Random(42);
+ for (int dataLength = 0; dataLength < 47; dataLength++)
+ {
+ char[] str = new char[dataLength];
+ for (int i = 0; i < dataLength; i++)
+ {
+ str[i] = (char)random.Next(97, 123); // 'a'..'z'
+ }
+ string baseStr = new string(str);
+ byte[] baseStrUtf8 = Encoding.UTF8.GetBytes(baseStr);
+
+ for (int i = 0; i < dataLength; i++)
+ {
+ char[] changed = baseStr.ToCharArray();
+ changed[i] = replacementChar;
+ string source = new string(changed);
+ byte[] sourceUtf8 = new byte[baseStrUtf8.Length];
+ baseStrUtf8.AsSpan().CopyTo(sourceUtf8);
+ sourceUtf8[i] = 0xC3; // Invalid: lead byte of a 2-byte UTF-8 sequence with no continuation byte after it
+
+ ReadOnlyMemory<byte> written = WriteStringHelper(writerOptions, source);
+ // Some encoders don't escape the replacement character
+ Assert.Equal(requiresEscaping ? i + 1 : -1, written.Span.IndexOf((byte)'\\')); // Account for the start quote
+
+ written = WriteUtf8StringHelper(writerOptions, sourceUtf8);
+ // Some encoders don't escape the replacement character
+ Assert.Equal(requiresEscaping ? i + 1 : -1, written.Span.IndexOf((byte)'\\')); // Account for the start quote
+ }
+ }
+ }
+ // Rows are (replacementChar, encoder, requiresEscaping) for lone surrogates: Default and BasicLatin are expected to escape, UnsafeRelaxedJsonEscaping and UnicodeRanges.All are not.
+ public static IEnumerable<object[]> InvalidEscapingTestData
+ {
+ get
+ {
+ return new List<object[]>
+ {
+ new object[] { '\uD801', JavaScriptEncoder.Default, true }, // Invalid, high surrogate alone
+ new object[] { '\uDC01', JavaScriptEncoder.Default, true }, // Invalid, low surrogate alone
+
+ new object[] { '\uD801', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+ new object[] { '\uDC01', JavaScriptEncoder.UnsafeRelaxedJsonEscaping, false },
+
+ new object[] { '\uD801', JavaScriptEncoder.Create(UnicodeRanges.All), false },
+ new object[] { '\uDC01', JavaScriptEncoder.Create(UnicodeRanges.All), false },
+
+ new object[] { '\uD801', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+ new object[] { '\uDC01', JavaScriptEncoder.Create(UnicodeRanges.BasicLatin), true },
+ };
+ }
+ }
+ // Writes str as a single JSON string value with the given options and returns the bytes that were written.
+ private static ReadOnlyMemory<byte> WriteStringHelper(JsonWriterOptions writerOptions, string str)
+ {
+ var output = new ArrayBufferWriter<byte>();
+ using (var writer = new Utf8JsonWriter(output, writerOptions))
+ {
+ writer.WriteStringValue(str);
+ }
+ return output.WrittenMemory; // Read only after the using block has disposed the writer.
+ }
+ // Writes utf8str as a single JSON string value with the given options and returns the bytes that were written.
+ private static ReadOnlyMemory<byte> WriteUtf8StringHelper(JsonWriterOptions writerOptions, byte[] utf8str)
+ {
+ var output = new ArrayBufferWriter<byte>();
+ using (var writer = new Utf8JsonWriter(output, writerOptions))
+ {
+ writer.WriteStringValue(utf8str);
+ }
+ return output.WrittenMemory; // Read only after the using block has disposed the writer.
+ }
+
[Fact]
public void WriteJsonWritesToIBWOnDemand_Dispose()
{