<data name="CallFlushToAvoidDataLoss" xml:space="preserve">
<value>The JSON writer needs to be flushed before getting the current state. There are {0} bytes that have not been committed to the output.</value>
</data>
+ <data name="CannotReadIncompleteUTF16" xml:space="preserve">
+ <value>Cannot read incomplete UTF-16 JSON text as string with missing low surrogate.</value>
+ </data>
+ <data name="CannotReadInvalidUTF16" xml:space="preserve">
+ <value>Cannot read invalid UTF-16 JSON text as string. Invalid surrogate value: '{0}'.</value>
+ </data>
<data name="CannotStartObjectArrayAfterPrimitiveOrClose" xml:space="preserve">
<value>Cannot write the start of an object/array after a single JSON value or outside of an existing closed object/array. Current token type is '{0}'.</value>
</data>
<data name="CannotStartObjectArrayWithoutProperty" xml:space="preserve">
<value>Cannot write the start of an object or array without a property name. Current token type is '{0}'.</value>
</data>
+ <data name="CannotTranscodeInvalidUtf8" xml:space="preserve">
+ <value>Cannot transcode invalid UTF-8 JSON text to UTF-16 string.</value>
+ </data>
<data name="CannotWriteInvalidUTF16" xml:space="preserve">
- <value>Cannot write invalid UTF-16 text as JSON. Invalid surrogate pair: '{0}'.</value>
+ <value>Cannot write invalid UTF-16 text as JSON. Invalid surrogate value: '{0}'.</value>
</data>
<data name="CannotWriteInvalidUTF8" xml:space="preserve">
<value>Cannot write invalid UTF-8 text as JSON. Invalid input: '{0}'.</value>
<Compile Include="System\Text\Json\BitStack.cs" />
<Compile Include="System\Text\Json\JsonCommentHandling.cs" />
<Compile Include="System\Text\Json\JsonConstants.cs" />
+ <Compile Include="System\Text\Json\JsonHelpers.cs" />
<Compile Include="System\Text\Json\JsonTokenType.cs" />
<Compile Include="System\Text\Json\ThrowHelper.cs" />
<Compile Include="System\Text\Json\Reader\ConsumeNumberResult.cs" />
<Compile Include="System\Text\Json\Reader\ConsumeTokenResult.cs" />
<Compile Include="System\Text\Json\Reader\JsonReaderException.cs" />
<Compile Include="System\Text\Json\Reader\JsonReaderHelper.cs" />
+ <Compile Include="System\Text\Json\Reader\JsonReaderHelper.Unescaping.cs" />
<Compile Include="System\Text\Json\Reader\JsonReaderOptions.cs" />
<Compile Include="System\Text\Json\Reader\JsonReaderState.cs" />
<Compile Include="System\Text\Json\Reader\Utf8JsonReader.cs" />
public const int MaxWriterDepth = 1_000;
public const int RemoveFlagsBitMask = 0x7FFFFFFF;
+ public const int StackallocThreshold = 256;
+
// In the worst case, an ASCII character represented as a single utf-8 byte could expand 6x when escaped.
// For example: '+' becomes '\u002B'
// Escaping surrogate pairs (represented by 3 or 4 utf-8 bytes) would expand to 12 bytes (which is still <= 6x).
public const int HighSurrogateEndValue = 0xDBFF;
public const int LowSurrogateStartValue = 0xDC00;
public const int LowSurrogateEndValue = 0xDFFF;
- public const int ShiftRightBy10 = 0x400;
+ public const int BitShiftBy10 = 0x400;
}
}
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Runtime.CompilerServices;
+
+namespace System.Text.Json
+{
+ internal static partial class JsonHelpers
+ {
+     /// <summary>
+     /// Determines whether <paramref name="value"/> is a Unicode scalar value, that is,
+     /// a code point in [ U+0000..U+D7FF ] or [ U+E000..U+10FFFF ] (both inclusive).
+     /// Surrogate code points and anything above U+10FFFF yield <see langword="false"/>.
+     /// </summary>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public static bool IsValidUnicodeScalar(uint value)
+         // XOR with 0xD800 relocates the surrogate block to [ 0x0000..0x07FF ] and leaves
+         // every valid scalar inside [ 0x0800..0x10FFFF ], so one range test is enough.
+         => IsInRangeInclusive(value ^ 0xD800U, 0x800U, 0x10FFFFU);
+
+     /// <summary>
+     /// Determines whether <paramref name="value"/> lies within
+     /// [<paramref name="lowerBound"/>, <paramref name="upperBound"/>], both ends included.
+     /// </summary>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public static bool IsInRangeInclusive(uint value, uint lowerBound, uint upperBound)
+     {
+         // Unsigned subtraction wraps for values below lowerBound, turning them into
+         // large numbers that fail the single comparison.
+         uint offset = value - lowerBound;
+         uint width = upperBound - lowerBound;
+         return offset <= width;
+     }
+
+     /// <summary>
+     /// Determines whether <paramref name="value"/> lies within
+     /// [<paramref name="lowerBound"/>, <paramref name="upperBound"/>], both ends included.
+     /// </summary>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public static bool IsInRangeInclusive(byte value, byte lowerBound, byte upperBound)
+     {
+         // The differences are truncated back to byte so that values below lowerBound wrap.
+         byte offset = (byte)(value - lowerBound);
+         byte width = (byte)(upperBound - lowerBound);
+         return offset <= width;
+     }
+
+     /// <summary>
+     /// Determines whether <paramref name="value"/> lies within
+     /// [<paramref name="lowerBound"/>, <paramref name="upperBound"/>], both ends included.
+     /// </summary>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public static bool IsInRangeInclusive(int value, int lowerBound, int upperBound)
+     {
+         // Reinterpreting the differences as unsigned folds the two bound checks into one.
+         uint offset = (uint)(value - lowerBound);
+         uint width = (uint)(upperBound - lowerBound);
+         return offset <= width;
+     }
+ }
+}
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Buffers;
+using System.Buffers.Text;
+using System.Diagnostics;
+
+namespace System.Text.Json
+{
+ internal static partial class JsonReaderHelper
+ {
+ // Reject any invalid UTF-8 data rather than silently replacing.
+ public static readonly UTF8Encoding s_utf8Encoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
+
+ // TODO: Similar to escaping, replace the unescaping logic with publicly shipping APIs from https://github.com/dotnet/corefx/issues/33509
+ /// <summary>
+ /// Unescapes the UTF-8 JSON string payload in <paramref name="utf8Source"/> and returns it
+ /// transcoded to a <see cref="string"/>.
+ /// </summary>
+ /// <param name="utf8Source">The raw UTF-8 bytes of the JSON string value.</param>
+ /// <param name="idx">Index of the first backslash within <paramref name="utf8Source"/>.</param>
+ /// <returns>The unescaped text.</returns>
+ /// <exception cref="InvalidOperationException">
+ /// Propagated from the unescaping or transcoding step when the payload holds invalid
+ /// UTF-16 surrogate escapes or invalid UTF-8 bytes.
+ /// </exception>
+ public static string GetUnescapedString(ReadOnlySpan<byte> utf8Source, int idx)
+ {
+     byte[] unescapedArray = null;
+
+     // Small payloads are unescaped on the stack; larger ones borrow a pooled buffer.
+     Span<byte> utf8Unescaped = utf8Source.Length <= JsonConstants.StackallocThreshold ?
+         stackalloc byte[utf8Source.Length] :
+         (unescapedArray = ArrayPool<byte>.Shared.Rent(utf8Source.Length));
+
+     try
+     {
+         Unescape(utf8Source, utf8Unescaped, idx, out int written);
+         Debug.Assert(written > 0);
+
+         utf8Unescaped = utf8Unescaped.Slice(0, written);
+         Debug.Assert(!utf8Unescaped.IsEmpty);
+
+         return TranscodeHelper(utf8Unescaped);
+     }
+     finally
+     {
+         // Both helpers can throw on malformed input. Clear and return the rented
+         // buffer on that path too, so user data never lingers in the shared pool.
+         if (unescapedArray != null)
+         {
+             utf8Unescaped.Clear();
+             ArrayPool<byte>.Shared.Return(unescapedArray);
+         }
+     }
+ }
+
+ /// <summary>
+ /// Transcodes UTF-8 bytes to a <see cref="string"/> using the strict
+ /// <c>s_utf8Encoding</c> instance (configured with throwOnInvalidBytes: true).
+ /// </summary>
+ /// <param name="utf8Unescaped">The UTF-8 bytes to transcode.</param>
+ /// <returns>The decoded text; <see cref="string.Empty"/> for empty input on the non-inbox build.</returns>
+ /// <exception cref="InvalidOperationException">
+ /// Wraps the <see cref="DecoderFallbackException"/> raised for invalid UTF-8 input.
+ /// </exception>
+ public static string TranscodeHelper(ReadOnlySpan<byte> utf8Unescaped)
+ {
+ try
+ {
+#if BUILDING_INBOX_LIBRARY
+ return s_utf8Encoding.GetString(utf8Unescaped);
+#else
+ // NOTE(review): presumably the empty case is special-cased because pinning an empty span
+ // does not give a usable pointer for the pointer-based GetString overload - confirm.
+ if (utf8Unescaped.IsEmpty)
+ {
+ return string.Empty;
+ }
+ unsafe
+ {
+ fixed (byte* bytePtr = utf8Unescaped)
+ {
+ return s_utf8Encoding.GetString(bytePtr, utf8Unescaped.Length);
+ }
+ }
+#endif
+ }
+ catch (DecoderFallbackException ex)
+ {
+ // We want to be consistent with the exception being thrown
+ // so the user only has to catch a single exception.
+ // Since we already throw InvalidOperationException for mismatch token type,
+ // and while unescaping, using that exception for failure to decode invalid UTF-8 bytes as well.
+ // Therefore, wrapping the DecoderFallbackException around an InvalidOperationException.
+ throw ThrowHelper.GetInvalidOperationException_ReadInvalidUTF8(ex);
+ }
+ }
+
+ /// <summary>
+ /// Writes the unescaped form of <paramref name="source"/> into <paramref name="destination"/>.
+ /// Bytes before <paramref name="idx"/> are copied verbatim; from there on, each JSON escape
+ /// sequence is replaced by the UTF-8 bytes it denotes and all other bytes are copied as-is.
+ /// </summary>
+ /// <param name="source">UTF-8 JSON string payload containing at least one backslash.</param>
+ /// <param name="destination">Output buffer; asserted to be at least as long as <paramref name="source"/>.</param>
+ /// <param name="idx">Index of the first backslash in <paramref name="source"/>.</param>
+ /// <param name="written">Receives the number of bytes written to <paramref name="destination"/>.</param>
+ /// <exception cref="InvalidOperationException">
+ /// Raised through ThrowHelper when a \u escape is a lone low surrogate, or a high surrogate
+ /// that is not immediately followed by a \u escape holding a low surrogate.
+ /// </exception>
+ private static void Unescape(ReadOnlySpan<byte> source, Span<byte> destination, int idx, out int written)
+ {
+ Debug.Assert(idx >= 0 && idx < source.Length);
+ Debug.Assert(source[idx] == JsonConstants.BackSlash);
+ Debug.Assert(destination.Length >= source.Length);
+
+ // Everything ahead of the first backslash needs no processing.
+ source.Slice(0, idx).CopyTo(destination);
+ written = idx;
+
+ for (; idx < source.Length; idx++)
+ {
+ byte currentByte = source[idx];
+ if (currentByte == JsonConstants.BackSlash)
+ {
+ // Step onto the byte that identifies the escape.
+ // NOTE(review): a byte matching none of the branches below writes nothing;
+ // presumably unreachable because the reader has already validated the escape - confirm.
+ idx++;
+ currentByte = source[idx];
+
+ if (currentByte == JsonConstants.Quote)
+ {
+ destination[written++] = JsonConstants.Quote;
+ }
+ else if (currentByte == 'n')
+ {
+ destination[written++] = JsonConstants.LineFeed;
+ }
+ else if (currentByte == 'r')
+ {
+ destination[written++] = JsonConstants.CarriageReturn;
+ }
+ else if (currentByte == JsonConstants.BackSlash)
+ {
+ destination[written++] = JsonConstants.BackSlash;
+ }
+ else if (currentByte == JsonConstants.Slash)
+ {
+ destination[written++] = JsonConstants.Slash;
+ }
+ else if (currentByte == 't')
+ {
+ destination[written++] = JsonConstants.Tab;
+ }
+ else if (currentByte == 'b')
+ {
+ destination[written++] = JsonConstants.BackSpace;
+ }
+ else if (currentByte == 'f')
+ {
+ destination[written++] = JsonConstants.FormFeed;
+ }
+ else if (currentByte == 'u')
+ {
+ // The source is known to be valid JSON, and hence if we see a \u, it is guaranteed to have 4 hex digits following it
+ // Otherwise, the Utf8JsonReader would have already thrown an exception.
+ Debug.Assert(source.Length >= idx + 5);
+
+ bool result = Utf8Parser.TryParse(source.Slice(idx + 1, 4), out int scalar, out int bytesConsumed, 'x');
+ Debug.Assert(result);
+ Debug.Assert(bytesConsumed == 4);
+ idx += bytesConsumed; // The loop iteration will increment idx past the last hex digit
+
+ if (JsonHelpers.IsInRangeInclusive((uint)scalar, JsonConstants.HighSurrogateStartValue, JsonConstants.LowSurrogateEndValue))
+ {
+ // The first hex value cannot be a low surrogate.
+ if (scalar >= JsonConstants.LowSurrogateStartValue)
+ {
+ ThrowHelper.ThrowInvalidOperationException_ReadInvalidUTF16(scalar);
+ }
+
+ Debug.Assert(JsonHelpers.IsInRangeInclusive((uint)scalar, JsonConstants.HighSurrogateStartValue, JsonConstants.HighSurrogateEndValue));
+
+ // idx now lands on the first hex digit of the expected second escape.
+ idx += 3; // Skip the last hex digit and the next \u
+
+ // We must have a low surrogate following a high surrogate.
+ // The length test comes first so the two indexers are only evaluated when in bounds.
+ if (source.Length < idx + 4 || source[idx - 2] != '\\' || source[idx - 1] != 'u')
+ {
+ ThrowHelper.ThrowInvalidOperationException_ReadInvalidUTF16();
+ }
+
+ // The source is known to be valid JSON, and hence if we see a \u, it is guaranteed to have 4 hex digits following it
+ // Otherwise, the Utf8JsonReader would have already thrown an exception.
+ result = Utf8Parser.TryParse(source.Slice(idx, 4), out int lowSurrogate, out bytesConsumed, 'x');
+ Debug.Assert(result);
+ Debug.Assert(bytesConsumed == 4);
+
+ // If the first hex value is a high surrogate, the next one must be a low surrogate.
+ if (!JsonHelpers.IsInRangeInclusive((uint)lowSurrogate, JsonConstants.LowSurrogateStartValue, JsonConstants.LowSurrogateEndValue))
+ {
+ ThrowHelper.ThrowInvalidOperationException_ReadInvalidUTF16(lowSurrogate);
+ }
+
+ idx += bytesConsumed - 1; // The loop iteration will increment idx past the last hex digit
+
+ // To find the unicode scalar:
+ // (0x400 * (High surrogate - 0xD800)) + Low surrogate - 0xDC00 + 0x10000
+ scalar = (JsonConstants.BitShiftBy10 * (scalar - JsonConstants.HighSurrogateStartValue))
+ + (lowSurrogate - JsonConstants.LowSurrogateStartValue)
+ + JsonConstants.UnicodePlane01StartValue;
+ }
+
+ // Emit the (possibly combined) scalar as UTF-8 directly after the bytes written so far.
+#if BUILDING_INBOX_LIBRARY
+ var rune = new Rune(scalar);
+ result = rune.TryEncodeToUtf8Bytes(destination.Slice(written), out int bytesWritten);
+ Debug.Assert(result);
+#else
+ EncodeToUtf8Bytes((uint)scalar, destination.Slice(written), out int bytesWritten);
+#endif
+ Debug.Assert(bytesWritten <= 4);
+ written += bytesWritten;
+ }
+ }
+ else
+ {
+ // Ordinary byte: copy through unchanged.
+ destination[written++] = currentByte;
+ }
+ }
+ }
+
+#if !BUILDING_INBOX_LIBRARY
+ /// <summary>
+ /// Encodes <paramref name="scalar"/> as UTF-8 at the start of <paramref name="utf8Destination"/>
+ /// and reports how many code units (1 to 4) were produced.
+ /// </summary>
+ /// <param name="scalar">The Unicode scalar value to encode; asserted to be valid.</param>
+ /// <param name="utf8Destination">Output buffer; asserted to hold at least four bytes.</param>
+ /// <param name="bytesWritten">Receives the number of bytes stored in <paramref name="utf8Destination"/>.</param>
+ private static void EncodeToUtf8Bytes(uint scalar, Span<byte> utf8Destination, out int bytesWritten)
+ {
+     Debug.Assert(JsonHelpers.IsValidUnicodeScalar(scalar));
+     Debug.Assert(utf8Destination.Length >= 4);
+
+     const uint ContinuationMarker = 0x80U; // 10xxxxxx
+     const uint PayloadMask = 0x3FU;        // low six bits carried by a continuation byte
+
+     if (scalar >= 0x10000U)
+     {
+         // Supplementary plane: lead byte 11110xxx followed by three continuation bytes.
+         utf8Destination[0] = (byte)(0xF0U | (scalar >> 18));
+         utf8Destination[1] = (byte)(ContinuationMarker | ((scalar >> 12) & PayloadMask));
+         utf8Destination[2] = (byte)(ContinuationMarker | ((scalar >> 6) & PayloadMask));
+         utf8Destination[3] = (byte)(ContinuationMarker | (scalar & PayloadMask));
+         bytesWritten = 4;
+         return;
+     }
+
+     if (scalar >= 0x800U)
+     {
+         // Lead byte 1110xxxx followed by two continuation bytes.
+         utf8Destination[0] = (byte)(0xE0U | (scalar >> 12));
+         utf8Destination[1] = (byte)(ContinuationMarker | ((scalar >> 6) & PayloadMask));
+         utf8Destination[2] = (byte)(ContinuationMarker | (scalar & PayloadMask));
+         bytesWritten = 3;
+         return;
+     }
+
+     if (scalar >= 0x80U)
+     {
+         // Lead byte 110xxxxx followed by one continuation byte.
+         utf8Destination[0] = (byte)(0xC0U | (scalar >> 6));
+         utf8Destination[1] = (byte)(ContinuationMarker | (scalar & PayloadMask));
+         bytesWritten = 2;
+         return;
+     }
+
+     // ASCII: the scalar is its own single-byte encoding.
+     utf8Destination[0] = (byte)scalar;
+     bytesWritten = 1;
+ }
+#endif
+ }
+}
namespace System.Text.Json
{
- internal static class JsonReaderHelper
+ internal static partial class JsonReaderHelper
{
public static (int, int) CountNewLines(ReadOnlySpan<byte> data)
{
{
public ref partial struct Utf8JsonReader
{
- // Reject any invalid UTF-8 data rather than silently replacing.
- private static readonly UTF8Encoding s_utf8Encoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
-
/// <summary>
- /// Reads the next JSON token value from the source transcoded as a <see cref="string"/>.
+ /// Reads the next JSON token value from the source, unescaped, and transcoded as a <see cref="string"/>.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown if trying to get the value of the JSON token that is not a string
/// (i.e. other than <see cref="JsonTokenType.String"/> or <see cref="JsonTokenType.PropertyName"/>).
/// <seealso cref="TokenType" />
- /// </exception>
- /// <exception cref="ArgumentException">
- /// Thrown if invalid UTF-8 byte sequences are detected while transcoding.
+ /// It will also throw when the JSON string contains invalid UTF-8 bytes, or invalid UTF-16 surrogates.
/// </exception>
public string GetStringValue()
{
ReadOnlySpan<byte> span = HasValueSequence ? ValueSequence.ToArray() : ValueSpan;
-#if BUILDING_INBOX_LIBRARY
- // TODO: https://github.com/dotnet/corefx/issues/33292
- return s_utf8Encoding.GetString(span);
-#else
- if (span.IsEmpty)
- {
- return string.Empty;
- }
- unsafe
+ // A backslash marks the first escape sequence; only then is the unescaping path needed.
+ int idx = span.IndexOf(JsonConstants.BackSlash);
+ if (idx != -1)
{
- fixed (byte* bytePtr = span)
- {
- // TODO: https://github.com/dotnet/corefx/issues/33292
- return s_utf8Encoding.GetString(bytePtr, span.Length);
- }
+ return JsonReaderHelper.GetUnescapedString(span, idx);
}
-#endif
+ // No escapes present: transcode the raw UTF-8 bytes directly.
+ return JsonReaderHelper.TranscodeHelper(span);
}
/// <summary>
throw new ArgumentException(SR.Format(SR.CannotWriteInvalidUTF16, $"0x{charAsInt:X2}"));
}
+ /// <summary>
+ /// Throws an <see cref="InvalidOperationException"/> reporting the offending surrogate
+ /// value, formatted as hexadecimal, found while reading escaped UTF-16 JSON text.
+ /// </summary>
+ public static void ThrowInvalidOperationException_ReadInvalidUTF16(int charAsInt)
+ {
+     string surrogateText = $"0x{charAsInt:X2}";
+     string message = SR.Format(SR.CannotReadInvalidUTF16, surrogateText);
+     throw new InvalidOperationException(message);
+ }
+
+ /// <summary>
+ /// Throws an <see cref="InvalidOperationException"/> for escaped UTF-16 JSON text
+ /// in which a high surrogate is not followed by a low surrogate.
+ /// </summary>
+ public static void ThrowInvalidOperationException_ReadInvalidUTF16()
+     => throw new InvalidOperationException(SR.CannotReadIncompleteUTF16);
+
+ /// <summary>
+ /// Creates (without throwing) the <see cref="InvalidOperationException"/> that wraps a
+ /// UTF-8 decoding failure, keeping <paramref name="innerException"/> as the inner exception.
+ /// </summary>
+ public static InvalidOperationException GetInvalidOperationException_ReadInvalidUTF8(DecoderFallbackException innerException)
+     => new InvalidOperationException(SR.CannotTranscodeInvalidUtf8, innerException);
+
[MethodImpl(MethodImplOptions.NoInlining)]
public static InvalidOperationException GetInvalidOperationException(ExceptionResource resource, int currentDepth, byte token, JsonTokenType tokenType)
{
// Divide by 0x400 to shift right by 10 in order to find the surrogate pairs from the scalar
// High surrogate = ((scalar - 0x10000) / 0x400) + D800
// Low surrogate = ((scalar - 0x10000) % 0x400) + DC00
- int quotient = Math.DivRem(scalar - JsonConstants.UnicodePlane01StartValue, JsonConstants.ShiftRightBy10, out int remainder);
+ int quotient = Math.DivRem(scalar - JsonConstants.UnicodePlane01StartValue, JsonConstants.BitShiftBy10, out int remainder);
int firstChar = quotient + JsonConstants.HighSurrogateStartValue;
int nextChar = remainder + JsonConstants.LowSurrogateStartValue;
bool result = Utf8Formatter.TryFormat(firstChar, destination.Slice(written), out int bytesWritten, format: s_hexStandardFormat);
private static bool IsUtf8ContinuationByte(byte value) => (value & 0xC0) == 0x80;
/// <summary>
- /// Returns <see langword="true"/> iff <paramref name="value"/> is between
- /// <paramref name="lowerBound"/> and <paramref name="upperBound"/>, inclusive.
- /// </summary>
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static bool IsInRangeInclusive(byte value, byte lowerBound, byte upperBound)
- => ((byte)(value - lowerBound) <= (byte)(upperBound - lowerBound));
-
- /// <summary>
- /// Returns <see langword="true"/> iff <paramref name="value"/> is between
- /// <paramref name="lowerBound"/> and <paramref name="upperBound"/>, inclusive.
- /// </summary>
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static bool IsInRangeInclusive(uint value, uint lowerBound, uint upperBound)
- => (value - lowerBound) <= (upperBound - lowerBound);
-
- /// <summary>
/// Returns <see langword="true"/> iff the low word of <paramref name="char"/> is a UTF-16 surrogate.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
// - Multi-byte sequences which are improperly terminated (no continuation byte when one is
// expected) are reported as invalid sequences up to and including the last seen continuation byte.
- Debug.Assert(IsValidUnicodeScalar(ReplacementChar));
+ Debug.Assert(JsonHelpers.IsValidUnicodeScalar(ReplacementChar));
rune = ReplacementChar;
if (data.IsEmpty)
if (IsAsciiValue(firstByte))
{
// ASCII byte = well-formed one-byte sequence.
- Debug.Assert(IsValidUnicodeScalar(firstByte));
+ Debug.Assert(JsonHelpers.IsValidUnicodeScalar(firstByte));
rune = firstByte;
numBytesConsumed = 1;
return SequenceValidity.WellFormed;
}
- if (!IsInRangeInclusive(firstByte, (byte)0xC2U, (byte)0xF4U))
+ if (!JsonHelpers.IsInRangeInclusive(firstByte, (byte)0xC2U, (byte)0xF4U))
{
// Standalone continuation byte or "always invalid" byte = ill-formed one-byte sequence.
goto InvalidOneByteSequence;
{
// Well-formed two-byte sequence.
uint scalar = (((uint)firstByte & 0x1FU) << 6) | ((uint)secondByte & 0x3FU);
- Debug.Assert(IsValidUnicodeScalar(scalar));
+ Debug.Assert(JsonHelpers.IsValidUnicodeScalar(scalar));
rune = (int)scalar;
numBytesConsumed = 2;
return SequenceValidity.WellFormed;
{
// Well-formed three-byte sequence.
scalar |= (uint)thirdByte & 0x3FU;
- Debug.Assert(IsValidUnicodeScalar(scalar));
+ Debug.Assert(JsonHelpers.IsValidUnicodeScalar(scalar));
rune = (int)scalar;
numBytesConsumed = 3;
return SequenceValidity.WellFormed;
// Need to check for overlong or out-of-range sequences.
uint scalar = (((uint)firstByte & 0x07U) << 18) | (((uint)secondByte & 0x3FU) << 12);
- Debug.Assert(IsValidUnicodeScalar(scalar));
- if (!IsInRangeInclusive(scalar, 0x10000U, 0x10FFFFU))
+ Debug.Assert(JsonHelpers.IsValidUnicodeScalar(scalar));
+ if (!JsonHelpers.IsInRangeInclusive(scalar, 0x10000U, 0x10FFFFU))
{
goto OverlongOutOfRangeOrSurrogateSequence;
}
{
// Well-formed four-byte sequence.
scalar |= (((uint)thirdByte & 0x3FU) << 6) | ((uint)fourthByte & 0x3FU);
- Debug.Assert(IsValidUnicodeScalar(scalar));
+ Debug.Assert(JsonHelpers.IsValidUnicodeScalar(scalar));
rune = (int)scalar;
numBytesConsumed = 4;
return SequenceValidity.WellFormed;
private static void EscapeNextChars(ReadOnlySpan<char> value, int firstChar, Span<char> destination, ref int consumed, ref int written)
{
int nextChar = -1;
- if (IsInRangeInclusive(firstChar, JsonConstants.HighSurrogateStartValue, JsonConstants.LowSurrogateEndValue))
+ if (JsonHelpers.IsInRangeInclusive(firstChar, JsonConstants.HighSurrogateStartValue, JsonConstants.LowSurrogateEndValue))
{
consumed++;
if (value.Length <= consumed || firstChar >= JsonConstants.LowSurrogateStartValue)
}
nextChar = value[consumed];
- if (!IsInRangeInclusive(nextChar, JsonConstants.LowSurrogateStartValue, JsonConstants.LowSurrogateEndValue))
+ if (!JsonHelpers.IsInRangeInclusive(nextChar, JsonConstants.LowSurrogateStartValue, JsonConstants.LowSurrogateEndValue))
{
ThrowHelper.ThrowArgumentException_InvalidUTF16(nextChar);
}
}
}
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static bool IsInRangeInclusive(int ch, int start, int end)
- {
- return (uint)(ch - start) <= (uint)(end - start);
- }
-
/// <summary>
/// A scalar that represents the Unicode replacement character U+FFFD.
/// </summary>
private const int ReplacementChar = 0xFFFD;
- /// <summary>
- /// Returns <see langword="true"/> iff <paramref name="value"/> is a valid Unicode scalar
- /// value, i.e., is in [ U+0000..U+D7FF ], inclusive; or [ U+E000..U+10FFFF ], inclusive.
- /// </summary>
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static bool IsValidUnicodeScalar(uint value)
- {
- // By XORing the incoming value with 0xD800, surrogate code points
- // are moved to the range [ U+0000..U+07FF ], and all valid scalar
- // values are clustered into the single range [ U+0800..U+10FFFF ],
- // which allows performing a single fast range check.
-
- return IsInRangeInclusive(value ^ 0xD800U, 0x800U, 0x10FFFFU);
- }
-
#if !BUILDING_INBOX_LIBRARY
private static int WriteHex(int value, Span<char> destination, int written)
{
else
{
// if (!IsLowSurrogate(ch) && !IsHighSurrogate(ch))
- if (!IsInRangeInclusive(ch, JsonConstants.HighSurrogateStart, JsonConstants.LowSurrogateEnd))
+ if (!JsonHelpers.IsInRangeInclusive(ch, JsonConstants.HighSurrogateStart, JsonConstants.LowSurrogateEnd))
{
// 3 byte encoding
chd = unchecked((sbyte)0xE0) | (ch >> 12);
chd = *pSrc;
// if (!IsLowSurrogate(chd)) {
- if (!IsInRangeInclusive(chd, JsonConstants.LowSurrogateStart, JsonConstants.LowSurrogateEnd))
+ if (!JsonHelpers.IsInRangeInclusive(chd, JsonConstants.LowSurrogateStart, JsonConstants.LowSurrogateEnd))
{
// high not followed by low -> bad
goto InvalidData;
else
{
// if (!IsLowSurrogate(ch) && !IsHighSurrogate(ch))
- if (!IsInRangeInclusive(ch, JsonConstants.HighSurrogateStart, JsonConstants.LowSurrogateEnd))
+ if (!JsonHelpers.IsInRangeInclusive(ch, JsonConstants.HighSurrogateStart, JsonConstants.LowSurrogateEnd))
{
if (pAllocatedBufferEnd - pTarget <= 2)
goto DestinationFull;
chd = *pSrc;
// if (!IsLowSurrogate(chd)) {
- if (!IsInRangeInclusive(chd, JsonConstants.LowSurrogateStart, JsonConstants.LowSurrogateEnd))
+ if (!JsonHelpers.IsInRangeInclusive(chd, JsonConstants.LowSurrogateStart, JsonConstants.LowSurrogateEnd))
{
// high not followed by low -> bad
goto InvalidData;
using System.Collections.Generic;
using System.Globalization;
+using System.IO;
+using Newtonsoft.Json;
using Xunit;
namespace System.Text.Json.Tests
Assert.Equal(dataUtf8.Length, json.BytesConsumed);
Assert.Equal(json.BytesConsumed, json.CurrentState.BytesConsumed);
}
+
+ [Theory]
+ [InlineData("{\"message\":\"Hello, I am \\\"Ahson!\\\"\"}")]
+ [InlineData("{\"nam\\\"e\":\"ah\\\"son\"}")]
+ [InlineData("{\"Here is a string: \\\"\\\"\":\"Here is a\",\"Here is a back slash\\\\\":[\"Multiline\\r\\n String\\r\\n\",\"\\tMul\\r\\ntiline String\",\"\\\"somequote\\\"\\tMu\\\"\\\"l\\r\\ntiline\\\"another\\\" String\\\\\"],\"str\":\"\\\"\\\"\"}")]
+ [InlineData("[\"\\u0030\\u0031\\u0032\\u0033\\u0034\\u0035\", \"\\u0000\\u002B\", \"a\\u005C\\u0072b\", \"a\\\\u005C\\u0072b\", \"a\\u008E\\u008Fb\", \"a\\uD803\\uDE6Db\", \"a\\uD834\\uDD1Eb\", \"a\\\\uD834\\\\uDD1Eb\"]")]
+ [InlineData("{\"message\":\"Hello /a/b/c \\/ \\r\\b\\n\\f\\t\\/\"}")]
+ [InlineData(null)] // Large randomly generated string
+ // Verifies that GetStringValue yields the same property names and string values as
+ // Newtonsoft's JsonTextReader for payloads containing escape sequences.
+ public static void TestingGetString(string jsonString)
+ {
+     // A null payload stands for a generated 500-character string literal, long enough
+     // to go past the reader's stack-allocation threshold, with one escaped quote inside.
+     if (jsonString == null)
+     {
+         var rng = new Random(42);
+         var buffer = new char[500];
+         buffer[0] = '"';
+         for (int pos = 1; pos < buffer.Length; pos++)
+         {
+             // ASCII values (between 63 and 91) that don't need to be escaped.
+             buffer[pos] = (char)rng.Next('?', '\\');
+         }
+
+         buffer[256] = '\\';
+         buffer[257] = '"';
+         buffer[buffer.Length - 1] = '"';
+         jsonString = new string(buffer);
+     }
+
+     // Reference results from Newtonsoft.
+     var expectedPropertyNames = new List<string>();
+     var expectedValues = new List<string>();
+
+     var referenceReader = new JsonTextReader(new StringReader(jsonString));
+     while (referenceReader.Read())
+     {
+         switch (referenceReader.TokenType)
+         {
+             case JsonToken.String:
+                 expectedValues.Add(referenceReader.Value.ToString());
+                 break;
+             case JsonToken.PropertyName:
+                 expectedPropertyNames.Add(referenceReader.Value.ToString());
+                 break;
+         }
+     }
+
+     // Results from the reader under test.
+     byte[] dataUtf8 = Encoding.UTF8.GetBytes(jsonString);
+
+     var actualPropertyNames = new List<string>();
+     var actualValues = new List<string>();
+
+     var json = new Utf8JsonReader(dataUtf8, true, default);
+     while (json.Read())
+     {
+         switch (json.TokenType)
+         {
+             case JsonTokenType.String:
+                 actualValues.Add(json.GetStringValue());
+                 break;
+             case JsonTokenType.PropertyName:
+                 actualPropertyNames.Add(json.GetStringValue());
+                 break;
+         }
+     }
+
+     Assert.Equal(expectedPropertyNames.Count, actualPropertyNames.Count);
+     for (int n = 0; n < expectedPropertyNames.Count; n++)
+     {
+         Assert.Equal(expectedPropertyNames[n], actualPropertyNames[n]);
+     }
+
+     Assert.Equal(expectedValues.Count, actualValues.Count);
+     for (int n = 0; n < expectedValues.Count; n++)
+     {
+         Assert.Equal(expectedValues[n], actualValues[n]);
+     }
+
+     Assert.Equal(dataUtf8.Length, json.BytesConsumed);
+     Assert.Equal(json.BytesConsumed, json.CurrentState.BytesConsumed);
+ }
+
+ [Theory]
+ [InlineData("\"a\\uDD1E\"")]
+ [InlineData("\"a\\uDD1Eb\"")]
+ [InlineData("\"a\\uD834\"")]
+ [InlineData("\"a\\uD834\\u0030\"")]
+ [InlineData("\"a\\uD834\\uD834\"")]
+ [InlineData("\"a\\uD834b\"")]
+ [InlineData("\"a\\uDD1E\\uD834b\"")]
+ [InlineData("\"a\\\\uD834\\uDD1Eb\"")]
+ [InlineData("\"a\\uDD1E\\\\uD834b\"")]
+ // Each payload holds a \u escape sequence that does not form a valid UTF-16 surrogate pair;
+ // GetStringValue must reject it with InvalidOperationException under every comment-handling mode.
+ public static void TestingGetStringInvalidUTF16(string jsonString)
+ {
+     byte[] dataUtf8 = Encoding.UTF8.GetBytes(jsonString);
+
+     foreach (JsonCommentHandling commentHandling in Enum.GetValues(typeof(JsonCommentHandling)))
+     {
+         var options = new JsonReaderOptions { CommentHandling = commentHandling };
+         var state = new JsonReaderState(options: options);
+         var json = new Utf8JsonReader(dataUtf8, isFinalBlock: true, state);
+
+         Assert.True(json.Read());
+         Assert.Equal(JsonTokenType.String, json.TokenType);
+
+         // Record whether the expected exception surfaced; any other exception type still propagates.
+         bool rejected = false;
+         try
+         {
+             json.GetStringValue();
+         }
+         catch (InvalidOperationException)
+         {
+             rejected = true;
+         }
+
+         Assert.True(rejected, "Expected InvalidOperationException when trying to get string value for invalid UTF-16 JSON text.");
+     }
+ }
+
+
+
+ // Reading tokens must succeed on payloads with malformed UTF-8 inside a string; only
+ // GetStringValue is expected to fail, with an InvalidOperationException that wraps the
+ // underlying DecoderFallbackException.
+ [Theory]
+ [MemberData(nameof(InvalidUTF8Strings))]
+ public static void TestingGetStringInvalidUTF8(byte[] dataUtf8)
+ {
+     foreach (JsonCommentHandling commentHandling in Enum.GetValues(typeof(JsonCommentHandling)))
+     {
+         var state = new JsonReaderState(options: new JsonReaderOptions { CommentHandling = commentHandling });
+         var json = new Utf8JsonReader(dataUtf8, isFinalBlock: true, state);
+
+         // It is expected that the Utf8JsonReader won't throw an exception here
+         Assert.True(json.Read());
+         Assert.Equal(JsonTokenType.String, json.TokenType);
+
+         while (json.Read())
+             ;
+
+         // Second pass over the same payload, this time materializing the string values.
+         json = new Utf8JsonReader(dataUtf8, isFinalBlock: true, state);
+
+         while (json.Read())
+         {
+             if (json.TokenType == JsonTokenType.String)
+             {
+                 try
+                 {
+                     string val = json.GetStringValue();
+                     Assert.True(false, "Expected InvalidOperationException when trying to get string value for invalid UTF-8 JSON text.");
+                 }
+                 catch (InvalidOperationException ex)
+                 {
+                     // Exact-type check with the expected type stated first; a missing inner
+                     // exception is reported as an assertion failure, not a NullReferenceException.
+                     Assert.IsType<DecoderFallbackException>(ex.InnerException);
+                 }
+             }
+         }
+     }
+ }
}
}
}
[Theory]
- [InlineData("{\"nam\\\"e\":\"ah\\\"son\"}", JsonCommentHandling.Disallow, "nam\\\"e, ah\\\"son, ")]
+ [InlineData("{\"nam\\\"e\":\"ah\\\"son\"}", "nam\\\"e, ah\\\"son, ", "nam\"e, ah\"son, ")]
[InlineData("{\"Here is a string: \\\"\\\"\":\"Here is a\",\"Here is a back slash\\\\\":[\"Multiline\\r\\n String\\r\\n\",\"\\tMul\\r\\ntiline String\",\"\\\"somequote\\\"\\tMu\\\"\\\"l\\r\\ntiline\\\"another\\\" String\\\\\"],\"str\":\"\\\"\\\"\"}",
- JsonCommentHandling.Disallow,
- "Here is a string: \\\"\\\", Here is a, Here is a back slash\\\\, Multiline\\r\\n String\\r\\n, \\tMul\\r\\ntiline String, \\\"somequote\\\"\\tMu\\\"\\\"l\\r\\ntiline\\\"another\\\" String\\\\, str, \\\"\\\", ")]
-
- [InlineData("{\"nam\\\"e\":\"ah\\\"son\"}", JsonCommentHandling.Allow, "nam\\\"e, ah\\\"son, ")]
- [InlineData("{\"Here is a string: \\\"\\\"\":\"Here is a\",\"Here is a back slash\\\\\":[\"Multiline\\r\\n String\\r\\n\",\"\\tMul\\r\\ntiline String\",\"\\\"somequote\\\"\\tMu\\\"\\\"l\\r\\ntiline\\\"another\\\" String\\\\\"],\"str\":\"\\\"\\\"\"}",
- JsonCommentHandling.Allow,
- "Here is a string: \\\"\\\", Here is a, Here is a back slash\\\\, Multiline\\r\\n String\\r\\n, \\tMul\\r\\ntiline String, \\\"somequote\\\"\\tMu\\\"\\\"l\\r\\ntiline\\\"another\\\" String\\\\, str, \\\"\\\", ")]
-
- [InlineData("{\"nam\\\"e\":\"ah\\\"son\"}", JsonCommentHandling.Skip, "nam\\\"e, ah\\\"son, ")]
- [InlineData("{\"Here is a string: \\\"\\\"\":\"Here is a\",\"Here is a back slash\\\\\":[\"Multiline\\r\\n String\\r\\n\",\"\\tMul\\r\\ntiline String\",\"\\\"somequote\\\"\\tMu\\\"\\\"l\\r\\ntiline\\\"another\\\" String\\\\\"],\"str\":\"\\\"\\\"\"}",
- JsonCommentHandling.Skip,
- "Here is a string: \\\"\\\", Here is a, Here is a back slash\\\\, Multiline\\r\\n String\\r\\n, \\tMul\\r\\ntiline String, \\\"somequote\\\"\\tMu\\\"\\\"l\\r\\ntiline\\\"another\\\" String\\\\, str, \\\"\\\", ")]
- public static void TestJsonReaderUtf8SpecialString(string jsonString, JsonCommentHandling commentHandling, string expectedStr)
+ "Here is a string: \\\"\\\", Here is a, Here is a back slash\\\\, Multiline\\r\\n String\\r\\n, \\tMul\\r\\ntiline String, \\\"somequote\\\"\\tMu\\\"\\\"l\\r\\ntiline\\\"another\\\" String\\\\, str, \\\"\\\", ",
+ "Here is a string: \"\", Here is a, Here is a back slash\\, Multiline\r\n String\r\n, \tMul\r\ntiline String, \"somequote\"\tMu\"\"l\r\ntiline\"another\" String\\, str, \"\", ")]
+ // Runs the payload through the byte-returning helpers and the object helper for every
+ // JsonCommentHandling value. expectedStr is compared against the raw bytes returned by the
+ // reader helpers; expectedEscapedStr is compared against ObjectToString's output.
+ // NOTE(review): judging by the InlineData, expectedEscapedStr actually carries the
+ // unescaped text, so the parameter name looks inverted - confirm.
+ public static void TestJsonReaderUtf8SpecialString(string jsonString, string expectedStr, string expectedEscapedStr)
{
- byte[] dataUtf8 = Encoding.UTF8.GetBytes(jsonString);
- byte[] result = JsonTestHelper.ReturnBytesHelper(dataUtf8, out int length, commentHandling);
- string actualStr = Encoding.UTF8.GetString(result, 0, length);
+ foreach (JsonCommentHandling commentHandling in Enum.GetValues(typeof(JsonCommentHandling)))
+ {
+ byte[] dataUtf8 = Encoding.UTF8.GetBytes(jsonString);
+ byte[] result = JsonTestHelper.ReturnBytesHelper(dataUtf8, out int length, commentHandling);
+ string actualStr = Encoding.UTF8.GetString(result, 0, length);
- Assert.Equal(expectedStr, actualStr);
+ Assert.Equal(expectedStr, actualStr);
- result = JsonTestHelper.SequenceReturnBytesHelper(dataUtf8, out length, commentHandling);
- actualStr = Encoding.UTF8.GetString(result, 0, length);
+ result = JsonTestHelper.SequenceReturnBytesHelper(dataUtf8, out length, commentHandling);
+ actualStr = Encoding.UTF8.GetString(result, 0, length);
- Assert.Equal(expectedStr, actualStr);
+ Assert.Equal(expectedStr, actualStr);
- object jsonValues = JsonTestHelper.ReturnObjectHelper(dataUtf8, commentHandling);
- string str = JsonTestHelper.ObjectToString(jsonValues);
- ReadOnlySpan<char> expectedSpan = expectedStr.AsSpan(0, expectedStr.Length - 2);
- ReadOnlySpan<char> actualSpan = str.AsSpan(0, str.Length - 2);
- Assert.True(expectedSpan.SequenceEqual(actualSpan));
+ object jsonValues = JsonTestHelper.ReturnObjectHelper(dataUtf8, commentHandling);
+ string str = JsonTestHelper.ObjectToString(jsonValues);
+ Assert.Equal(expectedEscapedStr, str);
+
+ // Cross-check against Newtonsoft reading the same bytes.
+ // NOTE(review): the parameter is overwritten here, so later loop iterations compare
+ // against the Newtonsoft result, not the InlineData value; the stream and reader
+ // are also never disposed - confirm this is intended.
+ Stream stream = new MemoryStream(dataUtf8);
+ TextReader reader = new StreamReader(stream, Encoding.UTF8, false, 1024, true);
+ expectedEscapedStr = JsonTestHelper.NewtonsoftReturnStringHelper(reader);
+ Assert.Equal(expectedEscapedStr, str);
+ }
}
[Theory]
new object[] {" true ", true, "True"},
new object[] {" false ", true, "False"},
new object[] {" null ", true, "null"},
- new object[] {" \" Test string with \\\"nested quotes \\\" and hex: \\uABCD values! \" ", true, " Test string with \\\"nested quotes \\\" and hex: \\uABCD values! "},
+ new object[] {" \" Test string with \\\"nested quotes \\\" and hex: \\uABCD values! \" ", true, " Test string with \"nested quotes \" and hex: \uABCD values! "},
new object[] {" 12345 ", false, "12345"},
new object[] {" 12345.67890e-12 ", false, "1.23456789E-08"},
new object[] {" true ", false, "True"},
new object[] {" false ", false, "False"},
new object[] {" null ", false, "null"},
- new object[] {" \" Test string with \\\"nested quotes \\\" and hex: \\uABCD values! \" ", false, " Test string with \\\"nested quotes \\\" and hex: \\uABCD values! "},
+ new object[] {" \" Test string with \\\"nested quotes \\\" and hex: \\uABCD values! \" ", false, " Test string with \"nested quotes \" and hex: \uABCD values! "},
};
}
}
};
}
}
+
+ /// <summary>
+ /// Theory data: JSON string tokens (a quote, 'a', the malformed bytes, 'b', a quote)
+ /// whose payload contains an ill-formed UTF-8 sequence. A fresh list is built on every access.
+ /// </summary>
+ public static IEnumerable<object[]> InvalidUTF8Strings =>
+     new List<object[]>
+     {
+         // Two-byte lead followed by a non-continuation byte.
+         new object[] { new byte[] { 0x22, 0x61, 0xc3, 0x28, 0x62, 0x22 } },
+         // Continuation bytes with no lead byte.
+         new object[] { new byte[] { 0x22, 0x61, 0xa0, 0xa1, 0x62, 0x22 } },
+         // Three-byte lead with a bad second byte.
+         new object[] { new byte[] { 0x22, 0x61, 0xe2, 0x28, 0xa1, 0x62, 0x22 } },
+         // Three-byte lead with a bad third byte.
+         new object[] { new byte[] { 0x22, 0x61, 0xe2, 0x82, 0x28, 0x62, 0x22 } },
+         // Four-byte lead with a bad second byte.
+         new object[] { new byte[] { 0x22, 0x61, 0xf0, 0x28, 0x8c, 0xbc, 0x62, 0x22 } },
+         // Four-byte lead with a bad third byte.
+         new object[] { new byte[] { 0x22, 0x61, 0xf0, 0x90, 0x28, 0xbc, 0x62, 0x22 } },
+         // Four-byte lead with bad second and fourth bytes.
+         new object[] { new byte[] { 0x22, 0x61, 0xf0, 0x28, 0x8c, 0x28, 0x62, 0x22 } },
+     };
}
}