Add TextEquals helper methods to Utf8JsonReader (dotnet/corefx#35979)
authorAhson Khan <ahkha@microsoft.com>
Fri, 15 Mar 2019 05:38:46 +0000 (22:38 -0700)
committerGitHub <noreply@github.com>
Fri, 15 Mar 2019 05:38:46 +0000 (22:38 -0700)
* Add initial impl of ValueEquals with basic tests.

* Rename to TextEquals based on API review feedback.

* Move to separate test file, increase coverage, and fill in missing impl
details.

* Add xml comments and more test cases.

* Re-enable all the tests.

* Update tests to be netstandard compliant.

* Rename some existing tests to be clearer.

* Address PR feedback.

* Return early if we know unescaping won't produce a match.

* More eagerly check the lengths to return mismatch sooner.

* Add tests to improve code coverage and re-write to avoid unreachable
code.

* Fix resource string merge issue.

* Fix test build failures on netstandard (missing implicit string->span
cast)

Commit migrated from https://github.com/dotnet/corefx/commit/b8bc4ff80c5f7baa681e8a569d367356957ba78a

src/libraries/System.Text.Json/ref/System.Text.Json.cs
src/libraries/System.Text.Json/src/Resources/Strings.resx
src/libraries/System.Text.Json/src/System/Text/Json/JsonConstants.cs
src/libraries/System.Text.Json/src/System/Text/Json/Reader/JsonReaderHelper.Unescaping.cs
src/libraries/System.Text.Json/src/System/Text/Json/Reader/Utf8JsonReader.cs
src/libraries/System.Text.Json/src/System/Text/Json/ThrowHelper.cs
src/libraries/System.Text.Json/tests/System.Text.Json.Tests.csproj
src/libraries/System.Text.Json/tests/Utf8JsonReaderTests.MultiSegment.cs
src/libraries/System.Text.Json/tests/Utf8JsonReaderTests.TextEquals.cs [new file with mode: 0644]
src/libraries/System.Text.Json/tests/Utf8JsonReaderTests.cs

index b43b543..c342170 100644 (file)
@@ -186,6 +186,8 @@ namespace System.Text.Json
         [System.CLSCompliantAttribute(false)]
         public ulong GetUInt64() { throw null; }
         public bool Read() { throw null; }
+        public bool TextEquals(ReadOnlySpan<char> otherText) { throw null; }
+        public bool TextEquals(ReadOnlySpan<byte> otherUtf8Text) { throw null; }
         public bool TryGetDateTime(out DateTime value) { throw null; }
         public bool TryGetDateTimeOffset(out DateTimeOffset value) { throw null; }
         public bool TryGetDecimal(out decimal value) { throw null; }
index 133f84c..206c18f 100644 (file)
@@ -1,4 +1,5 @@
-<root>
+<?xml version="1.0" encoding="utf-8"?>
+<root>
   <!-- 
     Microsoft ResX Schema 
     
   <data name="EnumConverterNotImplemented" xml:space="preserve">
     <value>EnumConverter is not yet supported on .NET Standard 2.0.</value>
   </data>
+  <data name="InvalidComparison" xml:space="preserve">
+    <value>Cannot compare the value of a token type '{0}' to text.</value>
+  </data>
   <data name="FormatDateTime" xml:space="preserve">
     <value>The JSON value is of unsupported format for a DateTime.</value>
   </data>
index 84e90ba..156dbd8 100644 (file)
@@ -52,6 +52,11 @@ namespace System.Text.Json
         // The same factor applies to utf-16 characters.
         public const int MaxExpansionFactorWhileEscaping = 6;
 
+        // In the worst case, a single UTF-16 character could be expanded to 3 UTF-8 bytes.
+        // Only surrogate pairs expand to 4 UTF-8 bytes but that is a transformation of 2 UTF-16 characters goign to 4 UTF-8 bytes (factor of 2).
+        // All other UTF-16 characters can be represented by either 1 or 2 UTF-8 bytes.
+        public const int MaxExpansionFactorWhileTranscoding = 3;
+
         public const int MaxTokenSize = 2_000_000_000 / MaxExpansionFactorWhileEscaping;  // 357_913_941 bytes
         public const int MaxCharacterTokenSize = 2_000_000_000 / MaxExpansionFactorWhileEscaping; // 357_913_941 characters
 
index 5a4e8f7..8063db2 100644 (file)
@@ -39,6 +39,74 @@ namespace System.Text.Json
             return utf8String;
         }
 
+        public static bool UnescapeAndCompare(ReadOnlySpan<byte> utf8Source, ReadOnlySpan<byte> other)
+        {
+            Debug.Assert(utf8Source.Length >= other.Length && utf8Source.Length / JsonConstants.MaxExpansionFactorWhileEscaping <= other.Length);
+
+            byte[] unescapedArray = null;
+
+            Span<byte> utf8Unescaped = utf8Source.Length <= JsonConstants.StackallocThreshold ?
+                stackalloc byte[utf8Source.Length] :
+                (unescapedArray = ArrayPool<byte>.Shared.Rent(utf8Source.Length));
+
+            Unescape(utf8Source, utf8Unescaped, 0, out int written);
+            Debug.Assert(written > 0);
+
+            utf8Unescaped = utf8Unescaped.Slice(0, written);
+            Debug.Assert(!utf8Unescaped.IsEmpty);
+
+            bool result = other.SequenceEqual(utf8Unescaped);
+
+            if (unescapedArray != null)
+            {
+                utf8Unescaped.Clear();
+                ArrayPool<byte>.Shared.Return(unescapedArray);
+            }
+
+            return result;
+        }
+
+        public static bool UnescapeAndCompare(ReadOnlySequence<byte> utf8Source, ReadOnlySpan<byte> other)
+        {
+            Debug.Assert(!utf8Source.IsSingleSegment);
+            Debug.Assert(utf8Source.Length >= other.Length && utf8Source.Length / JsonConstants.MaxExpansionFactorWhileEscaping <= other.Length);
+
+            byte[] escapedArray = null;
+            byte[] unescapedArray = null;
+
+            int length = checked((int)utf8Source.Length);
+
+            Span<byte> utf8Unescaped = length <= JsonConstants.StackallocThreshold ?
+                stackalloc byte[length] :
+                (unescapedArray = ArrayPool<byte>.Shared.Rent(length));
+
+            Span<byte> utf8Escaped = length <= JsonConstants.StackallocThreshold ?
+                stackalloc byte[length] :
+                (escapedArray = ArrayPool<byte>.Shared.Rent(length));
+
+            utf8Source.CopyTo(utf8Escaped);
+            utf8Escaped = utf8Escaped.Slice(0, length);
+
+            Unescape(utf8Escaped, utf8Unescaped, 0, out int written);
+            Debug.Assert(written > 0);
+
+            utf8Unescaped = utf8Unescaped.Slice(0, written);
+            Debug.Assert(!utf8Unescaped.IsEmpty);
+
+            bool result = other.SequenceEqual(utf8Unescaped);
+
+            if (unescapedArray != null)
+            {
+                Debug.Assert(escapedArray != null);
+                utf8Unescaped.Clear();
+                ArrayPool<byte>.Shared.Return(unescapedArray);
+                utf8Escaped.Clear();
+                ArrayPool<byte>.Shared.Return(escapedArray);
+            }
+
+            return result;
+        }
+
         public static string TranscodeHelper(ReadOnlySpan<byte> utf8Unescaped)
         {
             try
index fc655b8..aec49d9 100644 (file)
@@ -5,6 +5,7 @@
 using System.Buffers;
 using System.Diagnostics;
 using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
 
 namespace System.Text.Json
 {
@@ -215,6 +216,284 @@ namespace System.Text.Json
             return retVal;
         }
 
+        /// <summary>
+        /// Compares the UTF-8 encoded text to the unescaped JSON token value in the source and returns true if they match.
+        /// </summary>
+        /// <param name="otherUtf8Text">The UTF-8 encoded text to compare against.</param>
+        /// <returns>True if the JSON token value in the source matches the UTF-8 encoded look up text.</returns>
+        /// <exception cref="InvalidOperationException">
+        /// Thrown if trying to find a text match on a JSON token that is not a string
+        /// (i.e. other than <see cref="JsonTokenType.String"/> or <see cref="JsonTokenType.PropertyName"/>).
+        /// <seealso cref="TokenType" />
+        /// </exception>
+        /// <remarks>
+        /// If the look up text is invalid UTF-8 text, the method will return false since you cannot have 
+        /// invalid UTF-8 within the JSON payload.
+        /// </remarks>
+        /// <remarks>
+        /// The comparison of the JSON token value in the source and the look up text is done by first unescaping the JSON value in source,
+        /// if required. The look up text is matched as is, without any modifications to it.
+        /// </remarks>
+        public bool TextEquals(ReadOnlySpan<byte> otherUtf8Text)
+        {
+            if (!IsTokenTypeString(TokenType))
+            {
+                throw ThrowHelper.GetInvalidOperationException_ExpectedStringComparison(TokenType);
+            }
+            return TextEqualsHelper(otherUtf8Text);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private bool TextEqualsHelper(ReadOnlySpan<byte> otherUtf8Text)
+        {
+            if (HasValueSequence)
+            {
+                return CompareToSequence(otherUtf8Text);
+            }
+
+            if (_stringHasEscaping)
+            {
+                return UnescapeAndCompare(otherUtf8Text);
+            }
+
+            return otherUtf8Text.SequenceEqual(ValueSpan);
+        }
+
+        /// <summary>
+        /// Compares the UTF-16 encoded text to the unescaped JSON token value in the source and returns true if they match.
+        /// </summary>
+        /// <param name="otherText">The UTF-16 encoded text to compare against.</param>
+        /// <returns>True if the JSON token value in the source matches the UTF-16 encoded look up text.</returns>
+        /// <exception cref="InvalidOperationException">
+        /// Thrown if trying to find a text match on a JSON token that is not a string
+        /// (i.e. other than <see cref="JsonTokenType.String"/> or <see cref="JsonTokenType.PropertyName"/>).
+        /// <seealso cref="TokenType" />
+        /// </exception>
+        /// <remarks>
+        /// If the look up text is invalid or incomplete UTF-16 text (i.e. unpaired surrogates), the method will return false
+        /// since you cannot have invalid UTF-16 within the JSON payload.
+        /// </remarks>
+        /// <remarks>
+        /// The comparison of the JSON token value in the source and the look up text is done by first unescaping the JSON value in source,
+        /// if required. The look up text is matched as is, without any modifications to it.
+        /// </remarks>
+        public bool TextEquals(ReadOnlySpan<char> otherText)
+        {
+            if (!IsTokenTypeString(TokenType))
+            {
+                throw ThrowHelper.GetInvalidOperationException_ExpectedStringComparison(TokenType);
+            }
+
+            if (MatchNotPossible(otherText.Length))
+            {
+                return false;
+            }
+
+            byte[] otherUtf8TextArray = null;
+
+            Span<byte> otherUtf8Text;
+
+            ReadOnlySpan<byte> utf16Text = MemoryMarshal.AsBytes(otherText);
+
+            int length = checked(utf16Text.Length * JsonConstants.MaxExpansionFactorWhileTranscoding);
+            if (length > JsonConstants.StackallocThreshold)
+            {
+                otherUtf8TextArray = ArrayPool<byte>.Shared.Rent(length);
+                otherUtf8Text = otherUtf8TextArray;
+            }
+            else
+            {
+                // Cannot create a span directly since it gets passed to instance methods on a ref struct.
+                unsafe
+                {
+                    byte* ptr = stackalloc byte[length];
+                    otherUtf8Text = new Span<byte>(ptr, length);
+                }
+            }
+
+            OperationStatus status = JsonWriterHelper.ToUtf8(utf16Text, otherUtf8Text, out int consumed, out int written);
+            Debug.Assert(status != OperationStatus.DestinationTooSmall);
+            if (status > OperationStatus.DestinationTooSmall)   // Equivalent to: (status == NeedMoreData || status == InvalidData)
+            {
+                return false;
+            }
+            Debug.Assert(status == OperationStatus.Done);
+            Debug.Assert(consumed == utf16Text.Length);
+
+            bool result = TextEqualsHelper(otherUtf8Text.Slice(0, written));
+
+            if (otherUtf8TextArray != null)
+            {
+                otherUtf8Text.Slice(0, written).Clear();
+                ArrayPool<byte>.Shared.Return(otherUtf8TextArray);
+            }
+
+            return result;
+        }
+
+        private bool CompareToSequence(ReadOnlySpan<byte> other)
+        {
+            Debug.Assert(HasValueSequence);
+
+            if (_stringHasEscaping)
+            {
+                return UnescapeSequenceAndCompare(other);
+            }
+
+            ReadOnlySequence<byte> localSequence = ValueSequence;
+
+            Debug.Assert(!localSequence.IsSingleSegment);
+
+            if (localSequence.Length != other.Length)
+            {
+                return false;
+            }
+
+            int matchedSoFar = 0;
+
+            foreach (ReadOnlyMemory<byte> memory in localSequence)
+            {
+                ReadOnlySpan<byte> span = memory.Span;
+
+                if (other.Slice(matchedSoFar).StartsWith(span))
+                {
+                    matchedSoFar += span.Length;
+                }
+                else
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        private bool UnescapeAndCompare(ReadOnlySpan<byte> other)
+        {
+            Debug.Assert(!HasValueSequence);
+            ReadOnlySpan<byte> localSpan = ValueSpan;
+
+            if (localSpan.Length < other.Length || localSpan.Length / JsonConstants.MaxExpansionFactorWhileEscaping > other.Length)
+            {
+                return false;
+            }
+
+            int idx = localSpan.IndexOf(JsonConstants.BackSlash);
+            Debug.Assert(idx != -1);
+
+            if (!other.StartsWith(localSpan.Slice(0, idx)))
+            {
+                return false;
+            }
+
+            return JsonReaderHelper.UnescapeAndCompare(localSpan.Slice(idx), other.Slice(idx));
+        }
+
+        private bool UnescapeSequenceAndCompare(ReadOnlySpan<byte> other)
+        {
+            Debug.Assert(HasValueSequence);
+            Debug.Assert(!ValueSequence.IsSingleSegment);
+
+            ReadOnlySequence<byte> localSequence = ValueSequence;
+            long sequenceLength = localSequence.Length;
+
+            // The JSON token value will at most shrink by 6 when unescaping.
+            // If it is still larger than the lookup string, there is no value in unescaping and doing the comparison.
+            if (sequenceLength < other.Length || sequenceLength / JsonConstants.MaxExpansionFactorWhileEscaping > other.Length)
+            {
+                return false;
+            }
+
+            int matchedSoFar = 0;
+
+            bool result = false;
+
+            foreach (ReadOnlyMemory<byte> memory in localSequence)
+            {
+                ReadOnlySpan<byte> span = memory.Span;
+
+                int idx = span.IndexOf(JsonConstants.BackSlash);
+
+                if (idx != -1)
+                {
+                    if (!other.Slice(matchedSoFar).StartsWith(span.Slice(0, idx)))
+                    {
+                        break;
+                    }
+                    matchedSoFar += idx;
+
+                    other = other.Slice(matchedSoFar);
+                    localSequence = localSequence.Slice(matchedSoFar);
+
+                    if (localSequence.IsSingleSegment)
+                    {
+                        result = JsonReaderHelper.UnescapeAndCompare(localSequence.First.Span, other);
+                    }
+                    else
+                    {
+                        result = JsonReaderHelper.UnescapeAndCompare(localSequence, other);
+                    }
+                    break;
+                }
+
+                if (!other.Slice(matchedSoFar).StartsWith(span))
+                {
+                    break;
+                }
+                matchedSoFar += span.Length;
+            }
+
+            return result;
+        }
+
+        // Returns true if the TokenType is a primitive string "value", i.e. PropertyName or String
+        // Otherwise, return false.
+        private static bool IsTokenTypeString(JsonTokenType tokenType) =>
+            (tokenType - JsonTokenType.PropertyName) <= (JsonTokenType.String - JsonTokenType.PropertyName);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private bool MatchNotPossible(int charTextLength)
+        {
+            if (HasValueSequence)
+            {
+                return MatchNotPossibleSequence(charTextLength);
+            }
+
+            int sourceLength = ValueSpan.Length;
+
+            // Transcoding from UTF-16 to UTF-8 will change the length by somwhere between 1x and 3x.
+            // Unescaping the token value will at most shrink its length by 6x.
+            // There is no point incurring the transcoding/unescaping/comparing cost if:
+            // - The token value is smaller than charTextLength
+            // - The token value needs to be transcoded AND unescaped and it is more than 6x larger than charTextLength
+            //      - For an ASCII UTF-16 characters, transcoding = 1x, escaping = 6x => 6x factor
+            //      - For non-ASCII UTF-16 characters within the BMP, transcoding = 2-3x, but they are represented as a single escaped hex value, \uXXXX => 6x factor
+            //      - For non-ASCII UTF-16 characters outside of the BMP, transcoding = 4x, but the surrogate pair (2 characters) are represented by 16 bytes \uXXXX\uXXXX => 6x factor
+            // - The token value needs to be transcoded, but NOT escaped and it is more than 3x larger than charTextLength
+            //      - For an ASCII UTF-16 characters, transcoding = 1x,
+            //      - For non-ASCII UTF-16 characters within the BMP, transcoding = 2-3x,
+            //      - For non-ASCII UTF-16 characters outside of the BMP, transcoding = 2x, (surrogate pairs - 2 characters transcode to 4 UTF-8 bytes)
+
+            if (sourceLength < charTextLength
+                || sourceLength / (_stringHasEscaping ? JsonConstants.MaxExpansionFactorWhileEscaping : JsonConstants.MaxExpansionFactorWhileTranscoding) > charTextLength)
+            {
+                return true;
+            }
+            return false;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        private bool MatchNotPossibleSequence(int charTextLength)
+        {
+            long sourceLength = ValueSequence.Length;
+
+            if (sourceLength < charTextLength
+                || sourceLength / (_stringHasEscaping ? JsonConstants.MaxExpansionFactorWhileEscaping : JsonConstants.MaxExpansionFactorWhileTranscoding) > charTextLength)
+            {
+                return true;
+            }
+            return false;
+        }
+
         private void StartObject()
         {
             if (CurrentDepth >= _readerOptions.MaxDepth)
index bc33f0b..1d6cf8d 100644 (file)
@@ -183,6 +183,11 @@ namespace System.Text.Json
             return GetInvalidOperationException("string", tokenType);
         }
 
+        public static InvalidOperationException GetInvalidOperationException_ExpectedStringComparison(JsonTokenType tokenType)
+        {
+            return GetInvalidOperationException(tokenType);
+        }
+
         [MethodImpl(MethodImplOptions.NoInlining)]
         private static InvalidOperationException GetInvalidOperationException(string message, JsonTokenType tokenType)
         {
@@ -190,6 +195,12 @@ namespace System.Text.Json
         }
 
         [MethodImpl(MethodImplOptions.NoInlining)]
+        private static InvalidOperationException GetInvalidOperationException(JsonTokenType tokenType)
+        {
+            return new InvalidOperationException(SR.Format(SR.InvalidComparison, tokenType));
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
         internal static InvalidOperationException GetJsonElementWrongTypeException(
             JsonTokenType expectedType,
             JsonTokenType actualType)
index 6e9b53a..672a8b0 100644 (file)
@@ -40,6 +40,7 @@
     <Compile Include="TestCaseType.cs" />
     <Compile Include="Utf8JsonReaderTests.cs" />
     <Compile Include="Utf8JsonReaderTests.MultiSegment.cs" />
+    <Compile Include="Utf8JsonReaderTests.TextEquals.cs" />
     <Compile Include="Utf8JsonReaderTests.TryGet.cs" />
     <Compile Include="Utf8JsonWriterTests.cs" />
   </ItemGroup>
index 85597a0..2614e95 100644 (file)
@@ -249,7 +249,7 @@ namespace System.Text.Json.Tests
         }
 
         [Fact]
-        public static void TestSingleStringsByOne()
+        public static void TestSingleStringsMultiSegmentByOne()
         {
             string jsonString = "\"Hello, \\u0041hson!\"";
             string expectedString = "Hello, \\u0041hson!, ";
@@ -277,7 +277,7 @@ namespace System.Text.Json.Tests
         }
 
         [Fact]
-        public static void TestSingleStrings()
+        public static void TestSingleStringsMultiSegment()
         {
             string jsonString = "\"Hello, \\u0041hson!\"";
             string expectedString = "Hello, \\u0041hson!, ";
diff --git a/src/libraries/System.Text.Json/tests/Utf8JsonReaderTests.TextEquals.cs b/src/libraries/System.Text.Json/tests/Utf8JsonReaderTests.TextEquals.cs
new file mode 100644 (file)
index 0000000..eb23278
--- /dev/null
@@ -0,0 +1,635 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Buffers;
+using Xunit;
+
+namespace System.Text.Json.Tests
+{
+    public static partial class Utf8JsonReaderTests
+    {
+        [Fact]
+        public static void TestTextEqualsBasic()
+        {
+            byte[] connectionId = Encoding.UTF8.GetBytes("connectionId");
+            byte[] availableTransports = Encoding.UTF8.GetBytes("availableTransports");
+            byte[] value123 = Encoding.UTF8.GetBytes("123");
+            byte[] embeddedQuotes = Encoding.UTF8.GetBytes("My name is \"Ahson\"");
+            bool foundId = false;
+            bool foundTransports = false;
+            bool foundValue = false;
+            bool foundArrayValue = false;
+
+            string jsonString = "{\"conne\\u0063tionId\":\"123\",\"availableTransports\":[\"My name is \\\"Ahson\\\"\"]}";
+            byte[] utf8Data = Encoding.UTF8.GetBytes(jsonString);
+
+            var json = new Utf8JsonReader(utf8Data, isFinalBlock: true, state: default);
+            while (json.Read())
+            {
+                if (json.TokenType == JsonTokenType.PropertyName)
+                {
+                    if (json.TextEquals(connectionId) && json.TextEquals("connectionId".AsSpan()))
+                    {
+                        foundId = true;
+                    }
+                    else if (json.TextEquals(availableTransports) && json.TextEquals("availableTransports".AsSpan()))
+                    {
+                        foundTransports = true;
+                    }
+                }
+                else if (json.TokenType == JsonTokenType.String)
+                {
+                    if (json.TextEquals(value123) && json.TextEquals("123".AsSpan()))
+                    {
+                        foundValue = true;
+                    }
+                    else if (json.TextEquals(embeddedQuotes) && json.TextEquals("My name is \"Ahson\"".AsSpan()))
+                    {
+                        foundArrayValue = true;
+                    }
+                }
+            }
+
+            Assert.True(foundId);
+            Assert.True(foundTransports);
+            Assert.True(foundValue);
+            Assert.True(foundArrayValue);
+        }
+
+        [Theory]
+        [InlineData("{\"name\": 1234}", "name", true)]
+        [InlineData("{\"name\": 1234}", "namee", false)]
+        [InlineData("{\"name\": 1234}", "na\\u006de", false)]
+        [InlineData("{\"name\": 1234}", "", false)]
+        [InlineData("{\"\": 1234}", "name", false)]
+        [InlineData("{\"\": 1234}", "na\\u006de", false)]
+        [InlineData("{\"\": 1234}", "", true)]
+        [InlineData("{\"na\\u006de\": 1234}", "name", true)]
+        [InlineData("{\"na\\u006de\": 1234}", "namee", false)]
+        [InlineData("{\"na\\u006de\": 1234}", "na\\u006de", false)]
+        [InlineData("{\"na\\u006de\": 1234}", "", false)]
+        public static void TestTextEquals(string jsonString, string lookUpString, bool expectedFound)
+        {
+            byte[] lookup = Encoding.UTF8.GetBytes(lookUpString);
+            byte[] utf8Data = Encoding.UTF8.GetBytes(jsonString);
+            bool found = false;
+
+            var json = new Utf8JsonReader(utf8Data, isFinalBlock: true, state: default);
+            while (json.Read())
+            {
+                if (json.TokenType == JsonTokenType.PropertyName)
+                {
+                    if (json.TextEquals(lookup) && json.TextEquals(lookUpString.AsSpan()))
+                    {
+                        found = true;
+                        break;
+                    }
+                }
+            }
+
+            Assert.Equal(expectedFound, found);
+
+            ReadOnlySequence<byte> sequence = JsonTestHelper.GetSequence(utf8Data, 1);
+            found = false;
+
+            json = new Utf8JsonReader(sequence, isFinalBlock: true, state: default);
+            while (json.Read())
+            {
+                if (json.TokenType == JsonTokenType.PropertyName)
+                {
+                    if (json.TextEquals(lookup) && json.TextEquals(lookUpString.AsSpan()))
+                    {
+                        found = true;
+                        break;
+                    }
+                }
+            }
+
+            Assert.Equal(expectedFound, found);
+        }
+
+        [Theory]
+        [InlineData("{\"name\": \"John\"}", "John", true)]
+        [InlineData("{\"name\": \"John\"}", "Johna", false)]
+        [InlineData("{\"name\": \"John\"}", "Joh\\u006e", false)]
+        [InlineData("{\"name\": \"John\"}", "", false)]
+        [InlineData("{\"name\": \"\"}", "John", false)]
+        [InlineData("{\"name\": \"\"}", "Joh\\u006e", false)]
+        [InlineData("{\"name\": \"\"}", "", true)]
+        [InlineData("{\"name\": \"Joh\\u006e\"}", "John", true)]
+        [InlineData("{\"name\": \"Joh\\u006e\"}", "Johna", false)]
+        [InlineData("{\"name\": \"Joh\\u006e\"}", "Joh\\u006e", false)]
+        [InlineData("{\"name\": \"Joh\\u006e\"}", "", false)]
+        public static void TestTextEqualsValue(string jsonString, string lookUpString, bool expectedFound)
+        {
+            byte[] lookup = Encoding.UTF8.GetBytes(lookUpString);
+            byte[] utf8Data = Encoding.UTF8.GetBytes(jsonString);
+            bool found = false;
+
+            var json = new Utf8JsonReader(utf8Data, isFinalBlock: true, state: default);
+            while (json.Read())
+            {
+                if (json.TokenType == JsonTokenType.String)
+                {
+                    if (json.TextEquals(lookup) && json.TextEquals(lookUpString.AsSpan()))
+                    {
+                        found = true;
+                        break;
+                    }
+                }
+            }
+
+            Assert.Equal(expectedFound, found);
+
+            ReadOnlySequence<byte> sequence = JsonTestHelper.GetSequence(utf8Data, 1);
+            found = false;
+
+            json = new Utf8JsonReader(sequence, isFinalBlock: true, state: default);
+            while (json.Read())
+            {
+                if (json.TokenType == JsonTokenType.String)
+                {
+                    if (json.TextEquals(lookup) && json.TextEquals(lookUpString.AsSpan()))
+                    {
+                        found = true;
+                        break;
+                    }
+                }
+            }
+
+            Assert.Equal(expectedFound, found);
+        }
+
+        [Fact]
+        public static void TestTextEqualsLargeMatch()
+        {
+            var jsonChars = new char[320];  // Some value larger than 256 (stack threshold)
+            jsonChars.AsSpan().Fill('a');
+            byte[] lookup = Encoding.UTF8.GetBytes(jsonChars);
+
+            ReadOnlySpan<char> escapedA = new char[6] { '\\', 'u', '0', '0', '6', '1' };
+
+            ReadOnlySpan<byte> lookupSpan = lookup.AsSpan(0, lookup.Length - escapedA.Length + 1);   // remove extra characters that were replaced by escaped bytes
+            Span<char> lookupChars = new char[jsonChars.Length];
+            jsonChars.CopyTo(lookupChars);
+            lookupChars = lookupChars.Slice(0, lookupChars.Length - escapedA.Length + 1);
+
+            // Replacing 'a' with '\u0061', so a net change of 5.
+            // escapedA.Length - 1 = 6 - 1 = 5
+            for (int i = 0; i < jsonChars.Length - escapedA.Length + 1; i++)
+            {
+                jsonChars.AsSpan().Fill('a');
+                escapedA.CopyTo(jsonChars.AsSpan(i));
+                string jsonString = "\"" + new string(jsonChars) + "\"";
+                byte[] utf8Data = Encoding.UTF8.GetBytes(jsonString);
+
+                bool found = false;
+
+                var json = new Utf8JsonReader(utf8Data, isFinalBlock: true, state: default);
+                while (json.Read())
+                {
+                    if (json.TokenType == JsonTokenType.String)
+                    {
+                        if (json.TextEquals(lookupSpan) && json.TextEquals(lookupChars))
+                        {
+                            found = true;
+                            break;
+                        }
+                    }
+                }
+
+                Assert.True(found, $"Json String: {jsonString}");
+
+                ReadOnlySequence<byte> sequence = JsonTestHelper.GetSequence(utf8Data, 1);
+                found = false;
+
+                json = new Utf8JsonReader(sequence, isFinalBlock: true, state: default);
+                while (json.Read())
+                {
+                    if (json.TokenType == JsonTokenType.String)
+                    {
+                        if (json.TextEquals(lookupSpan) && json.TextEquals(lookupChars))
+                        {
+                            found = true;
+                            break;
+                        }
+                    }
+                }
+
+                Assert.True(found, $"Json String: {jsonString}  | Look up: {Encoding.UTF8.GetString(lookupSpan.ToArray())}");
+            }
+        }
+
+        [Fact]
+        public static void TestTextEqualsLargeMismatch()
+        {
+            var jsonChars = new char[320];  // Some value larger than 256 (stack threshold)
+            jsonChars.AsSpan().Fill('a');
+            ReadOnlySpan<char> escapedA = new char[6] { '\\', 'u', '0', '0', '6', '1' };
+
+            byte[] originalLookup = Encoding.UTF8.GetBytes(jsonChars);
+
+            char[] originalLookupChars = new char[jsonChars.Length];
+            Array.Copy(jsonChars, originalLookupChars, jsonChars.Length);
+
+            for (int i = 1; i < jsonChars.Length - 6; i++)
+            {
+                jsonChars.AsSpan().Fill('a');
+                escapedA.CopyTo(jsonChars.AsSpan(i));
+                string jsonString = "\"" + new string(jsonChars) + "\"";
+                byte[] utf8Data = Encoding.UTF8.GetBytes(jsonString);
+
+                for (int j = 0; j < 3; j++)
+                {
+                    Span<byte> lookup = new byte[originalLookup.Length];
+                    originalLookup.CopyTo(lookup);
+                    lookup = lookup.Slice(0, lookup.Length - escapedA.Length + 1);    // remove extra characters that were replaced by escaped bytes
+
+                    Span<char> lookupChars = new char[originalLookupChars.Length];
+                    originalLookupChars.CopyTo(lookupChars);
+                    lookupChars = lookupChars.Slice(0, lookupChars.Length - escapedA.Length + 1);    // remove extra characters that were replaced by escaped bytes
+
+                    switch (j)
+                    {
+                        case 0:
+                            lookup[i] = (byte)'b';
+                            lookupChars[i] = 'b';
+                            break;
+                        case 1:
+                            lookup[i + 1] = (byte)'b';
+                            lookupChars[i + 1] = 'b';
+                            break;
+                        case 2:
+                            lookup[i - 1] = (byte)'b';
+                            lookupChars[i - 1] = 'b';
+                            break;
+                    }
+
+                    bool found = false;
+
+                    var json = new Utf8JsonReader(utf8Data, isFinalBlock: true, state: default);
+                    while (json.Read())
+                    {
+                        if (json.TokenType == JsonTokenType.String)
+                        {
+                            if (json.TextEquals(lookup) || json.TextEquals(lookupChars))
+                            {
+                                found = true;
+                                break;
+                            }
+                        }
+                    }
+
+                    Assert.False(found, $"Json String: {jsonString}");
+
+                    ReadOnlySequence<byte> sequence = JsonTestHelper.GetSequence(utf8Data, 1);
+                    found = false;
+
+                    json = new Utf8JsonReader(sequence, isFinalBlock: true, state: default);
+                    while (json.Read())
+                    {
+                        if (json.TokenType == JsonTokenType.String)
+                        {
+                            if (json.TextEquals(lookup) || json.TextEquals(lookupChars))
+                            {
+                                found = true;
+                                break;
+                            }
+                        }
+                    }
+
+                    Assert.False(found);
+                }
+            }
+        }
+
+        [Theory]
+        [InlineData("\"\\u0061\\u0061\"")]
+        [InlineData("\"aaaaaaaaaaaa\"")]
+        public static void TestTextEqualsTooSmallToMatch(string jsonString)
+        {
+            byte[] utf8Data = Encoding.UTF8.GetBytes(jsonString);
+
+            bool found = false;
+
+            var json = new Utf8JsonReader(utf8Data, isFinalBlock: true, state: default);
+            while (json.Read())
+            {
+                if (json.TokenType == JsonTokenType.String)
+                {
+                    if (json.TextEquals(new byte[] { (byte)'a' }) || json.TextEquals(new char[] { 'a' }))
+                    {
+                        found = true;
+                        break;
+                    }
+                }
+            }
+
+            Assert.False(found);
+
+            ReadOnlySequence<byte> sequence = JsonTestHelper.GetSequence(utf8Data, 1);
+            found = false;
+
+            json = new Utf8JsonReader(sequence, isFinalBlock: true, state: default);
+            while (json.Read())
+            {
+                if (json.TokenType == JsonTokenType.String)
+                {
+                    if (json.TextEquals(new byte[] { (byte)'a' }) || json.TextEquals(new char[] { 'a' }))
+                    {
+                        found = true;
+                        break;
+                    }
+                }
+            }
+
+            Assert.False(found);
+        }
+
+        [Theory]
+        [InlineData("\"\\u0061\\u0061\"")]
+        [InlineData("\"aaaaaaaaaaaa\"")]
+        public static void TestTextEqualsTooLargeToMatch(string jsonString)
+        {
+            byte[] utf8Data = Encoding.UTF8.GetBytes(jsonString);
+
+            var lookupString = new string('a', 13);
+
+            bool found = false;
+
+            var json = new Utf8JsonReader(utf8Data, isFinalBlock: true, state: default);
+            while (json.Read())
+            {
+                if (json.TokenType == JsonTokenType.String)
+                {
+                    if (json.TextEquals(Encoding.UTF8.GetBytes(lookupString)) || json.TextEquals(lookupString.AsSpan()))
+                    {
+                        found = true;
+                        break;
+                    }
+                }
+            }
+
+            Assert.False(found);
+
+            ReadOnlySequence<byte> sequence = JsonTestHelper.GetSequence(utf8Data, 1);
+            found = false;
+
+            json = new Utf8JsonReader(sequence, isFinalBlock: true, state: default);
+            while (json.Read())
+            {
+                if (json.TokenType == JsonTokenType.String)
+                {
+                    if (json.TextEquals(Encoding.UTF8.GetBytes(lookupString)) || json.TextEquals(lookupString.AsSpan()))
+                    {
+                        found = true;
+                        break;
+                    }
+                }
+            }
+
+            Assert.False(found);
+        }
+
+        [Theory]
+        [InlineData("\"aaabbb\"", "aaaaaa")]
+        [InlineData("\"bbbaaa\"", "aaaaaa")]
+        public static void TextMismatchSameLength(string jsonString, string lookupString)
+        {
+            byte[] utf8Data = Encoding.UTF8.GetBytes(jsonString);
+
+            bool found = false;
+
+            var json = new Utf8JsonReader(utf8Data, isFinalBlock: true, state: default);
+            while (json.Read())
+            {
+                if (json.TokenType == JsonTokenType.String)
+                {
+                    if (json.TextEquals(Encoding.UTF8.GetBytes(lookupString)) || json.TextEquals(lookupString.AsSpan()))
+                    {
+                        found = true;
+                        break;
+                    }
+                }
+            }
+
+            Assert.False(found);
+
+            ReadOnlySequence<byte> sequence = JsonTestHelper.CreateSegments(utf8Data);
+            found = false;
+
+            json = new Utf8JsonReader(sequence, isFinalBlock: true, state: default);
+            while (json.Read())
+            {
+                if (json.TokenType == JsonTokenType.String)
+                {
+                    if (json.TextEquals(Encoding.UTF8.GetBytes(lookupString)) || json.TextEquals(lookupString.AsSpan()))
+                    {
+                        found = true;
+                        break;
+                    }
+                }
+            }
+
+            Assert.False(found);
+        }
+
+        [Fact]
+        public static void TextEqualsEscapedCharAtTheLastSegment()
+        {
+            string jsonString = "\"aaaaaa\\u0061\"";
+            string lookupString = "aaaaaaa";
+            byte[] utf8Data = Encoding.UTF8.GetBytes(jsonString);
+
+            bool found = false;
+
+            var json = new Utf8JsonReader(utf8Data, isFinalBlock: true, state: default);
+            while (json.Read())
+            {
+                if (json.TokenType == JsonTokenType.String)
+                {
+                    if (json.TextEquals(Encoding.UTF8.GetBytes(lookupString)) || json.TextEquals(lookupString.AsSpan()))
+                    {
+                        found = true;
+                        break;
+                    }
+                }
+            }
+
+            Assert.True(found);
+
+            ReadOnlySequence<byte> sequence = JsonTestHelper.CreateSegments(utf8Data);
+            found = false;
+
+            json = new Utf8JsonReader(sequence, isFinalBlock: true, state: default);
+            while (json.Read())
+            {
+                if (json.TokenType == JsonTokenType.String)
+                {
+                    if (json.TextEquals(Encoding.UTF8.GetBytes(lookupString)) || json.TextEquals(lookupString.AsSpan()))
+                    {
+                        found = true;
+                        break;
+                    }
+                }
+            }
+
+            Assert.True(found);
+        }
+
+        [Fact]
+        public static void TestTextEqualsMismatchMultiSegment()
+        {
+            string jsonString = "\"Hi, \\\"Ahson\\\"!\"";
+            byte[] lookup = Encoding.UTF8.GetBytes("Hello, \"Ahson\"");
+            byte[] utf8Data = Encoding.UTF8.GetBytes(jsonString);
+            bool found = false;
+
+            // Segment 1: "Hi, \"A
+            // Segment 2: hson\"!"
+            ReadOnlySequence<byte> sequence = JsonTestHelper.CreateSegments(utf8Data);
+
+            var json = new Utf8JsonReader(sequence, isFinalBlock: true, state: default);
+            while (json.Read())
+            {
+                if (json.TokenType == JsonTokenType.String)
+                {
+                    if (json.TextEquals(lookup) || json.TextEquals("Hello, \"Ahson\"".AsSpan()))
+                    {
+                        found = true;
+                        break;
+                    }
+                }
+            }
+
+            Assert.False(found);
+        }
+
+        [Theory]
+        [InlineData("\"hello\"", new char[1] { (char)0xDC01 })]    // low surrogate - invalid
+        [InlineData("\"hello\"", new char[1] { (char)0xD801 })]    // high surrogate - missing pair
+        public static void InvalidUTF16Search(string jsonString, char[] lookup)
+        {
+            byte[] utf8Data = Encoding.UTF8.GetBytes(jsonString);
+            bool found = false;
+
+            var json = new Utf8JsonReader(utf8Data, isFinalBlock: true, state: default);
+            while (json.Read())
+            {
+                if (json.TokenType == JsonTokenType.String)
+                {
+                    if (json.TextEquals(lookup))
+                    {
+                        found = true;
+                        break;
+                    }
+                }
+            }
+
+            Assert.False(found);
+        }
+
+        [Fact]
+        [OuterLoop]
+        public static void ReallyLargeLookupUTF16()
+        {
+            string jsonString = "\"hello\"";
+            string lookup = new string('a', 1_000_000_000);
+            byte[] utf8Data = Encoding.UTF8.GetBytes(jsonString);
+            bool found = false;
+
+            var json = new Utf8JsonReader(utf8Data, isFinalBlock: true, state: default);
+            while (json.Read())
+            {
+                if (json.TokenType == JsonTokenType.String)
+                {
+                    try
+                    {
+                        if (json.TextEquals(lookup.AsSpan()))
+                        {
+                            found = true;
+                        }
+                        Assert.True(false, $"Expected OverflowException was not thrown when calling TextEquals with large lookup string");
+                    }
+                    catch (OverflowException)
+                    { }
+                }
+            }
+
+            Assert.False(found);
+        }
+
+        [Fact]
+        public static void ReallyLargeLookupUTF8()
+        {
+            string jsonString = "\"hello\"";
+            byte[] lookup = new byte[1_000_000_000];
+            lookup.AsSpan().Fill((byte)'a');
+            byte[] utf8Data = Encoding.UTF8.GetBytes(jsonString);
+            bool found = false;
+
+            var json = new Utf8JsonReader(utf8Data, isFinalBlock: true, state: default);
+            while (json.Read())
+            {
+                if (json.TokenType == JsonTokenType.String)
+                {
+                    if (json.TextEquals(lookup))
+                    {
+                        found = true;
+                        break;
+                    }
+                }
+            }
+
+            Assert.False(found);
+        }
+
+        [Theory]
+        [InlineData("/*comment*/[1234, true, false, /*comment*/ null, {}]/*comment*/")]
+        public static void TestTextEqualsInvalid(string jsonString)
+        {
+            byte[] utf8Data = Encoding.UTF8.GetBytes(jsonString);
+
+            var state = new JsonReaderState(options: new JsonReaderOptions { CommentHandling = JsonCommentHandling.Allow });
+            var json = new Utf8JsonReader(utf8Data, isFinalBlock: true, state);
+
+            try
+            {
+                json.TextEquals(default(ReadOnlySpan<byte>));
+                Assert.True(false, $"Expected InvalidOperationException was not thrown when calling TextEquals with TokenType = {json.TokenType}");
+            }
+            catch (InvalidOperationException)
+            { }
+
+            try
+            {
+                json.TextEquals(default(ReadOnlySpan<char>));
+                Assert.True(false, $"Expected InvalidOperationException was not thrown when calling TextEquals(char) with TokenType = {json.TokenType}");
+            }
+            catch (InvalidOperationException)
+            { }
+
+            while (json.Read())
+            {
+                try
+                {
+                    json.TextEquals(default(ReadOnlySpan<byte>));
+                    Assert.True(false, $"Expected InvalidOperationException was not thrown when calling TextEquals with TokenType = {json.TokenType}");
+                }
+                catch (InvalidOperationException)
+                { }
+
+                try
+                {
+                    json.TextEquals(default(ReadOnlySpan<char>));
+                    Assert.True(false, $"Expected InvalidOperationException was not thrown when calling TextEquals(char) with TokenType = {json.TokenType}");
+                }
+                catch (InvalidOperationException)
+                { }
+            }
+
+            Assert.Equal(utf8Data.Length, json.BytesConsumed);
+        }
+    }
+}
index 2563944..806856d 100644 (file)
@@ -190,7 +190,7 @@ namespace System.Text.Json.Tests
         }
 
         [Fact]
-        public static void TestPartialJsonReader()
+        public static void TestSingleStrings()
         {
             string jsonString = "\"Hello, \\u0041hson!\"";
             string expectedString = "Hello, \\u0041hson!, ";