Utf8JsonReader.cs - Add support for single line comments ending on \r, \r\n (dotnet...
authorSteven Weerdenburg <stevenaw@users.noreply.github.com>
Fri, 8 Mar 2019 03:49:06 +0000 (22:49 -0500)
committerAhson Khan <ahkha@microsoft.com>
Fri, 8 Mar 2019 03:49:06 +0000 (19:49 -0800)
* Add support for single line comments ending on \r, \r\n

* Single Segment done

* Move single segment tests over. Add 2/3 multisegment tests

* Multi-segment tests

* Fix formatting in tests file

* Fix formatting feedback. Fix failing test post-rebase

* Modified logic and added new test cases

* Changes related to review comments

* Fixes to tests

* Remove Verify Info, fix issues

* Fix NETFX_x86 test failure

Commit migrated from https://github.com/dotnet/corefx/commit/c25d5b76dcb2a53e032a333a5cce607ed565345d

src/libraries/System.Text.Json/src/System/Text/Json/Reader/Utf8JsonReader.MultiSegment.cs
src/libraries/System.Text.Json/src/System/Text/Json/Reader/Utf8JsonReader.cs
src/libraries/System.Text.Json/tests/Utf8JsonReaderTests.MultiSegment.cs
src/libraries/System.Text.Json/tests/Utf8JsonReaderTests.cs

index 01b6c39..9ff68fd 100644 (file)
@@ -2133,31 +2133,57 @@ namespace System.Text.Json
         private bool SkipSingleLineCommentMultiSegment(ReadOnlySpan<byte> localBuffer, int leftOver)
         {
             long prevTotalConsumed = _totalConsumed;
-            int idx;
+            int idx = -1;
+            bool expectLF = false;
             do
             {
-                // TODO: https://github.com/dotnet/corefx/issues/33293
-                idx = localBuffer.IndexOf(JsonConstants.LineFeed);
-                if (idx == -1)
+                if (expectLF)
                 {
-                    if (IsLastSpan)
+                    if (localBuffer[0] == JsonConstants.LineFeed)
                     {
-                        idx = localBuffer.Length;
-                        // Assume everything on this line is a comment and there is no more data.
-                        _bytePositionInLine += 2 + localBuffer.Length;
-                        goto Done;
+                        idx++;
+                    }
+                    break;
+                }
+                idx = localBuffer.IndexOfAny(JsonConstants.LineFeed, JsonConstants.CarriageReturn);
+                if (idx != -1)
+                {
+                    if (localBuffer[idx] == JsonConstants.LineFeed)
+                    {
+                        break;
                     }
 
-                    if (!GetNextSpan())
+                    // If we are here, we have definintely found a \r. So now to check if \n follows.
+                    Debug.Assert(localBuffer[idx] == JsonConstants.CarriageReturn);
+
+                    if (idx < localBuffer.Length - 1)
                     {
-                        _totalConsumed = prevTotalConsumed;
-                        return false;
+                        if (localBuffer[idx + 1] == JsonConstants.LineFeed)
+                        {
+                            idx++;
+                        }
+                        break;
                     }
-                    _totalConsumed += localBuffer.Length + leftOver;
-                    leftOver = 0;
-                    localBuffer = _buffer;
+                    expectLF = true;
+                }
+                if (IsLastSpan)
+                {
+                    idx = localBuffer.Length;
+                    // Assume everything on this line is a comment and there is no more data.
+                    _bytePositionInLine += 2 + localBuffer.Length;
+                    goto Done;
                 }
-            } while (idx == -1);
+
+                if (!GetNextSpan())
+                {
+                    _totalConsumed = prevTotalConsumed;
+                    return false;
+                }
+                _totalConsumed += localBuffer.Length + leftOver;
+                leftOver = 0;
+                localBuffer = _buffer;
+                idx = -1;
+            } while (true);
 
             idx++;
             _bytePositionInLine = 0;
@@ -2292,32 +2318,59 @@ namespace System.Text.Json
         private bool ConsumeSingleLineCommentMultiSegment(ReadOnlySpan<byte> localBuffer, int leftOver, SequencePosition start, int previousConsumed)
         {
             long prevTotalConsumed = _totalConsumed;
-            int idx;
+            int idx = -1;
+            bool expectLF = false;
             do
             {
-                // TODO: https://github.com/dotnet/corefx/issues/33293
-                idx = localBuffer.IndexOf(JsonConstants.LineFeed);
-                if (idx == -1)
+                if (expectLF)
                 {
-                    if (IsLastSpan)
+                    if (localBuffer[0] == JsonConstants.LineFeed)
                     {
-                        idx = localBuffer.Length;
-                        // Assume everything on this line is a comment and there is no more data.
-                        _bytePositionInLine += 2 + localBuffer.Length;
-                        goto Done;
+                        idx++;
+                    }
+                    HasValueSequence = true;
+                    break;
+                }
+                idx = localBuffer.IndexOfAny(JsonConstants.LineFeed, JsonConstants.CarriageReturn);
+                if (idx != -1)
+                {
+                    if (localBuffer[idx] == JsonConstants.LineFeed)
+                    {
+                        break;
                     }
 
-                    if (!GetNextSpan())
+                    // If we are here, we have definintely found a \r. So now to check if \n follows.
+                    Debug.Assert(localBuffer[idx] == JsonConstants.CarriageReturn);
+
+                    if (idx < localBuffer.Length - 1)
                     {
-                        _totalConsumed = prevTotalConsumed;
-                        return false;
+                        if (localBuffer[idx + 1] == JsonConstants.LineFeed)
+                        {
+                            idx++;
+                        }
+                        break;
                     }
-                    HasValueSequence = true;
-                    _totalConsumed += localBuffer.Length + leftOver;
-                    leftOver = 0;
-                    localBuffer = _buffer;
+                    expectLF = true;
+                }
+                if (IsLastSpan)
+                {
+                    idx = localBuffer.Length;
+                    // Assume everything on this line is a comment and there is no more data.
+                    _bytePositionInLine += 2 + localBuffer.Length;
+                    goto Done;
                 }
-            } while (idx == -1);
+
+                if (!GetNextSpan())
+                {
+                    _totalConsumed = prevTotalConsumed;
+                    return false;
+                }
+                HasValueSequence = true;
+                _totalConsumed += localBuffer.Length + leftOver;
+                leftOver = 0;
+                localBuffer = _buffer;
+                idx = -1;
+            } while (true);
 
             idx++;
             _bytePositionInLine = 0;
index 08f9507..fc655b8 100644 (file)
@@ -1712,23 +1712,43 @@ namespace System.Text.Json
 
         private bool SkipSingleLineComment(ReadOnlySpan<byte> localBuffer, out int idx)
         {
-            // TODO: https://github.com/dotnet/corefx/issues/33293
-            idx = localBuffer.IndexOf(JsonConstants.LineFeed);
-            if (idx == -1)
+            idx = localBuffer.IndexOfAny(JsonConstants.LineFeed, JsonConstants.CarriageReturn);
+            if (idx != -1)
             {
-                if (IsLastSpan)
+                if (localBuffer[idx] == JsonConstants.LineFeed)
                 {
-                    idx = localBuffer.Length;
-                    // Assume everything on this line is a comment and there is no more data.
-                    _bytePositionInLine += 2 + localBuffer.Length;
-                    goto Done;
+                    goto EndOfComment;
+                }
+
+                // If we are here, we have definintely found a \r. So now to check if \n follows.
+                Debug.Assert(localBuffer[idx] == JsonConstants.CarriageReturn);
+
+                if (idx < localBuffer.Length - 1)
+                {
+                    if (localBuffer[idx + 1] == JsonConstants.LineFeed)
+                    {
+                        idx++;
+                    }
+                    goto EndOfComment;
                 }
+            }
+            if (IsLastSpan)
+            {
+                idx = localBuffer.Length;
+                // Assume everything on this line is a comment and there is no more data.
+                _bytePositionInLine += 2 + localBuffer.Length;
+                goto Done;
+            }
+            else
+            {
                 return false;
             }
 
+        EndOfComment:
             idx++;
             _bytePositionInLine = 0;
             _lineNumber++;
+
         Done:
             _consumed += 2 + idx;
             return true;
index ca81480..0e54851 100644 (file)
@@ -310,6 +310,9 @@ namespace System.Text.Json.Tests
         [InlineData("{\"test\":[[[123,456]]]}", "test123456", "test123456")]
         [InlineData("/*a*//*z*/[/*b*//*z*/123/*c*//*z*/,/*d*//*z*/456/*e*//*z*/]/*f*//*z*/", "123456", "/*a*//*z*//*b*//*z*/123/*c*//*z*//*d*//*z*/456/*e*//*z*//*f*//*z*/")]
         [InlineData("[123,/*hi*/456/*bye*/]", "123456", "123/*hi*/456/*bye*/")]
+        [InlineData("[123,//hi\n456//bye\n]", "123456", "123//hi\n456//bye\n")]
+        [InlineData("[123,//hi\r456//bye\r]", "123456", "123//hi\r456//bye\r")]
+        [InlineData("[123,//hi\r\n456]", "123456", "123//hi\r\n456")]
         [InlineData("/*a*//*z*/{/*b*//*z*/\"test\":/*c*//*z*/[/*d*//*z*/[/*e*//*z*/[/*f*//*z*/123/*g*//*z*/,/*h*//*z*/456/*i*//*z*/]/*j*//*z*/]/*k*//*z*/]/*l*//*z*/}/*m*//*z*/",
     "test123456", "/*a*//*z*//*b*//*z*/test/*c*//*z*//*d*//*z*//*e*//*z*//*f*//*z*/123/*g*//*z*//*h*//*z*/456/*i*//*z*//*j*//*z*//*k*//*z*//*l*//*z*//*m*//*z*/")]
         [InlineData("//a\n//z\n{//b\n//z\n\"test\"://c\n//z\n[//d\n//z\n[//e\n//z\n[//f\n//z\n123//g\n//z\n,//h\n//z\n456//i\n//z\n]//j\n//z\n]//k\n//z\n]//l\n//z\n}//m\n//z\n",
@@ -562,5 +565,45 @@ namespace System.Text.Json.Tests
                 Assert.True(json.ValueSequence.IsEmpty);
             }
         }
+
+        [Theory]
+        [MemberData(nameof(SingleLineCommentData))]
+        public static void ConsumeSingleLineCommentMultiSpanTest(string expected)
+        {
+            string jsonData = "{" + expected + "}";
+            byte[] dataUtf8 = Encoding.UTF8.GetBytes(jsonData);
+            ReadOnlySequence<byte> sequence = JsonTestHelper.GetSequence(dataUtf8, 1);
+
+            for (int i = 0; i < jsonData.Length; i++)
+            {
+                var state = new JsonReaderState(options: new JsonReaderOptions { CommentHandling = JsonCommentHandling.Allow });
+
+                var json = new Utf8JsonReader(sequence.Slice(0, i), isFinalBlock: false, state);
+                VerifyReadLoop(ref json, expected);
+
+                json = new Utf8JsonReader(sequence.Slice(state.BytesConsumed), isFinalBlock: true, state);
+                VerifyReadLoop(ref json, expected);
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(SingleLineCommentData))]
+        public static void SkipSingleLineCommentMultiSpanTest(string expected)
+        {
+            string jsonData = "{" + expected + "}";
+            byte[] dataUtf8 = Encoding.UTF8.GetBytes(jsonData);
+            ReadOnlySequence<byte> sequence = JsonTestHelper.GetSequence(dataUtf8, 1);
+
+            for (int i = 0; i < jsonData.Length; i++)
+            {
+                var state = new JsonReaderState(options: new JsonReaderOptions { CommentHandling = JsonCommentHandling.Skip });
+
+                var json = new Utf8JsonReader(sequence.Slice(0, i), isFinalBlock: false, state);
+                VerifyReadLoop(ref json, null);
+
+                json = new Utf8JsonReader(sequence.Slice(state.BytesConsumed), isFinalBlock: true, state);
+                VerifyReadLoop(ref json, null);
+            }
+        }
     }
 }
index 9232a00..31eac06 100644 (file)
@@ -2,9 +2,11 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
+using System.Buffers;
 using System.Collections.Generic;
 using System.Globalization;
 using System.IO;
+using System.Linq;
 using Newtonsoft.Json;
 using Xunit;
 
@@ -486,6 +488,9 @@ namespace System.Text.Json.Tests
         [InlineData("{\"test\":[[[123,456]]]}", "test123456", "test123456")]
         [InlineData("/*a*//*z*/[/*b*//*z*/123/*c*//*z*/,/*d*//*z*/456/*e*//*z*/]/*f*//*z*/", "123456", "/*a*//*z*//*b*//*z*/123/*c*//*z*//*d*//*z*/456/*e*//*z*//*f*//*z*/")]
         [InlineData("[123,/*hi*/456/*bye*/]", "123456", "123/*hi*/456/*bye*/")]
+        [InlineData("[123,//hi\n456//bye\n]", "123456", "123//hi\n456//bye\n")]
+        [InlineData("[123,//hi\r456//bye\r]", "123456", "123//hi\r456//bye\r")]
+        [InlineData("[123,//hi\r\n456\r\n]", "123456", "123//hi\r\n456")]
         [InlineData("/*a*//*z*/{/*b*//*z*/\"test\":/*c*//*z*/[/*d*//*z*/[/*e*//*z*/[/*f*//*z*/123/*g*//*z*/,/*h*//*z*/456/*i*//*z*/]/*j*//*z*/]/*k*//*z*/]/*l*//*z*/}/*m*//*z*/",
     "test123456", "/*a*//*z*//*b*//*z*/test/*c*//*z*//*d*//*z*//*e*//*z*//*f*//*z*/123/*g*//*z*//*h*//*z*/456/*i*//*z*//*j*//*z*//*k*//*z*//*l*//*z*//*m*//*z*/")]
         [InlineData("//a\n//z\n{//b\n//z\n\"test\"://c\n//z\n[//d\n//z\n[//e\n//z\n[//f\n//z\n123//g\n//z\n,//h\n//z\n456//i\n//z\n]//j\n//z\n]//k\n//z\n]//l\n//z\n}//m\n//z\n",
@@ -978,6 +983,9 @@ namespace System.Text.Json.Tests
         [InlineData("\"h\u6F22\u5B57ello\"//This is a \u6F22\u5B57comment after json with new line\n", "//This is a \u6F22\u5B57comment after json with new line\n", 64)]
         [InlineData("{\"a\u6F22\u5B57ge\" : \n//This is a \u6F22\u5B57comment between key-value pairs\n 30}", "//This is a \u6F22\u5B57comment between key-value pairs\n", 66)]
         [InlineData("{\"a\u6F22\u5B57ge\" : 30//This is a \u6F22\u5B57comment between key-value pairs on the same line\n}", "//This is a \u6F22\u5B57comment between key-value pairs on the same line\n", 84)]
+        [InlineData("\"a\u6F22\u5B57lpha\" \r\n//This is a comment with a carriage return\r//Another single-line comment", "//This is a comment with a carriage return\r", 59)]
+        [InlineData("\"a\u6F22\u5B57lpha\" \r\n//This is a comment with a line break\n//Another single-line comment", "//This is a comment with a line break\n", 54)]
+        [InlineData("\"a\u6F22\u5B57lpha\" \r\n//This is a comment with a carriage return and line break\r\n//Another single-line comment", "//This is a comment with a carriage return and line break\r\n", 75)]
 
         [InlineData("/*T\u6F22\u5B57his is a multi-line \u6F22\u5B57comment before json*/\"hello\"", "/*T\u6F22\u5B57his is a multi-line \u6F22\u5B57comment before json*/", 56)]
         [InlineData("\"h\u6F22\u5B57ello\"/*This is a multi-line \u6F22\u5B57comment after json*/", "/*This is a multi-line \u6F22\u5B57comment after json*/", 62)]
@@ -1095,6 +1103,9 @@ namespace System.Text.Json.Tests
         [InlineData("\"h\u6F22\u5B57ello\"//This is a \u6F22\u5B57comment after json with new line\n", 52)]
         [InlineData("{\"a\u6F22\u5B57ge\" : \n//This is a \u6F22\u5B57comment between key-value pairs\n 30}", 54)]
         [InlineData("{\"a\u6F22\u5B57ge\" : 30//This is a \u6F22\u5B57comment between key-value pairs on the same line\n}", 72)]
+        [InlineData("\"a\u6F22\u5B57lpha\" \r\n//This is a comment with a carriage return\r//Another single-line comment", 59)]
+        [InlineData("\"a\u6F22\u5B57lpha\" \r\n//This is a comment with a line break\n//Another single-line comment", 54)]
+        [InlineData("\"a\u6F22\u5B57lpha\" \r\n//This is a comment with a carriage return and line break\r\n//Another single-line comment", 75)]
 
         [InlineData("/*T\u6F22\u5B57his is a multi-line \u6F22\u5B57comment before json*/\"hello\"", 44)]
         [InlineData("\"h\u6F22\u5B57ello\"/*This is a multi-line \u6F22\u5B57comment after json*/", 50)]
@@ -1156,6 +1167,9 @@ namespace System.Text.Json.Tests
         [InlineData("\"d\u6F22\u5B57elta\" \r\n/*This is a multi-line \u6F22\u5B57comment after json*///Here is another comment\n/*and a multi-line comment*///Another single-line comment\n\t  /*blah * blah*/", 53)]
         [InlineData("{\"a\u6F22\u5B57ge\" : \n/*This is a \u6F22\u5B57comment between key-value pairs*/ 30}", 55)]
         [InlineData("{\"a\u6F22\u5B57ge\" : 30/*This is a \u6F22\u5B57comment between key-value pairs on the same line*/}", 73)]
+        [InlineData("\"a\u6F22\u5B57lpha\" \r\n//This is a comment with a carriage return\r//Another single-line comment", 59)]
+        [InlineData("\"a\u6F22\u5B57lpha\" \r\n//This is a comment with a line break\n//Another single-line comment", 54)]
+        [InlineData("\"a\u6F22\u5B57lpha\" \r\n//This is a comment with a carriage return and line break\r\n//Another single-line comment", 75)]
 
         [InlineData("/*T\u6F22\u5B57his is a split multi-line \n\u6F22\u5B57comment before json*/\"hello\"", 51)]
         [InlineData("\"h\u6F22\u5B57ello\"/*This is a split multi-line \n\u6F22\u5B57comment after json*/", 57)]
@@ -1757,6 +1771,69 @@ namespace System.Text.Json.Tests
             Assert.Equal(dataUtf8.Length, json.BytesConsumed);
         }
 
+        private static void VerifyReadLoop(ref Utf8JsonReader json, string expected)
+        {
+            while (json.Read())
+            {
+                switch (json.TokenType)
+                {
+                    case JsonTokenType.StartObject:
+                    case JsonTokenType.EndObject:
+                        break;
+                    case JsonTokenType.Comment:
+                        if (expected != null)
+                        {
+                            byte[] data = json.HasValueSequence ? json.ValueSequence.ToArray() : json.ValueSpan.ToArray();
+                            Assert.Equal(expected, Encoding.UTF8.GetString(data));
+                        }
+                        else
+                        {
+                            Assert.True(false);
+                        }
+                        break;
+                    default:
+                        Assert.True(false);
+                        break;
+                }
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(SingleLineCommentData))]
+        public static void ConsumeSingleLineCommentSingleSpanTest(string expected)
+        {
+            var jsonData = "{" + expected + "}";
+            byte[] dataUtf8 = Encoding.UTF8.GetBytes(jsonData);
+
+            for (int i = 0; i < jsonData.Length; i++)
+            {
+                var state = new JsonReaderState(options: new JsonReaderOptions { CommentHandling = JsonCommentHandling.Allow });
+                var json = new Utf8JsonReader(dataUtf8.AsSpan(0, i), isFinalBlock: false, state);
+                VerifyReadLoop(ref json, expected);
+
+                json = new Utf8JsonReader(dataUtf8.AsSpan((int)state.BytesConsumed), isFinalBlock: true, state);
+                VerifyReadLoop(ref json, expected);
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(SingleLineCommentData))]
+        public static void SkipSingleLineCommentSingleSpanTest(string expected)
+        {
+            var jsonData = "{" + expected + "}";
+            byte[] dataUtf8 = Encoding.UTF8.GetBytes(jsonData);
+
+            for (int i = 0; i < jsonData.Length; i++)
+            {
+                var state = new JsonReaderState(options: new JsonReaderOptions { CommentHandling = JsonCommentHandling.Skip });
+                var json = new Utf8JsonReader(dataUtf8.AsSpan(0, i), isFinalBlock: false, state);
+                VerifyReadLoop(ref json, null);
+
+                json = new Utf8JsonReader(dataUtf8.AsSpan((int)state.BytesConsumed), isFinalBlock: true, state);
+                VerifyReadLoop(ref json, null);
+            }
+        }
+
         public static IEnumerable<object[]> TestCases
         {
             get
@@ -1986,5 +2063,30 @@ namespace System.Text.Json.Tests
                 };
             }
         }
+
+        public static IEnumerable<object[]> SingleLineCommentData
+        {
+            get
+            {
+                return new List<object[]>
+                {
+                    // \r as the line separator
+                    new object [] {"//Comment\r" },
+                    new object [] {"//Comment\r" },
+                    new object [] {"//Comment\r" },
+
+                    // \r\n as line separator
+                    new object [] {"//Comment\r\n" },
+                    new object [] {"//Comment\r\n" },
+                    new object [] {"//Comment\r\n" },
+                    new object [] {"//Comment\r\n" },
+
+                    // \n as line separator
+                    new object [] {"//Comment\n" },
+                    new object [] {"//Comment\n" },
+                    new object [] {"//Comment\n" }
+                };
+            }
+        }
     }
 }