Fix BytesConsumed when reading JSON numbers within a multi-segment ReadOnlySequence...
authorAhson Khan <ahson_ahmedk@yahoo.com>
Fri, 16 Aug 2019 04:12:23 +0000 (21:12 -0700)
committerGitHub <noreply@github.com>
Fri, 16 Aug 2019 04:12:23 +0000 (21:12 -0700)
* Add tests for reading numbers when data is within a multi-segment
ReadOnlySequence.

* Test rename and add debug.assert.

* Test clean up - remove unnecessary/expensive Debug.Assert.

* Test and code cleanup.

* Address feedback - add stronger test condition for token length.

Commit migrated from https://github.com/dotnet/corefx/commit/1b0f28942ccafca8a56f960536e125301e7b9306

src/libraries/System.Text.Json/src/System/Text/Json/Reader/Utf8JsonReader.MultiSegment.cs
src/libraries/System.Text.Json/tests/JsonTestHelper.cs
src/libraries/System.Text.Json/tests/Utf8JsonReaderTests.MultiSegment.cs

index ec633df..8d53bca 100644 (file)
@@ -66,7 +66,9 @@ namespace System.Text.Json
             {
                 _currentPosition = jsonData.Start;
                 _nextPosition = _currentPosition;
-                if (_buffer.Length == 0)
+
+                bool firstSegmentIsEmpty = _buffer.Length == 0;
+                if (firstSegmentIsEmpty)
                 {
                     // Once we find a non-empty segment, we need to set current position to it.
                     // Therefore, track the next position in a copy before it gets advanced to the next segment.
@@ -84,7 +86,15 @@ namespace System.Text.Json
                     }
                 }
 
-                _isLastSegment = !jsonData.TryGet(ref _nextPosition, out _, advance: true) && isFinalBlock; // Don't re-order to avoid short-circuiting
+                // If firstSegmentIsEmpty is true,
+                //    only check if we have reached the last segment but do not advance _nextPosition. The while loop above already advanced it.
+                //    Otherwise, we would end up skipping a segment (i.e. advance = false).
+                // If firstSegmentIsEmpty is false,
+                //    make sure to advance _nextPosition so that it is no longer the same as _currentPosition (i.e. advance = true).
+                _isLastSegment = !jsonData.TryGet(ref _nextPosition, out _, advance: !firstSegmentIsEmpty) && isFinalBlock; // Don't re-order to avoid short-circuiting
+
+                Debug.Assert(!_nextPosition.Equals(_currentPosition));
+
                 _isMultiSegment = true;
             }
         }
@@ -1309,6 +1319,7 @@ namespace System.Text.Json
 
         private ConsumeNumberResult ConsumeNegativeSignMultiSegment(ref ReadOnlySpan<byte> data, ref int i)
         {
+            Debug.Assert(i == 0);
             byte nextByte = data[i];
 
             if (nextByte == '-')
@@ -1329,7 +1340,8 @@ namespace System.Text.Json
                         }
                         return ConsumeNumberResult.NeedMoreData;
                     }
-                    _totalConsumed++;
+                    Debug.Assert(i == 1);
+                    _totalConsumed += i;
                     HasValueSequence = true;
                     i = 0;
                     data = _buffer;
@@ -1347,9 +1359,10 @@ namespace System.Text.Json
         private ConsumeNumberResult ConsumeZeroMultiSegment(ref ReadOnlySpan<byte> data, ref int i)
         {
             Debug.Assert(data[i] == (byte)'0');
+            Debug.Assert(i == 0 || i == 1);
             i++;
             _bytePositionInLine++;
-            byte nextByte = default;
+            byte nextByte;
             if (i < data.Length)
             {
                 nextByte = data[i];
@@ -1377,7 +1390,7 @@ namespace System.Text.Json
                     return ConsumeNumberResult.NeedMoreData;
                 }
 
-                _totalConsumed++;
+                _totalConsumed += i;
                 HasValueSequence = true;
                 i = 0;
                 data = _buffer;
@@ -1522,6 +1535,7 @@ namespace System.Text.Json
                     }
                     return ConsumeNumberResult.NeedMoreData;
                 }
+                _totalConsumed += i;
                 HasValueSequence = true;
                 i = 0;
                 data = _buffer;
@@ -1547,7 +1561,7 @@ namespace System.Text.Json
                         }
                         return ConsumeNumberResult.NeedMoreData;
                     }
-                    _totalConsumed++;
+                    _totalConsumed += i;
                     HasValueSequence = true;
                     i = 0;
                     data = _buffer;
index 94179f6..87dd679 100644 (file)
@@ -4,6 +4,7 @@
 
 using System.Buffers;
 using System.Collections.Generic;
+using System.Diagnostics;
 using System.Globalization;
 using System.IO;
 using System.Text.Json.Tests;
@@ -157,20 +158,49 @@ namespace System.Text.Json
             return new ReadOnlySequence<byte>(firstSegment, 0, secondSegment, secondMem.Length);
         }
 
-        public static ReadOnlySequence<byte> GetSequence(byte[] _dataUtf8, int segmentSize)
+        public static ReadOnlySequence<byte> CreateSegments(byte[] data, int splitLocation)
         {
-            int numberOfSegments = _dataUtf8.Length / segmentSize + 1;
+            Debug.Assert(splitLocation <= data.Length);
+
+            ReadOnlyMemory<byte> dataMemory = data;
+
+            var firstSegment = new BufferSegment<byte>(dataMemory.Slice(0, splitLocation));
+            ReadOnlyMemory<byte> secondMem = dataMemory.Slice(splitLocation);
+            BufferSegment<byte> secondSegment = firstSegment.Append(secondMem);
+
+            return new ReadOnlySequence<byte>(firstSegment, 0, secondSegment, secondMem.Length);
+        }
+
+        public static ReadOnlySequence<byte> CreateSegments(byte[] data, int firstSplit, int secondSplit)
+        {
+            Debug.Assert(firstSplit <= data.Length && secondSplit <= data.Length && firstSplit <= secondSplit);
+
+            ReadOnlyMemory<byte> dataMemory = data;
+
+            var firstSegment = new BufferSegment<byte>(dataMemory.Slice(0, firstSplit));
+            ReadOnlyMemory<byte> secondMem = dataMemory.Slice(firstSplit, secondSplit - firstSplit);
+            BufferSegment<byte> secondSegment = firstSegment.Append(secondMem);
+
+            ReadOnlyMemory<byte> thirdMem = dataMemory.Slice(secondSplit);
+            BufferSegment<byte> thirdSegment = secondSegment.Append(thirdMem);
+
+            return new ReadOnlySequence<byte>(firstSegment, 0, thirdSegment, thirdMem.Length);
+        }
+
+        public static ReadOnlySequence<byte> GetSequence(byte[] dataUtf8, int segmentSize)
+        {
+            int numberOfSegments = dataUtf8.Length / segmentSize + 1;
             byte[][] buffers = new byte[numberOfSegments][];
 
             for (int j = 0; j < numberOfSegments - 1; j++)
             {
                 buffers[j] = new byte[segmentSize];
-                Array.Copy(_dataUtf8, j * segmentSize, buffers[j], 0, segmentSize);
+                Array.Copy(dataUtf8, j * segmentSize, buffers[j], 0, segmentSize);
             }
 
-            int remaining = _dataUtf8.Length % segmentSize;
+            int remaining = dataUtf8.Length % segmentSize;
             buffers[numberOfSegments - 1] = new byte[remaining];
-            Array.Copy(_dataUtf8, _dataUtf8.Length - remaining, buffers[numberOfSegments - 1], 0, remaining);
+            Array.Copy(dataUtf8, dataUtf8.Length - remaining, buffers[numberOfSegments - 1], 0, remaining);
 
             return BufferFactory.Create(buffers);
         }
index ee5b201..501da19 100644 (file)
@@ -38,6 +38,111 @@ namespace System.Text.Json.Tests
         }
 
         [Fact]
+        public static void EmptyJsonMultiSegmentIsInvalid()
+        {
+            ReadOnlyMemory<byte> dataMemory = Array.Empty<byte>();
+
+            var firstSegment = new BufferSegment<byte>(dataMemory.Slice(0, 0));
+            ReadOnlyMemory<byte> secondMem = dataMemory.Slice(0, 0);
+            BufferSegment<byte> secondSegment = firstSegment.Append(secondMem);
+
+            var sequence = new ReadOnlySequence<byte>(firstSegment, 0, secondSegment, secondMem.Length);
+
+            Assert.ThrowsAny<JsonException>(() =>
+            {
+                var json = new Utf8JsonReader(sequence, isFinalBlock: true, state: default);
+
+                Assert.Equal(0, json.BytesConsumed);
+                Assert.Equal(0, json.TokenStartIndex);
+                Assert.Equal(0, json.CurrentDepth);
+                Assert.Equal(JsonTokenType.None, json.TokenType);
+                Assert.NotEqual(default, json.Position);
+                Assert.False(json.HasValueSequence);
+                Assert.True(json.ValueSpan.SequenceEqual(default));
+                Assert.True(json.ValueSequence.IsEmpty);
+
+                Assert.Equal(64, json.CurrentState.Options.MaxDepth);
+                Assert.False(json.CurrentState.Options.AllowTrailingCommas);
+                Assert.Equal(JsonCommentHandling.Disallow, json.CurrentState.Options.CommentHandling);
+
+                json.Read(); // this should throw
+            });
+        }
+
+        [Theory]
+        [InlineData("2e2", 200, 3)]
+        [InlineData("2e+2", 200, 4)]
+        [InlineData("123e-01", 12.3, 7)]
+        [InlineData("0", 0, 1)]
+        [InlineData("0.1", 0.1, 3)]
+        [InlineData("-0", 0, 2)]
+        [InlineData("-0.1", -0.1, 4)]
+        [InlineData("   2e2   ", 200, 3)]
+        [InlineData("   2e+2   ", 200, 4)]
+        [InlineData("   123e-01   ", 12.3, 7)]
+        [InlineData("   0   ", 0, 1)]
+        [InlineData("   0.1   ", 0.1, 3)]
+        [InlineData("   -0   ", 0, 2)]
+        [InlineData("   -0.1   ", -0.1, 4)]
+        [InlineData("[2e2]", 200, 3)]
+        [InlineData("[2e+2]", 200, 4)]
+        [InlineData("[123e-01]", 12.3, 7)]
+        [InlineData("[0]", 0, 1)]
+        [InlineData("[0.1]", 0.1, 3)]
+        [InlineData("[-0]", 0, 2)]
+        [InlineData("[-0.1]", -0.1, 4)]
+        [InlineData("{\"foo\": 2e2}", 200, 3)]
+        [InlineData("{\"foo\": 2e+2}", 200, 4)]
+        [InlineData("{\"foo\": 123e-01}", 12.3, 7)]
+        [InlineData("{\"foo\": 0}", 0, 1)]
+        [InlineData("{\"foo\": 0.1}", 0.1, 3)]
+        [InlineData("{\"foo\": -0}", 0, 2)]
+        [InlineData("{\"foo\": -0.1}", -0.1, 4)]
+        public static void ReadJsonWithNumberVariousSegmentSizes(string input, double expectedValue, long expectedTokenLength)
+        {
+            byte[] utf8 = Encoding.UTF8.GetBytes(input);
+
+            var jsonReader = new Utf8JsonReader(utf8);
+            ReadDoubleHelper(ref jsonReader, utf8.Length, expectedValue, expectedTokenLength);
+
+            ReadOnlySequence<byte> sequence = JsonTestHelper.GetSequence(utf8, 1);
+            jsonReader = new Utf8JsonReader(sequence);
+            ReadDoubleHelper(ref jsonReader, utf8.Length, expectedValue, expectedTokenLength);
+
+            for (int splitLocation = 0; splitLocation < utf8.Length; splitLocation++)
+            {
+                sequence = JsonTestHelper.CreateSegments(utf8, splitLocation);
+                jsonReader = new Utf8JsonReader(sequence);
+                ReadDoubleHelper(ref jsonReader, utf8.Length, expectedValue, expectedTokenLength);
+            }
+
+            for (int firstSplit = 0; firstSplit < utf8.Length; firstSplit++)
+            {
+                for (int secondSplit = firstSplit; secondSplit < utf8.Length; secondSplit++)
+                {
+                    sequence = JsonTestHelper.CreateSegments(utf8, firstSplit, secondSplit);
+                    jsonReader = new Utf8JsonReader(sequence);
+                    ReadDoubleHelper(ref jsonReader, utf8.Length, expectedValue, expectedTokenLength);
+                }
+            }
+        }
+
+        private static void ReadDoubleHelper(ref Utf8JsonReader jsonReader, int expectedBytesConsumed, double expectedValue, long expectedTokenLength)
+        {
+            while (jsonReader.Read())
+            {
+                if (jsonReader.TokenType == JsonTokenType.Number)
+                {
+                    long tokenLength = jsonReader.HasValueSequence ? jsonReader.ValueSequence.Length : jsonReader.ValueSpan.Length;
+                    Assert.Equal(expectedTokenLength, tokenLength);
+                    Assert.Equal(tokenLength, jsonReader.BytesConsumed - jsonReader.TokenStartIndex);
+                    Assert.Equal(expectedValue, jsonReader.GetDouble());
+                }
+            }
+            Assert.Equal(expectedBytesConsumed, jsonReader.BytesConsumed);
+        }
+
+        [Fact]
         public static void InitialStateSimpleCtorMultiSegment()
         {
             byte[] utf8 = Encoding.UTF8.GetBytes("1");