JsonDocument should allow the UTF-8 content-BOM from Stream inputs
author Jeremy Barton <jbarton@microsoft.com>
Sat, 9 Mar 2019 03:50:25 +0000 (19:50 -0800)
committer GitHub <noreply@github.com>
Sat, 9 Mar 2019 03:50:25 +0000 (19:50 -0800)
Commit migrated from https://github.com/dotnet/corefx/commit/232796d75d9492f273f61df9bb8afc05d33d6dc6

src/libraries/System.Text.Json/src/System/Text/Json/Document/JsonDocument.Parse.cs
src/libraries/System.Text.Json/src/System/Text/Json/JsonConstants.cs
src/libraries/System.Text.Json/tests/JsonDocumentTests.cs

index 899802a..4b1545e 100644 (file)
@@ -292,13 +292,15 @@ namespace System.Text.Json
             int written = 0;
             byte[] rented = null;
 
+            ReadOnlySpan<byte> utf8Bom = JsonConstants.Utf8Bom;
+
             try
             {
                 if (stream.CanSeek)
                 {
                     // Ask for 1 more than the length to avoid resizing later,
                     // which is unnecessary in the common case where the stream length doesn't change.
-                    long expectedLength = Math.Max(0, stream.Length - stream.Position) + 1;
+                    long expectedLength = Math.Max(utf8Bom.Length, stream.Length - stream.Position) + 1;
                     rented = ArrayPool<byte>.Shared.Rent(checked((int)expectedLength));
                 }
                 else
@@ -308,6 +310,27 @@ namespace System.Text.Json
 
                 int lastRead;
 
+                // Read up to 3 bytes to see if it's the UTF-8 BOM
+                do
+                {
+                    // No need for checking for growth, the minimal rent sizes both guarantee it'll fit.
+                    Debug.Assert(rented.Length >= utf8Bom.Length);
+
+                    lastRead = stream.Read(
+                        rented,
+                        written,
+                        utf8Bom.Length - written);
+
+                    written += lastRead;
+                } while (lastRead > 0 && written < utf8Bom.Length);
+
+                // If we have 3 bytes, and they're the BOM, reset the write position to 0.
+                if (written == utf8Bom.Length &&
+                    utf8Bom.SequenceEqual(rented.AsSpan(0, utf8Bom.Length)))
+                {
+                    written = 0;
+                }
+
                 do
                 {
                     if (rented.Length == written)
@@ -353,11 +376,14 @@ namespace System.Text.Json
 
             try
             {
+                // Save the length to a local to be reused across awaits.
+                int utf8BomLength = JsonConstants.Utf8Bom.Length;
+
                 if (stream.CanSeek)
                 {
                     // Ask for 1 more than the length to avoid resizing later,
                     // which is unnecessary in the common case where the stream length doesn't change.
-                    long expectedLength = Math.Max(0, stream.Length - stream.Position) + 1;
+                    long expectedLength = Math.Max(utf8BomLength, stream.Length - stream.Position) + 1;
                     rented = ArrayPool<byte>.Shared.Rent(checked((int)expectedLength));
                 }
                 else
@@ -367,6 +393,28 @@ namespace System.Text.Json
 
                 int lastRead;
 
+                // Read up to 3 bytes to see if it's the UTF-8 BOM
+                do
+                {
+                    // No need for checking for growth, the minimal rent sizes both guarantee it'll fit.
+                    Debug.Assert(rented.Length >= JsonConstants.Utf8Bom.Length);
+
+                    lastRead = await stream.ReadAsync(
+                        rented,
+                        written,
+                        utf8BomLength - written,
+                        cancellationToken).ConfigureAwait(false);
+
+                    written += lastRead;
+                } while (lastRead > 0 && written < utf8BomLength);
+
+                // If we have 3 bytes, and they're the BOM, reset the write position to 0.
+                if (written == utf8BomLength &&
+                    JsonConstants.Utf8Bom.SequenceEqual(rented.AsSpan(0, utf8BomLength)))
+                {
+                    written = 0;
+                }
+
                 do
                 {
                     if (rented.Length == written)
index 2fc3b30..748c9d9 100644 (file)
@@ -23,6 +23,7 @@ namespace System.Text.Json
         public const byte FormFeed = (byte)'\f';
         public const byte Asterisk = (byte)'*';
 
+        // The three bytes EF BB BF, i.e. the UTF-8 encoding of the byte order mark (U+FEFF).
+        public static ReadOnlySpan<byte> Utf8Bom => new byte[] { 0xEF, 0xBB, 0xBF };
         public static ReadOnlySpan<byte> TrueValue => new byte[] { (byte)'t', (byte)'r', (byte)'u', (byte)'e' };
         public static ReadOnlySpan<byte> FalseValue => new byte[] { (byte)'f', (byte)'a', (byte)'l', (byte)'s', (byte)'e' };
         public static ReadOnlySpan<byte> NullValue => new byte[] { (byte)'n', (byte)'u', (byte)'l', (byte)'l' };
index 9041519..dc2cb8b 100644 (file)
@@ -19,12 +19,30 @@ namespace System.Text.Json.Tests
 {
     public static class JsonDocumentTests
     {
+        // The three-byte UTF-8 BOM (EF BB BF); the *_WithBOM tests prepend it to their payloads.
+        private static readonly byte[] Utf8Bom = { 0xEF, 0xBB, 0xBF };
+
         private static readonly Dictionary<TestCaseType, string> s_expectedConcat =
             new Dictionary<TestCaseType, string>();
 
         private static readonly Dictionary<TestCaseType, string> s_compactJson =
             new Dictionary<TestCaseType, string>();
 
+        // Theory data for the *_BadBOM tests: prefixes and near-misses of the UTF-8 BOM
+        // (EF BB BF), each with and without a trailing '1', plus a complete BOM that is
+        // followed by no JSON payload. Every char used here is U+00FF or lower.
+        // NOTE(review): presumably each char is meant to stand for exactly one raw byte.
+        // A consumer that encodes these strings as UTF-8 gets two bytes per non-ASCII char
+        // instead (U+00EF becomes C3 AF), so it never feeds the parser a real BOM prefix -- confirm.
+        public static IEnumerable<object[]> BadBOMCases { get; } =
+            new object[][]
+            {
+                new object[] { "\u00EF" },
+                new object[] { "\u00EF1" },
+                new object[] { "\u00EF\u00BB" },
+                new object[] { "\u00EF\u00BB1" },
+                new object[] { "\u00EF\u00BB\u00BE" },
+                new object[] { "\u00EF\u00BB\u00BE1" },
+                new object[] { "\u00EF\u00BB\u00FB" },
+                new object[] { "\u00EF\u00BB\u00FB1" },
+
+                // Legal BOM, but no payload.
+                new object[] { "\u00EF\u00BB\u00BF" },
+            };
+
         public static IEnumerable<object[]> ReducedTestCases { get; } =
             new List<object[]>
             {
@@ -263,6 +281,150 @@ namespace System.Text.Json.Tests
                     GetAwaiter().GetResult());
         }
 
+
+        // A two-byte payload ("11") is shorter than the three-byte UTF-8 BOM; it must still
+        // parse as the number 11 when read synchronously from a MemoryStream.
+        [Fact]
+        public static void ParseJson_SeekableStream_Small()
+        {
+            MemoryStream input = new MemoryStream(new[] { (byte)'1', (byte)'1' });
+
+            using (JsonDocument parsed = JsonDocument.Parse(input))
+            {
+                JsonElement number = parsed.RootElement;
+
+                Assert.Equal(JsonValueType.Number, number.Type);
+                Assert.Equal(11, number.GetInt32());
+            }
+        }
+
+        // A two-byte payload ("11") is shorter than the three-byte UTF-8 BOM; it must still
+        // parse as the number 11 when read synchronously from a stream created with canSeek: false.
+        [Fact]
+        public static void ParseJson_UnseekableStream_Small()
+        {
+            byte[] payload = new[] { (byte)'1', (byte)'1' };
+            WrappedMemoryStream input =
+                new WrappedMemoryStream(canRead: true, canWrite: false, canSeek: false, data: payload);
+
+            using (JsonDocument parsed = JsonDocument.Parse(input))
+            {
+                JsonElement number = parsed.RootElement;
+
+                Assert.Equal(JsonValueType.Number, number.Type);
+                Assert.Equal(11, number.GetInt32());
+            }
+        }
+
+        // Async counterpart of the small-payload check: "11" (shorter than the three-byte
+        // UTF-8 BOM) read via ParseAsync from a MemoryStream must yield the number 11.
+        [Fact]
+        public static async Task ParseJson_SeekableStream_Small_Async()
+        {
+            MemoryStream input = new MemoryStream(new[] { (byte)'1', (byte)'1' });
+
+            using (JsonDocument parsed = await JsonDocument.ParseAsync(input))
+            {
+                JsonElement number = parsed.RootElement;
+
+                Assert.Equal(JsonValueType.Number, number.Type);
+                Assert.Equal(11, number.GetInt32());
+            }
+        }
+
+        // Async counterpart of the small-payload check: "11" (shorter than the three-byte
+        // UTF-8 BOM) read via ParseAsync from a stream created with canSeek: false must
+        // yield the number 11.
+        [Fact]
+        public static async Task ParseJson_UnseekableStream_Small_Async()
+        {
+            byte[] payload = new[] { (byte)'1', (byte)'1' };
+            WrappedMemoryStream input =
+                new WrappedMemoryStream(canRead: true, canWrite: false, canSeek: false, data: payload);
+
+            using (JsonDocument parsed = await JsonDocument.ParseAsync(input))
+            {
+                JsonElement number = parsed.RootElement;
+
+                Assert.Equal(JsonValueType.Number, number.Type);
+                Assert.Equal(11, number.GetInt32());
+            }
+        }
+
+        // Runs the shared ParseJson validation with the UTF-8 BOM prepended to each payload,
+        // parsing synchronously from a MemoryStream.
+        [Theory]
+        [MemberData(nameof(ReducedTestCases))]
+        public static void ParseJson_SeekableStream_WithBOM(bool compactData, TestCaseType type, string jsonString)
+        {
+            ParseJson(
+                compactData,
+                type,
+                jsonString,
+                null,
+                bytes =>
+                {
+                    byte[] prefixed = Utf8Bom.Concat(bytes).ToArray();
+                    return JsonDocument.Parse(new MemoryStream(prefixed));
+                });
+        }
+        
+        // Runs the shared ParseJson validation with the UTF-8 BOM prepended to each payload,
+        // parsing via ParseAsync from a MemoryStream. The task is awaited synchronously
+        // because the builder passed to ParseJson is a synchronous lambda.
+        [Theory]
+        [MemberData(nameof(ReducedTestCases))]
+        public static void ParseJson_SeekableStream_Async_WithBOM(bool compactData, TestCaseType type, string jsonString)
+        {
+            ParseJson(
+                compactData,
+                type,
+                jsonString,
+                null,
+                bytes =>
+                {
+                    MemoryStream prefixed = new MemoryStream(Utf8Bom.Concat(bytes).ToArray());
+                    return JsonDocument.ParseAsync(prefixed).GetAwaiter().GetResult();
+                });
+        }
+
+        // Runs the shared ParseJson validation with the UTF-8 BOM prepended to each payload,
+        // parsing synchronously from a stream created with canSeek: false.
+        [Theory]
+        [MemberData(nameof(ReducedTestCases))]
+        public static void ParseJson_UnseekableStream_WithBOM(bool compactData, TestCaseType type, string jsonString)
+        {
+            ParseJson(
+                compactData,
+                type,
+                jsonString,
+                null,
+                bytes =>
+                {
+                    byte[] prefixed = Utf8Bom.Concat(bytes).ToArray();
+                    return JsonDocument.Parse(
+                        new WrappedMemoryStream(canRead: true, canWrite: false, canSeek: false, prefixed));
+                });
+        }
+
+        // Runs the shared ParseJson validation with the UTF-8 BOM prepended to each payload,
+        // parsing via ParseAsync from a stream created with canSeek: false. The task is
+        // awaited synchronously because the builder passed to ParseJson is a synchronous lambda.
+        [Theory]
+        [MemberData(nameof(ReducedTestCases))]
+        public static void ParseJson_UnseekableStream_Async_WithBOM(bool compactData, TestCaseType type, string jsonString)
+        {
+            ParseJson(
+                compactData,
+                type,
+                jsonString,
+                null,
+                bytes =>
+                {
+                    byte[] prefixed = Utf8Bom.Concat(bytes).ToArray();
+                    WrappedMemoryStream input =
+                        new WrappedMemoryStream(canRead: true, canWrite: false, canSeek: false, prefixed);
+
+                    return JsonDocument.ParseAsync(input).GetAwaiter().GetResult();
+                });
+        }
+
+        // Verifies that a truncated or malformed UTF-8 BOM (or a BOM with no payload) read
+        // synchronously from a MemoryStream is rejected with JsonReaderException.
+        [Theory]
+        [MemberData(nameof(BadBOMCases))]
+        public static void ParseJson_SeekableStream_BadBOM(string json)
+        {
+            // Each char in the test data stands for one raw byte (all are U+00FF or lower).
+            // Encoding.UTF8.GetBytes would expand U+00EF to C3 AF, so the stream would never
+            // contain an actual BOM prefix; map char -> byte directly instead. The checked
+            // cast fails loudly if a future case uses a char that does not fit in a byte.
+            byte[] data = json.Select(c => checked((byte)c)).ToArray();
+            Assert.Throws<JsonReaderException>(() => JsonDocument.Parse(new MemoryStream(data)));
+        }
+
+        // Verifies that a truncated or malformed UTF-8 BOM (or a BOM with no payload) read
+        // via ParseAsync from a MemoryStream is rejected with JsonReaderException.
+        [Theory]
+        [MemberData(nameof(BadBOMCases))]
+        public static Task ParseJson_SeekableStream_Async_BadBOM(string json)
+        {
+            // Each char in the test data stands for one raw byte (all are U+00FF or lower).
+            // Encoding.UTF8.GetBytes would expand U+00EF to C3 AF, so the stream would never
+            // contain an actual BOM prefix; map char -> byte directly instead. The checked
+            // cast fails loudly if a future case uses a char that does not fit in a byte.
+            byte[] data = json.Select(c => checked((byte)c)).ToArray();
+            return Assert.ThrowsAsync<JsonReaderException>(() => JsonDocument.ParseAsync(new MemoryStream(data)));
+        }
+
+        // Verifies that a truncated or malformed UTF-8 BOM (or a BOM with no payload) read
+        // synchronously from a stream created with canSeek: false is rejected with
+        // JsonReaderException.
+        [Theory]
+        [MemberData(nameof(BadBOMCases))]
+        public static void ParseJson_UnseekableStream_BadBOM(string json)
+        {
+            // Each char in the test data stands for one raw byte (all are U+00FF or lower).
+            // Encoding.UTF8.GetBytes would expand U+00EF to C3 AF, so the stream would never
+            // contain an actual BOM prefix; map char -> byte directly instead. The checked
+            // cast fails loudly if a future case uses a char that does not fit in a byte.
+            byte[] data = json.Select(c => checked((byte)c)).ToArray();
+
+            Assert.Throws<JsonReaderException>(
+                () => JsonDocument.Parse(
+                    new WrappedMemoryStream(canRead: true, canWrite: false, canSeek: false, data)));
+        }
+
+        // Verifies that a truncated or malformed UTF-8 BOM (or a BOM with no payload) read
+        // via ParseAsync from a stream created with canSeek: false is rejected with
+        // JsonReaderException.
+        [Theory]
+        [MemberData(nameof(BadBOMCases))]
+        public static Task ParseJson_UnseekableStream_Async_BadBOM(string json)
+        {
+            // Each char in the test data stands for one raw byte (all are U+00FF or lower).
+            // Encoding.UTF8.GetBytes would expand U+00EF to C3 AF, so the stream would never
+            // contain an actual BOM prefix; map char -> byte directly instead. The checked
+            // cast fails loudly if a future case uses a char that does not fit in a byte.
+            byte[] data = json.Select(c => checked((byte)c)).ToArray();
+
+            return Assert.ThrowsAsync<JsonReaderException>(
+                () => JsonDocument.ParseAsync(
+                    new WrappedMemoryStream(canRead: true, canWrite: false, canSeek: false, data)));
+        }
+
         [Theory]
         [MemberData(nameof(ReducedTestCases))]
         public static void ParseJson_SequenceBytes_Single(bool compactData, TestCaseType type, string jsonString)