From: Jeremy Barton Date: Sat, 9 Mar 2019 03:50:25 +0000 (-0800) Subject: JsonDocument should allow the UTF-8 content-BOM from Stream inputs X-Git-Tag: submit/tizen/20210909.063632~11031^2~2227 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ee0fbacb333b4ee94e07a18b4be1b4f0bf324232;p=platform%2Fupstream%2Fdotnet%2Fruntime.git JsonDocument should allow the UTF-8 content-BOM from Stream inputs Commit migrated from https://github.com/dotnet/corefx/commit/232796d75d9492f273f61df9bb8afc05d33d6dc6 --- diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Document/JsonDocument.Parse.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Document/JsonDocument.Parse.cs index 899802a..4b1545e 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Document/JsonDocument.Parse.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/Document/JsonDocument.Parse.cs @@ -292,13 +292,15 @@ namespace System.Text.Json int written = 0; byte[] rented = null; + ReadOnlySpan<byte> utf8Bom = JsonConstants.Utf8Bom; + try { if (stream.CanSeek) { // Ask for 1 more than the length to avoid resizing later, // which is unnecessary in the common case where the stream length doesn't change. - long expectedLength = Math.Max(0, stream.Length - stream.Position) + 1; + long expectedLength = Math.Max(utf8Bom.Length, stream.Length - stream.Position) + 1; rented = ArrayPool<byte>.Shared.Rent(checked((int)expectedLength)); } else @@ -308,6 +310,27 @@ namespace System.Text.Json int lastRead; + // Read up to 3 bytes to see if it's the UTF-8 BOM + do + { + // No need for checking for growth, the minimal rent sizes both guarantee it'll fit. + Debug.Assert(rented.Length >= utf8Bom.Length); + + lastRead = stream.Read( + rented, + written, + utf8Bom.Length - written); + + written += lastRead; + } while (lastRead > 0 && written < utf8Bom.Length); + + // If we have 3 bytes, and they're the BOM, reset the write position to 0. 
+ if (written == utf8Bom.Length && + utf8Bom.SequenceEqual(rented.AsSpan(0, utf8Bom.Length))) + { + written = 0; + } + do { if (rented.Length == written) @@ -353,11 +376,14 @@ namespace System.Text.Json try { + // Save the length to a local to be reused across awaits. + int utf8BomLength = JsonConstants.Utf8Bom.Length; + if (stream.CanSeek) { // Ask for 1 more than the length to avoid resizing later, // which is unnecessary in the common case where the stream length doesn't change. - long expectedLength = Math.Max(0, stream.Length - stream.Position) + 1; + long expectedLength = Math.Max(utf8BomLength, stream.Length - stream.Position) + 1; rented = ArrayPool<byte>.Shared.Rent(checked((int)expectedLength)); } else @@ -367,6 +393,28 @@ namespace System.Text.Json int lastRead; + // Read up to 3 bytes to see if it's the UTF-8 BOM + do + { + // No need for checking for growth, the minimal rent sizes both guarantee it'll fit. + Debug.Assert(rented.Length >= JsonConstants.Utf8Bom.Length); + + lastRead = await stream.ReadAsync( + rented, + written, + utf8BomLength - written, + cancellationToken).ConfigureAwait(false); + + written += lastRead; + } while (lastRead > 0 && written < utf8BomLength); + + // If we have 3 bytes, and they're the BOM, reset the write position to 0. 
+ if (written == utf8BomLength && + JsonConstants.Utf8Bom.SequenceEqual(rented.AsSpan(0, utf8BomLength))) + { + written = 0; + } + do { if (rented.Length == written) diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/JsonConstants.cs b/src/libraries/System.Text.Json/src/System/Text/Json/JsonConstants.cs index 2fc3b30..748c9d9 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/JsonConstants.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/JsonConstants.cs @@ -23,6 +23,7 @@ namespace System.Text.Json public const byte FormFeed = (byte)'\f'; public const byte Asterisk = (byte)'*'; + public static ReadOnlySpan<byte> Utf8Bom => new byte[] { 0xEF, 0xBB, 0xBF }; public static ReadOnlySpan<byte> TrueValue => new byte[] { (byte)'t', (byte)'r', (byte)'u', (byte)'e' }; public static ReadOnlySpan<byte> FalseValue => new byte[] { (byte)'f', (byte)'a', (byte)'l', (byte)'s', (byte)'e' }; public static ReadOnlySpan<byte> NullValue => new byte[] { (byte)'n', (byte)'u', (byte)'l', (byte)'l' }; diff --git a/src/libraries/System.Text.Json/tests/JsonDocumentTests.cs b/src/libraries/System.Text.Json/tests/JsonDocumentTests.cs index 9041519..dc2cb8b 100644 --- a/src/libraries/System.Text.Json/tests/JsonDocumentTests.cs +++ b/src/libraries/System.Text.Json/tests/JsonDocumentTests.cs @@ -19,12 +19,30 @@ namespace System.Text.Json.Tests { public static class JsonDocumentTests { + private static readonly byte[] Utf8Bom = { 0xEF, 0xBB, 0xBF }; + private static readonly Dictionary s_expectedConcat = new Dictionary(); private static readonly Dictionary s_compactJson = new Dictionary(); + public static IEnumerable<object[]> BadBOMCases { get; } = + new object[][] + { + new object[] { "\u00EF" }, + new object[] { "\u00EF1" }, + new object[] { "\u00EF\u00BB" }, + new object[] { "\u00EF\u00BB1" }, + new object[] { "\u00EF\u00BB\u00BE" }, + new object[] { "\u00EF\u00BB\u00BE1" }, + new object[] { "\u00EF\u00BB\u00FB" }, + new object[] { "\u00EF\u00BB\u00FB1" }, + + // Legal BOM, but no 
payload. + new object[] { "\u00EF\u00BB\u00BF" }, + }; + public static IEnumerable<object[]> ReducedTestCases { get; } = new List<object[]> { @@ -263,6 +281,150 @@ namespace System.Text.Json.Tests GetAwaiter().GetResult()); } + + [Fact] + public static void ParseJson_SeekableStream_Small() + { + byte[] data = { (byte)'1', (byte)'1' }; + + using (JsonDocument doc = JsonDocument.Parse(new MemoryStream(data))) + { + JsonElement root = doc.RootElement; + Assert.Equal(JsonValueType.Number, root.Type); + Assert.Equal(11, root.GetInt32()); + } + } + + [Fact] + public static void ParseJson_UnseekableStream_Small() + { + byte[] data = { (byte)'1', (byte)'1' }; + + using (JsonDocument doc = + JsonDocument.Parse(new WrappedMemoryStream(canRead: true, canWrite: false, canSeek: false, data: data))) + { + JsonElement root = doc.RootElement; + Assert.Equal(JsonValueType.Number, root.Type); + Assert.Equal(11, root.GetInt32()); + } + } + + [Fact] + public static async Task ParseJson_SeekableStream_Small_Async() + { + byte[] data = { (byte)'1', (byte)'1' }; + + using (JsonDocument doc = await JsonDocument.ParseAsync(new MemoryStream(data))) + { + JsonElement root = doc.RootElement; + Assert.Equal(JsonValueType.Number, root.Type); + Assert.Equal(11, root.GetInt32()); + } + } + + [Fact] + public static async Task ParseJson_UnseekableStream_Small_Async() + { + byte[] data = { (byte)'1', (byte)'1' }; + + using (JsonDocument doc = await JsonDocument.ParseAsync( + new WrappedMemoryStream(canRead: true, canWrite: false, canSeek: false, data: data))) + { + JsonElement root = doc.RootElement; + Assert.Equal(JsonValueType.Number, root.Type); + Assert.Equal(11, root.GetInt32()); + } + } + + [Theory] + [MemberData(nameof(ReducedTestCases))] + public static void ParseJson_SeekableStream_WithBOM(bool compactData, TestCaseType type, string jsonString) + { + ParseJson( + compactData, + type, + jsonString, + null, + bytes => JsonDocument.Parse(new MemoryStream(Utf8Bom.Concat(bytes).ToArray()))); + } + + [Theory] + 
[MemberData(nameof(ReducedTestCases))] + public static void ParseJson_SeekableStream_Async_WithBOM(bool compactData, TestCaseType type, string jsonString) + { + ParseJson( + compactData, + type, + jsonString, + null, + bytes => JsonDocument.ParseAsync(new MemoryStream(Utf8Bom.Concat(bytes).ToArray())).GetAwaiter().GetResult()); + } + + [Theory] + [MemberData(nameof(ReducedTestCases))] + public static void ParseJson_UnseekableStream_WithBOM(bool compactData, TestCaseType type, string jsonString) + { + ParseJson( + compactData, + type, + jsonString, + null, + bytes => JsonDocument.Parse( + new WrappedMemoryStream(canRead: true, canWrite: false, canSeek: false, Utf8Bom.Concat(bytes).ToArray()))); + } + + [Theory] + [MemberData(nameof(ReducedTestCases))] + public static void ParseJson_UnseekableStream_Async_WithBOM(bool compactData, TestCaseType type, string jsonString) + { + ParseJson( + compactData, + type, + jsonString, + null, + bytes => JsonDocument.ParseAsync( + new WrappedMemoryStream(canRead: true, canWrite: false, canSeek: false, Utf8Bom.Concat(bytes).ToArray())). 
+ GetAwaiter().GetResult()); + } + + [Theory] + [MemberData(nameof(BadBOMCases))] + public static void ParseJson_SeekableStream_BadBOM(string json) + { + byte[] data = Encoding.UTF8.GetBytes(json); + Assert.Throws(() => JsonDocument.Parse(new MemoryStream(data))); + } + + [Theory] + [MemberData(nameof(BadBOMCases))] + public static Task ParseJson_SeekableStream_Async_BadBOM(string json) + { + byte[] data = Encoding.UTF8.GetBytes(json); + return Assert.ThrowsAsync(() => JsonDocument.ParseAsync(new MemoryStream(data))); + } + + [Theory] + [MemberData(nameof(BadBOMCases))] + public static void ParseJson_UnseekableStream_BadBOM(string json) + { + byte[] data = Encoding.UTF8.GetBytes(json); + + Assert.Throws( + () => JsonDocument.Parse( + new WrappedMemoryStream(canRead: true, canWrite: false, canSeek: false, data))); + } + + [Theory] + [MemberData(nameof(BadBOMCases))] + public static Task ParseJson_UnseekableStream_Async_BadBOM(string json) + { + byte[] data = Encoding.UTF8.GetBytes(json); + + return Assert.ThrowsAsync( + () => JsonDocument.ParseAsync( + new WrappedMemoryStream(canRead: true, canWrite: false, canSeek: false, data))); + } + [Theory] [MemberData(nameof(ReducedTestCases))] public static void ParseJson_SequenceBytes_Single(bool compactData, TestCaseType type, string jsonString)