From 25b777b2405db5b4143179bf852120c59bf1a13f Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Sat, 5 Oct 2019 22:33:18 -0400 Subject: [PATCH] Fix HtmlEncode handling of surrogate pairs (dotnet/corefx#41576) Due to a regression that came as part of changing the code from using pointers to using spans, when the surrogate pair isn't at the beginning of the input, it's incorrectly encoded. Commit migrated from https://github.com/dotnet/corefx/commit/6de42378223c6cb138d28f812c440092718c7a19 --- .../System.Runtime.Extensions/src/System/Net/WebUtility.cs | 10 +++++----- .../System.Runtime.Extensions/tests/System/Net/WebUtility.cs | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.Runtime.Extensions/src/System/Net/WebUtility.cs b/src/libraries/System.Runtime.Extensions/src/System/Net/WebUtility.cs index 998f142..b072ed8 100644 --- a/src/libraries/System.Runtime.Extensions/src/System/Net/WebUtility.cs +++ b/src/libraries/System.Runtime.Extensions/src/System/Net/WebUtility.cs @@ -639,17 +639,17 @@ namespace System.Net private static int GetNextUnicodeScalarValueFromUtf16Surrogate(ReadOnlySpan input, ref int index) { // invariants - Debug.Assert(input.Length >= 1); - Debug.Assert(char.IsSurrogate(input[0])); + Debug.Assert(input.Length - index >= 1); + Debug.Assert(char.IsSurrogate(input[index])); - if (input.Length <= 1) + if (input.Length - index <= 1) { // not enough characters remaining to resurrect the original scalar value return UnicodeReplacementChar; } - char leadingSurrogate = input[0]; - char trailingSurrogate = input[1]; + char leadingSurrogate = input[index]; + char trailingSurrogate = input[index + 1]; if (!char.IsSurrogatePair(leadingSurrogate, trailingSurrogate)) { diff --git a/src/libraries/System.Runtime.Extensions/tests/System/Net/WebUtility.cs b/src/libraries/System.Runtime.Extensions/tests/System/Net/WebUtility.cs index ffba432..93c082a 100644 --- 
a/src/libraries/System.Runtime.Extensions/tests/System/Net/WebUtility.cs +++ b/src/libraries/System.Runtime.Extensions/tests/System/Net/WebUtility.cs @@ -83,6 +83,8 @@ namespace System.Net.Tests yield return new object[] { char.ConvertFromUtf32(144308), "𣎴" }; yield return new object[] { "\uD800\uDC00", "𐀀" }; yield return new object[] { "a\uD800\uDC00b", "a𐀀b" }; + yield return new object[] { "\uD83D\uDE01\uD83D\uDE02\uD83D\uDE03", "😁😂😃" }; + yield return new object[] { "a\uD83D\uDE01\uD83D\uDE02\uD83D\uDE03b", "a😁😂😃b" }; // High BMP non-chars yield return new object[] { "\uFFFD", "\uFFFD" }; -- 2.7.4