Fix stray non-surrogate char handling in TextEncoder (dotnet/corefx#38328)
author Abhi Agarwal <abhiagarwal192@gmail.com>
Thu, 13 Jun 2019 18:00:18 +0000 (23:30 +0530)
committer Levi Broderick <GrabYourPitchforks@users.noreply.github.com>
Thu, 13 Jun 2019 18:00:18 +0000 (11:00 -0700)
Fixes issue https://github.com/aspnet/AspNetCore/issues/10573

Commit migrated from https://github.com/dotnet/corefx/commit/a741bb2d6b98773123b0d4578ec2a5f654313576

src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/TextEncoder.cs
src/libraries/System.Text.Encodings.Web/tests/HtmlEncoderTests.cs

index 701ee3a..b98268f 100644 (file)
@@ -337,7 +337,8 @@ namespace System.Text.Encodings.Web
 
             // this loop processes character pairs (in case they are surrogates).
             // there is an if block below to process single last character.
-            for (int secondCharIndex = 1; secondCharIndex < valueLength; secondCharIndex++)
+            int secondCharIndex;
+            for (secondCharIndex = 1; secondCharIndex < valueLength; secondCharIndex++)
             {
                 if (!wasSurrogatePair)
                 {
@@ -370,7 +371,7 @@ namespace System.Text.Encodings.Web
                 }
             }
 
-            if (!wasSurrogatePair)
+            if (!wasSurrogatePair || (secondCharIndex == valueLength))
             {
                 firstChar = value[valueLength - 1];
                 int nextScalar = UnicodeHelpers.GetScalarValueFromUtf16(firstChar, null, out wasSurrogatePair);
index 82cf356..349c425 100644 (file)
@@ -1,4 +1,4 @@
-// Licensed to the .NET Foundation under one or more agreements.
+// Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
@@ -13,18 +13,23 @@ namespace Microsoft.Framework.WebEncoders
 {
     public class HtmlEncoderTests
     {
-        [Fact]
-        public void TestSurrogate()
+        [Theory]
+        [InlineData("&#x1F4A9;", "\U0001f4a9")]
+        [InlineData("&#x1F602;2", "\U0001F6022")]
+        [InlineData("&#x1F602; 21", "\U0001F602 21")]
+        [InlineData("x&#x1F602;y", "x\U0001F602y")]
+        [InlineData("&#x1F602;x&#x1F602;y", "\U0001F602x\U0001F602y")]
+        public void TestSurrogate(string expected, string actual)
         {
-            Assert.Equal("&#x1F4A9;", System.Text.Encodings.Web.HtmlEncoder.Default.Encode("\U0001f4a9"));
+            Assert.Equal(expected, System.Text.Encodings.Web.HtmlEncoder.Default.Encode(actual));
             
             using (var writer = new StringWriter())
             {
-                System.Text.Encodings.Web.HtmlEncoder.Default.Encode(writer, "\U0001f4a9");
-                Assert.Equal("&#x1F4A9;", writer.GetStringBuilder().ToString());
+                System.Text.Encodings.Web.HtmlEncoder.Default.Encode(writer, actual);
+                Assert.Equal(expected, writer.GetStringBuilder().ToString());
             }
         }
-        
+
         [Fact]
         public void Ctor_WithTextEncoderSettings()
         {