Refactor ForbidHtmlCharacters to allow for more trimming. (#48172)
authorEric Erhardt <eric.erhardt@microsoft.com>
Fri, 12 Feb 2021 15:41:36 +0000 (09:41 -0600)
committerGitHub <noreply@github.com>
Fri, 12 Feb 2021 15:41:36 +0000 (09:41 -0600)
Having a static method on an uninstantiated class causes the class to be preserved along with its base hierarchy, as well as any overridden methods needed to satisfy abstract base methods.

src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/DefaultJavaScriptEncoder.cs
src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/DefaultJavaScriptEncoderBasicLatin.cs
src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/HtmlEncoder.cs
src/libraries/System.Text.Encodings.Web/src/System/Text/Encodings/Web/UrlEncoder.cs

index 4706a8e..6aa68a5 100644 (file)
@@ -39,7 +39,7 @@ namespace System.Text.Encodings.Web
             // it's unfortunately common for developers to
             // forget to HTML-encode a string once it has been JS-encoded,
             // so this offers extra protection.
-            DefaultHtmlEncoder.ForbidHtmlCharacters(_allowedCharacters);
+            HtmlEncoderHelper.ForbidHtmlCharacters(_allowedCharacters);
 
             // '\' (U+005C REVERSE SOLIDUS) must always be escaped in Javascript / ECMAScript / JSON.
             // '/' (U+002F SOLIDUS) is not Javascript / ECMAScript / JSON-sensitive so doesn't need to be escaped.
index 82e38db..4e59276 100644 (file)
@@ -33,7 +33,7 @@ namespace System.Text.Encodings.Web
             // it's unfortunately common for developers to
             // forget to HTML-encode a string once it has been JS-encoded,
             // so this offers extra protection.
-            DefaultHtmlEncoder.ForbidHtmlCharacters(allowedCharacters);
+            HtmlEncoderHelper.ForbidHtmlCharacters(allowedCharacters);
 
             // '\' (U+005C REVERSE SOLIDUS) must always be escaped in Javascript / ECMAScript / JSON.
             // '/' (U+002F SOLIDUS) is not Javascript / ECMAScript / JSON-sensitive so doesn't need to be escaped.
index 281a8f5..2198dbc 100644 (file)
@@ -1,7 +1,6 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System.ComponentModel;
 using System.Diagnostics;
 using System.Runtime.CompilerServices;
 using System.Text.Internal;
@@ -62,17 +61,7 @@ namespace System.Text.Encodings.Web
             // (includes categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp)
             _allowedCharacters.ForbidUndefinedCharacters();
 
-            ForbidHtmlCharacters(_allowedCharacters);
-        }
-
-        internal static void ForbidHtmlCharacters(AllowedCharactersBitmap allowedCharacters)
-        {
-            allowedCharacters.ForbidCharacter('<');
-            allowedCharacters.ForbidCharacter('>');
-            allowedCharacters.ForbidCharacter('&');
-            allowedCharacters.ForbidCharacter('\''); // can be used to escape attributes
-            allowedCharacters.ForbidCharacter('\"'); // can be used to escape attributes
-            allowedCharacters.ForbidCharacter('+'); // technically not HTML-specific, but can be used to perform UTF7-based attacks
+            HtmlEncoderHelper.ForbidHtmlCharacters(_allowedCharacters);
         }
 
         public DefaultHtmlEncoder(params UnicodeRange[] allowedRanges) : this(new TextEncoderSettings(allowedRanges))
@@ -163,4 +152,21 @@ namespace System.Text.Encodings.Web
             return true;
         }
     }
+
+    /// <summary>
+    /// Separates static methods from HtmlEncoder and DefaultHtmlEncoder so those classes can be trimmed
+    /// when only these static methods are needed.
+    /// </summary>
+    internal static class HtmlEncoderHelper
+    {
+        internal static void ForbidHtmlCharacters(AllowedCharactersBitmap allowedCharacters)
+        {
+            allowedCharacters.ForbidCharacter('<');
+            allowedCharacters.ForbidCharacter('>');
+            allowedCharacters.ForbidCharacter('&');
+            allowedCharacters.ForbidCharacter('\''); // can be used to escape attributes
+            allowedCharacters.ForbidCharacter('\"'); // can be used to escape attributes
+            allowedCharacters.ForbidCharacter('+'); // technically not HTML-specific, but can be used to perform UTF7-based attacks
+        }
+    }
 }
index a60ecad..f8bfeea 100644 (file)
@@ -74,7 +74,7 @@ namespace System.Text.Encodings.Web
             // it's unfortunately common for developers to
             // forget to HTML-encode a string once it has been URL-encoded,
             // so this offers extra protection.
-            DefaultHtmlEncoder.ForbidHtmlCharacters(_allowedCharacters);
+            HtmlEncoderHelper.ForbidHtmlCharacters(_allowedCharacters);
 
             // Per RFC 3987, Sec. 2.2, we want encodings that are safe for
             // four particular components: 'isegment', 'ipath-noscheme',