Remove System.Net.Http dependency on System.Text.RegularExpressions (dotnet/corefx...
author: Stephen Toub <stoub@microsoft.com>
Mon, 16 Sep 2019 18:56:59 +0000 (14:56 -0400)
committer: GitHub <noreply@github.com>
Mon, 16 Sep 2019 18:56:59 +0000 (14:56 -0400)
* Remove System.Net.Http dependency on System.Text.RegularExpressions

This dependency is the only reason we end up with a 105K System.Text.RegularExpressions.dll as part of a trimmed new MVC app.  Regex is used in the case where on Windows a bypass list is provided, in which case each item in the list is changed into a regex, which is then evaluated against each URL provided to SocketsHttpHandler.  But the supported patterns are simple: the only special character recognized is an asterisk, which can map to zero or more of any character.  So, we can instead employ a simple processor for such patterns, which then eliminates the need to reference System.Text.RegularExpressions.dll from System.Net.Http.dll.  It also happens to be faster.

* Address PR feedback

* Update src/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpWindowsProxy.cs

Co-Authored-By: Jan Kotas <jkotas@microsoft.com>
Commit migrated from https://github.com/dotnet/corefx/commit/0e65e923689962f9634b5948d15f995140cc7c93

src/libraries/Common/src/System/Text/SimpleRegex.cs [new file with mode: 0644]
src/libraries/Common/tests/Common.Tests.csproj
src/libraries/Common/tests/Tests/System/Text/SimpleRegexTests.cs [new file with mode: 0644]
src/libraries/System.Net.Http.WinHttpHandler/tests/UnitTests/System.Net.Http.WinHttpHandler.Unit.Tests.csproj
src/libraries/System.Net.Http/src/System.Net.Http.csproj
src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpWindowsProxy.cs
src/libraries/System.Net.Http/tests/UnitTests/System.Net.Http.Unit.Tests.csproj

diff --git a/src/libraries/Common/src/System/Text/SimpleRegex.cs b/src/libraries/Common/src/System/Text/SimpleRegex.cs
new file mode 100644 (file)
index 0000000..7ce45d6
--- /dev/null
@@ -0,0 +1,73 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Diagnostics;
+
+namespace System.Text
+{
+    internal static class SimpleRegex
+    {
+        // Based on wildcmp written by Jack Handy (jakkhandy@hotmail.com); the original article is at:
+        // https://www.codeproject.com/Articles/1088/Wildcard-string-compare-globbing
+
+        /// <summary>
+        /// Determines whether the whole input matches a pattern in which the only special character is an
+        /// asterisk ('*'), which maps to zero or more of any character; other characters compare case-insensitively.
+        /// </summary>
+        /// <param name="input">The input to match; it must be consumed in full for a match.</param>
+        /// <param name="pattern">The pattern to match against; '*' is its only wildcard.</param>
+        /// <returns>true if the entire input matches the pattern; otherwise, false.</returns>
+        public static bool IsMatchWithStarWildcard(ReadOnlySpan<char> input, ReadOnlySpan<char> pattern)
+        {
+            int inputPos = 0, inputPosSaved = -1; // Saved input position is only meaningful once a wildcard was seen.
+            int patternPos = 0, patternPosSaved = -1; // patternPosSaved == -1 means no wildcard has been seen yet.
+
+            // Walk the input one character at a time; the pattern position may run off the end of the pattern.
+            while (inputPos < input.Length)
+            {
+                if (patternPos < pattern.Length && pattern[patternPos] == '*')
+                {
+                    // We're positioned on a wildcard in the pattern: step past it and record both positions
+                    // so a later mismatch can backtrack here. The wildcard initially consumes nothing.
+                    inputPosSaved = inputPos;
+                    patternPosSaved = ++patternPos;
+                }
+                else if (patternPos < pattern.Length &&
+                    (pattern[patternPos] == input[inputPos] || // Exact comparison first; upper-casing only matters when they differ.
+                     char.ToUpperInvariant(pattern[patternPos]) == char.ToUpperInvariant(input[inputPos])))
+                {
+                    // The current pattern and input characters match (ignoring case), so advance both.
+                    inputPos++;
+                    patternPos++;
+                }
+                else if (patternPosSaved == -1)
+                {
+                    // Mismatch (or pattern exhausted with input remaining) and no wildcard has been seen
+                    // that could absorb the offending character, so this is not a match.
+                    return false;
+                }
+                else
+                {
+                    // Mismatch, but a wildcard was seen earlier: let that wildcard absorb one more input
+                    // character and retry the rest of the pattern from just after the wildcard.
+                    inputPos = ++inputPosSaved;
+                    patternPos = patternPosSaved;
+                }
+            }
+
+            // We've reached the end of the input.  Skip every wildcard immediately after the current
+            // pattern position: trailing wildcards all map to nothing (and if something other than a
+            // wildcard follows them, skipping them doesn't change the outcome below).
+            while (patternPos < pattern.Length && pattern[patternPos] == '*')
+            {
+                patternPos++;
+            }
+
+            // If we are now at the end of the pattern, the whole input matched the whole pattern.
+            // Anything left over is a non-wildcard character with no input left to match it.
+            Debug.Assert(patternPos <= pattern.Length);
+            return patternPos == pattern.Length;
+        }
+    }
+}
index 7175816..a189d85 100644 (file)
@@ -57,6 +57,9 @@
     <Compile Include="$(CommonPath)\System\Text\ReusableTextReader.cs">
       <Link>Common\System\Text\ReusableTextReader.cs</Link>
     </Compile>
+    <Compile Include="$(CommonPath)\System\Text\SimpleRegex.cs">
+      <Link>Common\System\Text\SimpleRegex.cs</Link>
+    </Compile>
     <Compile Include="$(CommonPath)\CoreLib\System\Text\ValueStringBuilder.cs">
       <Link>Common\CoreLib\System\Text\ValueStringBuilder.cs</Link>
     </Compile>
@@ -74,6 +77,7 @@
     <Compile Include="Tests\System\Net\HttpDateParserTests.cs" />
     <Compile Include="Tests\System\PasteArgumentsTests.cs" />
     <Compile Include="Tests\System\Security\IdentityHelperTests.cs" />
+    <Compile Include="Tests\System\Text\SimpleRegexTests.cs" />
     <Compile Include="Tests\System\Text\ValueStringBuilderTests.cs" />
     <Compile Include="Tests\System\StringExtensions.Tests.cs" />
     <Compile Include="Tests\System\Collections\Generic\ArrayBuilderTests.cs" />
diff --git a/src/libraries/Common/tests/Tests/System/Text/SimpleRegexTests.cs b/src/libraries/Common/tests/Tests/System/Text/SimpleRegexTests.cs
new file mode 100644 (file)
index 0000000..522909b
--- /dev/null
@@ -0,0 +1,48 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Xunit;
+
+namespace System.Text.Tests
+{
+    public class SimpleRegexTests
+    {
+        [Theory]
+        [InlineData("", "", true)] // empty input matches an empty pattern
+        [InlineData("", "*", true)] // a wildcard may match zero characters
+        [InlineData("a", "", false)] // leftover input with no pattern left is a mismatch
+        [InlineData("a", "*", true)]
+        [InlineData("a", "a", true)]
+        [InlineData("A", "a", true)] // case is ignored in both directions
+        [InlineData("a", "A", true)]
+        [InlineData("a", "b", false)]
+        [InlineData(" a", "a", false)] // whitespace is significant; the whole input must match
+        [InlineData("a ", "a", false)]
+        [InlineData("aaa", "*", true)]
+        [InlineData("aaa", "*****", true)] // consecutive wildcards still match
+        [InlineData("example.com", "*.com", true)]
+        [InlineData("example.com", "*.net", false)]
+        [InlineData("sub.example.com", "*.com", true)] // a wildcard may span '.' separators
+        [InlineData("sub.example.com", "*.example.com", true)]
+        [InlineData("SuB.eXaMpLe.COm", "*.example.com", true)]
+        [InlineData("sub2.sub1.example.com", "*.example.com", true)]
+        [InlineData("sub2.sub1.example.com", "*.*.example.com", true)]
+        [InlineData("sub.example.com", "*.*.example.com", false)] // pattern needs two '.' before "example.com"
+        [InlineData("sub.example.com", "*.*.*", true)]
+        [InlineData("sub.example.com", "*", true)]
+        [InlineData("abcdefg", "*a*b*c*d**e***f****g*****", true)]
+        [InlineData("abcdefg", "*a*b*c*de**e***f****g*****", false)] // extra literal 'e' after "de" has no counterpart in the input
+        [InlineData(".", "*.*", true)] // both wildcards match nothing
+        [InlineData("ab.cde", "*.*", true)]
+        [InlineData(".cde", "*.*", true)]
+        [InlineData("cde", "*.*", false)] // the literal '.' in the pattern is required
+        [InlineData("cde", "cd*", true)]
+        [InlineData("192.168.1.123", "192.168.1.*", true)]
+        [InlineData("192.168.2.123", "192.168.1.*", false)] // literal prefix differs ('2' vs '1')
+        public void InputMatchesStarWildcardPattern(string input, string pattern, bool expected)
+        {
+            Assert.Equal(expected, SimpleRegex.IsMatchWithStarWildcard(input, pattern)); // strings convert implicitly to ReadOnlySpan<char>
+        }
+    }
+}
index e8f7141..065f6fa 100644 (file)
@@ -75,6 +75,9 @@
     <Compile Include="$(CommonPath)\System\Runtime\ExceptionServices\ExceptionStackTrace.cs">
       <Link>Common\System\Runtime\ExceptionServices\ExceptionStackTrace.cs</Link>
     </Compile>
+    <Compile Include="$(CommonPath)\System\Text\SimpleRegex.cs">
+      <Link>Common\System\Text\SimpleRegex.cs</Link>
+    </Compile>
     <Compile Include="$(CommonPath)\System\Threading\Tasks\RendezvousAwaitable.cs">
       <Link>Common\System\Threading\Tasks\RendezvousAwaitable.cs</Link>
     </Compile>
index 3e3f157..a6fcebc 100644 (file)
     <Compile Include="$(CommonPath)\System\Net\HttpDateParser.cs">
       <Link>Common\System\Net\HttpDateParser.cs</Link>
     </Compile>
+    <Compile Include="$(CommonPath)\System\Text\SimpleRegex.cs">
+      <Link>Common\System\Text\SimpleRegex.cs</Link>
+    </Compile>
   </ItemGroup>
   <!-- SocketsHttpHandler implementation -->
   <ItemGroup Condition="'$(TargetGroup)' == 'netcoreapp'">
     <Reference Include="System.Security.Principal.Windows" />
     <Reference Include="System.Text.Encoding" />
     <Reference Include="System.Text.Encoding.Extensions" />
-    <Reference Include="System.Text.RegularExpressions" />
     <Reference Include="System.Threading" />
     <Reference Include="System.Threading.Thread" />
     <Reference Include="System.Threading.ThreadPool" />
index 5e9a2e2..a5158e3 100644 (file)
@@ -9,7 +9,7 @@ using System.Diagnostics;
 using System.IO.Compression;
 using System.Net.NetworkInformation;
 using System.Runtime.InteropServices;
-using System.Text.RegularExpressions;
+using System.Text;
 using System.Threading;
 using SafeWinHttpHandle = Interop.WinHttp.SafeWinHttpHandle;
 
@@ -20,7 +20,7 @@ namespace System.Net.Http
         private readonly MultiProxy _insecureProxy;    // URI of the http system proxy if set
         private readonly MultiProxy _secureProxy;      // URI of the https system proxy if set
         private readonly FailedProxyCache _failedProxies = new FailedProxyCache();
-        private readonly List<Regex> _bypass;          // list of domains not to proxy
+        private readonly List<string> _bypass;         // list of domains not to proxy
         private readonly bool _bypassLocal = false;    // we should bypass domain considered local
         private readonly List<IPAddress> _localIp;
         private ICredentials _credentials;
@@ -83,7 +83,7 @@ namespace System.Net.Http
 
                     // Process bypass list for manual setting.
                     // Initial list size is best guess based on string length assuming each entry is at least 5 characters on average.
-                    _bypass = new List<Regex>(proxyHelper.ProxyBypass.Length / 5);
+                    _bypass = new List<string>(proxyHelper.ProxyBypass.Length / 5);
 
                     while (idx < proxyHelper.ProxyBypass.Length)
                     {
@@ -137,20 +137,7 @@ namespace System.Net.Http
                             continue;
                         }
 
-                        try
-                        {
-                            // Escape any special characters and unescape * to get wildcard pattern match.
-                            Regex re = new Regex(Regex.Escape(tmp).Replace("\\*", ".*?") + "$",
-                                            RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
-                            _bypass.Add(re);
-                        }
-                        catch (Exception ex)
-                        {
-                            if (NetEventSource.IsEnabled)
-                            {
-                                NetEventSource.Error(this, $"Failed to process {tmp} from bypass list: {ex}");
-                            }
-                        }
+                        _bypass.Add(tmp);
                     }
                     if (_bypass.Count == 0)
                     {
@@ -294,10 +281,10 @@ namespace System.Net.Http
                 // Check if we have other rules for bypass.
                 if (_bypass != null)
                 {
-                    foreach (Regex entry in _bypass)
+                    foreach (string entry in _bypass)
                     {
                         // IdnHost does not have [].
-                        if (entry.IsMatch(uri.IdnHost))
+                        if (SimpleRegex.IsMatchWithStarWildcard(uri.IdnHost, entry))
                         {
                             return MultiProxy.Empty;
                         }
@@ -343,6 +330,6 @@ namespace System.Net.Http
         }
 
         // Access function for unit tests.
-        internal List<Regex> BypassList => _bypass;
+        internal List<string> BypassList => _bypass;
     }
 }
index 3c252df..68b9608 100644 (file)
@@ -50,6 +50,9 @@
     <Compile Include="$(CommonPath)\System\Net\UriScheme.cs">
       <Link>ProductionCode\Common\System\Net\UriScheme.cs</Link>
     </Compile>
+    <Compile Include="$(CommonPath)\System\Text\SimpleRegex.cs">
+      <Link>Common\System\Text\SimpleRegex.cs</Link>
+    </Compile>
     <Compile Include="$(CommonTestPath)\System\ShouldNotBeInvokedException.cs">
       <Link>Common\System\ShouldNotBeInvokedException.cs</Link>
     </Compile>