From: Stephen Toub Date: Mon, 16 Sep 2019 18:56:59 +0000 (-0400) Subject: Remove System.Net.Http dependency on System.Text.RegularExpressions (dotnet/corefx... X-Git-Tag: submit/tizen/20210909.063632~11031^2~458 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=78d665b987499774090f902c89b1f44fe942b99c;p=platform%2Fupstream%2Fdotnet%2Fruntime.git Remove System.Net.Http dependency on System.Text.RegularExpressions (dotnet/corefx#41110) * Remove System.Net.Http dependency on System.Text.RegularExpressions This dependency is the only reason we end up with a 105K System.Text.RegularExpressions.dll as part of a trimmed new MVC app. Regex is used in the case where on Windows a bypass list is provided, in which case each item in the list is changed into a regex, which is then evaluated against each url provided to SocketsHttpHandler. But the patterns usable are simple: the only special character recognized is an asterisk, which can map to zero or more of any character. So, we can instead employ a simple processor for such patterns, which then eliminates the need to reference System.Text.RegularExpressions.dll from System.Net.Http.dll. It also happens to be faster. * Address PR feedback * Update src/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpWindowsProxy.cs Co-Authored-By: Jan Kotas Commit migrated from https://github.com/dotnet/corefx/commit/0e65e923689962f9634b5948d15f995140cc7c93 --- diff --git a/src/libraries/Common/src/System/Text/SimpleRegex.cs b/src/libraries/Common/src/System/Text/SimpleRegex.cs new file mode 100644 index 0000000..7ce45d6 --- /dev/null +++ b/src/libraries/Common/src/System/Text/SimpleRegex.cs @@ -0,0 +1,73 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
using System.Diagnostics;

namespace System.Text
{
    /// <summary>
    /// Minimal wildcard matcher whose only metacharacter is an asterisk.
    /// </summary>
    internal static class SimpleRegex
    {
        // Based on wildcmp written by Jack Handy - jakkhandy@hotmail.com
        // https://www.codeproject.com/Articles/1088/Wildcard-string-compare-globbing

        /// <summary>
        /// Perform a match between an input string and a pattern in which the only special character
        /// is an asterisk, which can map to zero or more of any character in the input.
        /// </summary>
        /// <param name="input">The input to match.</param>
        /// <param name="pattern">The pattern to match against.</param>
        /// <returns><c>true</c> if the input matches the pattern; otherwise, <c>false</c>.</returns>
        /// <remarks>
        /// The entire input must be consumed by the pattern (there is no implicit leading or trailing
        /// wildcard). Characters are compared exactly first and then via
        /// <see cref="char.ToUpperInvariant(char)"/>, so the match ignores case. An asterisk that
        /// appears in <paramref name="input"/> is treated as an ordinary character.
        /// </remarks>
        public static bool IsMatchWithStarWildcard(ReadOnlySpan<char> input, ReadOnlySpan<char> pattern)
        {
            // The "saved" positions record the most recent wildcard seen in the pattern and the input
            // position it is currently assumed to extend to; -1 means no wildcard has been seen yet.
            int inputPos = 0, inputPosSaved = -1;
            int patternPos = 0, patternPosSaved = -1;

            // Loop through each character in the input.
            while (inputPos < input.Length)
            {
                if (patternPos < pattern.Length && pattern[patternPos] == '*')
                {
                    // If we're currently positioned on a wildcard in the pattern,
                    // move past it and remember where we are to backtrack to.
                    inputPosSaved = inputPos;
                    patternPosSaved = ++patternPos;
                }
                else if (patternPos < pattern.Length &&
                    (pattern[patternPos] == input[inputPos] ||
                     char.ToUpperInvariant(pattern[patternPos]) == char.ToUpperInvariant(input[inputPos])))
                {
                    // If the characters in the pattern and the input match, advance both.
                    // The exact comparison comes first so that identical characters skip the case conversion.
                    inputPos++;
                    patternPos++;
                }
                else if (patternPosSaved == -1)
                {
                    // If we're not on a wildcard and the current characters don't match and we don't have
                    // any wildcard to backtrack to, this is not a match.
                    return false;
                }
                else
                {
                    // Otherwise, this is not a wildcard, the characters don't match, but we do have a
                    // wildcard saved, so backtrack to it and use it to consume the next input character.
                    inputPos = ++inputPosSaved;
                    patternPos = patternPosSaved;
                }
            }

            // We've reached the end of the input. Eat all wildcards immediately after where we are
            // in the pattern, as if they're at the end, they'll all just map to nothing (and if it
            // turns out there's something after them, eating them won't matter).
            while (patternPos < pattern.Length && pattern[patternPos] == '*')
            {
                patternPos++;
            }

            // If we are in fact at the end of the pattern, then we successfully matched.
            // If there's anything left, it's not a wildcard, so it doesn't match.
            Debug.Assert(patternPos <= pattern.Length);
            return patternPos == pattern.Length;
        }
    }
}

// ---- Patch text that shared these source lines (kept verbatim; not part of SimpleRegex.cs) ----
// diff --git a/src/libraries/Common/tests/Common.Tests.csproj b/src/libraries/Common/tests/Common.Tests.csproj
// index 7175816..a189d85 100644
// --- a/src/libraries/Common/tests/Common.Tests.csproj
// +++ b/src/libraries/Common/tests/Common.Tests.csproj
// @@ -57,6 +57,9 @@ Common\System\Text\ReusableTextReader.cs
// + + Common\System\Text\SimpleRegex.cs +
// Common\CoreLib\System\Text\ValueStringBuilder.cs
// @@ -74,6 +77,7 @@
// +
// diff --git a/src/libraries/Common/tests/Tests/System/Text/SimpleRegexTests.cs b/src/libraries/Common/tests/Tests/System/Text/SimpleRegexTests.cs
// new file mode 100644
// index 0000000..522909b
// --- /dev/null
// +++ b/src/libraries/Common/tests/Tests/System/Text/SimpleRegexTests.cs
// @@ -0,0 +1,48 @@
// +// Licensed to the .NET Foundation under one or more agreements.
// +// The .NET Foundation licenses this file to you under the MIT license.
// +// See the LICENSE file in the project root for more information.
using Xunit;

namespace System.Text.Tests
{
    /// <summary>
    /// Data-driven tests for <c>SimpleRegex.IsMatchWithStarWildcard</c>.
    /// </summary>
    public class SimpleRegexTests
    {
        /// <summary>
        /// Verifies that matching <paramref name="input"/> against <paramref name="pattern"/>
        /// yields <paramref name="expected"/>.
        /// </summary>
        /// <param name="input">The text to be matched.</param>
        /// <param name="pattern">The pattern, in which an asterisk stands for zero or more characters.</param>
        /// <param name="expected">Whether the input is expected to match the pattern.</param>
        [Theory]
        [InlineData("", "", true)]
        [InlineData("", "*", true)]
        [InlineData("a", "", false)]
        [InlineData("a", "*", true)]
        [InlineData("a", "a", true)]
        [InlineData("A", "a", true)]
        [InlineData("a", "A", true)]
        [InlineData("a", "b", false)]
        [InlineData(" a", "a", false)]
        [InlineData("a ", "a", false)]
        [InlineData("aaa", "*", true)]
        [InlineData("aaa", "*****", true)]
        [InlineData("example.com", "*.com", true)]
        [InlineData("example.com", "*.net", false)]
        [InlineData("sub.example.com", "*.com", true)]
        [InlineData("sub.example.com", "*.example.com", true)]
        [InlineData("SuB.eXaMpLe.COm", "*.example.com", true)]
        [InlineData("sub2.sub1.example.com", "*.example.com", true)]
        [InlineData("sub2.sub1.example.com", "*.*.example.com", true)]
        [InlineData("sub.example.com", "*.*.example.com", false)]
        [InlineData("sub.example.com", "*.*.*", true)]
        [InlineData("sub.example.com", "*", true)]
        [InlineData("abcdefg", "*a*b*c*d**e***f****g*****", true)]
        [InlineData("abcdefg", "*a*b*c*de**e***f****g*****", false)]
        [InlineData(".", "*.*", true)]
        [InlineData("ab.cde", "*.*", true)]
        [InlineData(".cde", "*.*", true)]
        [InlineData("cde", "*.*", false)]
        [InlineData("cde", "cd*", true)]
        [InlineData("192.168.1.123", "192.168.1.*", true)]
        [InlineData("192.168.2.123", "192.168.1.*", false)]
        // Empty input can only match a pattern made up entirely of wildcards.
        [InlineData("", "a", false)]
        // Wildcard between two literals, including the case where the tail literal never lines up with the end.
        [InlineData("abc", "a*c", true)]
        [InlineData("abc", "a*b", false)]
        [InlineData("www.example.com", "www.*.com", true)]
        // A partial literal match after the wildcard must be abandoned and retried one character later.
        [InlineData("aab", "*ab", true)]
        // An asterisk is only special in the pattern, never in the input.
        [InlineData("*", "a", false)]
        [InlineData("a*", "a*", true)]
        public void InputMatchesStarWildcardPattern(string input, string pattern, bool expected)
        {
            Assert.Equal(expected, SimpleRegex.IsMatchWithStarWildcard(input, pattern));
        }
    }
}

// ---- Patch text that shared this source line (kept verbatim; not part of SimpleRegexTests.cs) ----
// diff --git a/src/libraries/System.Net.Http.WinHttpHandler/tests/UnitTests/System.Net.Http.WinHttpHandler.Unit.Tests.csproj b/src/libraries/System.Net.Http.WinHttpHandler/tests/UnitTests/System.Net.Http.WinHttpHandler.Unit.Tests.csproj
// index e8f7141..065f6fa 100644
// --- a/src/libraries/System.Net.Http.WinHttpHandler/tests/UnitTests/System.Net.Http.WinHttpHandler.Unit.Tests.csproj
// +++
b/src/libraries/System.Net.Http.WinHttpHandler/tests/UnitTests/System.Net.Http.WinHttpHandler.Unit.Tests.csproj @@ -75,6 +75,9 @@ Common\System\Runtime\ExceptionServices\ExceptionStackTrace.cs + + Common\System\Text\SimpleRegex.cs + Common\System\Threading\Tasks\RendezvousAwaitable.cs diff --git a/src/libraries/System.Net.Http/src/System.Net.Http.csproj b/src/libraries/System.Net.Http/src/System.Net.Http.csproj index 3e3f157..a6fcebc 100644 --- a/src/libraries/System.Net.Http/src/System.Net.Http.csproj +++ b/src/libraries/System.Net.Http/src/System.Net.Http.csproj @@ -115,6 +115,9 @@ Common\System\Net\HttpDateParser.cs + + Common\System\Text\SimpleRegex.cs + @@ -701,7 +704,6 @@ - diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpWindowsProxy.cs b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpWindowsProxy.cs index 5e9a2e2..a5158e3 100644 --- a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpWindowsProxy.cs +++ b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpWindowsProxy.cs @@ -9,7 +9,7 @@ using System.Diagnostics; using System.IO.Compression; using System.Net.NetworkInformation; using System.Runtime.InteropServices; -using System.Text.RegularExpressions; +using System.Text; using System.Threading; using SafeWinHttpHandle = Interop.WinHttp.SafeWinHttpHandle; @@ -20,7 +20,7 @@ namespace System.Net.Http private readonly MultiProxy _insecureProxy; // URI of the http system proxy if set private readonly MultiProxy _secureProxy; // URI of the https system proxy if set private readonly FailedProxyCache _failedProxies = new FailedProxyCache(); - private readonly List<Regex> _bypass; // list of domains not to proxy + private readonly List<string> _bypass; // list of domains not to proxy private readonly bool _bypassLocal = false; // we should bypass domain considered local private readonly List _localIp; private ICredentials _credentials; @@ -83,7 +83,7 @@ namespace
System.Net.Http // Process bypass list for manual setting. // Initial list size is best guess based on string length assuming each entry is at least 5 characters on average. - _bypass = new List<Regex>(proxyHelper.ProxyBypass.Length / 5); + _bypass = new List<string>(proxyHelper.ProxyBypass.Length / 5); while (idx < proxyHelper.ProxyBypass.Length) { @@ -137,20 +137,7 @@ namespace System.Net.Http continue; } - try - { - // Escape any special characters and unescape * to get wildcard pattern match. - Regex re = new Regex(Regex.Escape(tmp).Replace("\\*", ".*?") + "$", - RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant); - _bypass.Add(re); - } - catch (Exception ex) - { - if (NetEventSource.IsEnabled) - { - NetEventSource.Error(this, $"Failed to process {tmp} from bypass list: {ex}"); - } - } + _bypass.Add(tmp); } if (_bypass.Count == 0) { @@ -294,10 +281,10 @@ namespace System.Net.Http // Check if we have other rules for bypass. if (_bypass != null) { - foreach (Regex entry in _bypass) + foreach (string entry in _bypass) { // IdnHost does not have []. - if (entry.IsMatch(uri.IdnHost)) + if (SimpleRegex.IsMatchWithStarWildcard(uri.IdnHost, entry)) { return MultiProxy.Empty; } @@ -343,6 +330,6 @@ namespace System.Net.Http } // Access function for unit tests. - internal List<Regex> BypassList => _bypass; + internal List<string> BypassList => _bypass; } } diff --git a/src/libraries/System.Net.Http/tests/UnitTests/System.Net.Http.Unit.Tests.csproj b/src/libraries/System.Net.Http/tests/UnitTests/System.Net.Http.Unit.Tests.csproj index 3c252df..68b9608 100644 --- a/src/libraries/System.Net.Http/tests/UnitTests/System.Net.Http.Unit.Tests.csproj +++ b/src/libraries/System.Net.Http/tests/UnitTests/System.Net.Http.Unit.Tests.csproj @@ -50,6 +50,9 @@ ProductionCode\Common\System\Net\UriScheme.cs + + Common\System\Text\SimpleRegex.cs + Common\System\ShouldNotBeInvokedException.cs