From c440954f40ced5ff42490e8a1027dac64505f18e Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Wed, 5 Feb 2020 16:28:52 -0500 Subject: [PATCH] Use better names for Regex DynamicMethods to help when profiling (#31817) Currently, names in the profiler just show up as "FindFirstChar37" or "Go21", and it's difficult to know with which regex it's associated. --- .../src/System/Text/RegularExpressions/Regex.cs | 6 ++-- .../Text/RegularExpressions/RegexCompiler.cs | 4 +-- .../Text/RegularExpressions/RegexLWCGCompiler.cs | 37 ++++++++++++++++++---- 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs index 81e4a51..60468a5 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs @@ -80,7 +80,7 @@ namespace System.Text.RegularExpressions if (UseOptionC()) { // Storing into this Regex's factory will also implicitly update the cache - factory = Compile(_code!, options, matchTimeout != InfiniteMatchTimeout); + factory = Compile(pattern, _code!, options, matchTimeout != InfiniteMatchTimeout); _code = null; } #endif @@ -203,8 +203,8 @@ namespace System.Text.RegularExpressions /// instantiating a non-compiled regex. /// [MethodImpl(MethodImplOptions.NoInlining)] - private static RegexRunnerFactory Compile(RegexCode code, RegexOptions options, bool hasTimeout) => - RegexCompiler.Compile(code, options, hasTimeout); + private static RegexRunnerFactory Compile(string pattern, RegexCode code, RegexOptions options, bool hasTimeout) => + RegexCompiler.Compile(pattern, code, options, hasTimeout); #endif public static void CompileToAssembly(RegexCompilationInfo[] regexinfos, AssemblyName assemblyname) => diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index ad6cc11..c615dff 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -142,8 +142,8 @@ namespace System.Text.RegularExpressions /// Entry point to dynamically compile a regular expression. The expression is compiled to /// an in-memory assembly. /// - internal static RegexRunnerFactory Compile(RegexCode code, RegexOptions options, bool hasTimeout) => - new RegexLWCGCompiler().FactoryInstanceFromCode(code, options, hasTimeout); + internal static RegexRunnerFactory Compile(string pattern, RegexCode code, RegexOptions options, bool hasTimeout) => + new RegexLWCGCompiler().FactoryInstanceFromCode(pattern, code, options, hasTimeout); #if DEBUG // until it can be fully implemented /// diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs index a785a3b..09fdc06 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs @@ -10,7 +10,24 @@ namespace System.Text.RegularExpressions { internal sealed class RegexLWCGCompiler : RegexCompiler { + /// + /// Name of the environment variable used to opt-in to including the regex pattern in the DynamicMethod name. + /// Set the environment variable to "1" to turn this on. + /// + private const string IncludePatternInNamesEnvVar = "DOTNET_SYSTEM_TEXT_REGULAREXPRESSIONS_PATTERNINNAME"; + + /// + /// If true, the pattern (or a portion of it) are included in the generated DynamicMethod names. + /// + /// + /// This is opt-in to avoid exposing the pattern, which may itself be dynamically built in diagnostics by default. + /// + private static readonly bool s_includePatternInName = Environment.GetEnvironmentVariable(IncludePatternInNamesEnvVar) == "1"; + + /// Parameter types for the generated Go and FindFirstChar methods. private static readonly Type[] s_paramTypes = new Type[] { typeof(RegexRunner) }; + + /// Id number to use for the next compiled regex. private static int s_regexCount = 0; public RegexLWCGCompiler() : base(persistsAssembly: false) @@ -18,7 +35,7 @@ namespace System.Text.RegularExpressions } /// The top-level driver. Initializes everything then calls the Generate* methods. - public RegexRunnerFactory FactoryInstanceFromCode(RegexCode code, RegexOptions options, bool hasTimeout) + public RegexRunnerFactory FactoryInstanceFromCode(string pattern, RegexCode code, RegexOptions options, bool hasTimeout) { _code = code; _codes = code.Codes; @@ -30,20 +47,28 @@ namespace System.Text.RegularExpressions _options = options; _hasTimeout = hasTimeout; - // pick a unique number for the methods we generate - string regexnumString = ((uint)Interlocked.Increment(ref s_regexCount)).ToString(); + // Pick a unique number for the methods we generate. + object regexNum = (uint)Interlocked.Increment(ref s_regexCount); // object to box once instead of twice below + + // Get a description of the regex to use in the name. This is helpful when profiling, and is opt-in. + string description = string.Empty; + if (s_includePatternInName) + { + const int DescriptionLimit = 100; // arbitrary limit to avoid very long method names + description = "_" + (pattern.Length > DescriptionLimit ? pattern.Substring(0, DescriptionLimit) : pattern); + } - DynamicMethod goMethod = DefineDynamicMethod("Go" + regexnumString, null, typeof(CompiledRegexRunner)); + DynamicMethod goMethod = DefineDynamicMethod($"Regex{regexNum}_Go{description}", null, typeof(CompiledRegexRunner)); GenerateGo(); - DynamicMethod findFirstCharMethod = DefineDynamicMethod("FindFirstChar" + regexnumString, typeof(bool), typeof(CompiledRegexRunner)); + DynamicMethod findFirstCharMethod = DefineDynamicMethod($"Regex{regexNum}_FindFirstChar{description}", typeof(bool), typeof(CompiledRegexRunner)); GenerateFindFirstChar(); return new CompiledRegexRunnerFactory(goMethod, findFirstCharMethod, _trackcount); } /// Begins the definition of a new method (no args) with a specified return value. - public DynamicMethod DefineDynamicMethod(string methname, Type? returntype, Type hostType) + private DynamicMethod DefineDynamicMethod(string methname, Type? returntype, Type hostType) { // We're claiming that these are static methods, but really they are instance methods. // By giving them a parameter which represents "this", we're tricking them into -- 2.7.4