From ea93c3e78a315a1a948504db277bd799c896d4f8 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Tom=C3=A1=C5=A1=20Rylek?= Date: Tue, 9 Feb 2021 01:27:58 +0100 Subject: [PATCH] Introducing support for callchain profile-driven optimizations (#47664) After adding support for calculating callchain profile statistics as an initial attempt at compile-time measure for code layout quality, I have implemented an initial algorithm for method placement based on the statistics. The algorithm is trivial, it just sorts all caller-callee pairs resolved from the profile by descending call counts and then goes through the list and just places methods in the order in which they are found, putting non-profiled methods last. In the System.Private.CoreLib compilation using the profile file simple_new_model_two_apps_1_7.json (not yet the latest one from Siva with signatures), I'm seeing the following difference in the statistics: --callchain-method:none CHARACTERISTIC | PAIR COUNT | CALL COUNT | PERCENTAGE ---------------------------------------------------------------- ENTRIES TOTAL | 291 | 266229 | 100.00 RESOLVED ENTRIES | 267 | 260172 | 97.72 UNRESOLVED ENTRIES | 24 | 6057 | 2.28 NEAR (INTRA-PAGE) CALLS | 145 | 109055 | 40.96 FAR (CROSS-PAGE) CALLS | 122 | 151117 | 56.76 --callchain-method:sort CHARACTERISTIC | PAIR COUNT | CALL COUNT | PERCENTAGE ---------------------------------------------------------------- ENTRIES TOTAL | 291 | 266229 | 100.00 RESOLVED ENTRIES | 267 | 260172 | 97.72 UNRESOLVED ENTRIES | 24 | 6057 | 2.28 NEAR (INTRA-PAGE) CALLS | 237 | 260110 | 97.70 FAR (CROSS-PAGE) CALLS | 30 | 62 | 0.02 While the initial results seem encouraging, I guess that's mostly because the profile used is very small, I guess that with more complex profiles and / or with composite compilation of the entire framework things may become substantially more complex. Thanks Tomas --- .../Compiler/ReadyToRunFileLayoutOptimizer.cs | 193 +++++++++++++++------ src/coreclr/tools/aot/crossgen2/Program.cs | 1 + .../tools/aot/crossgen2/Properties/Resources.resx | 4 +- 3 files changed, 143 insertions(+), 55 deletions(-) diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunFileLayoutOptimizer.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunFileLayoutOptimizer.cs index 28349ce..2e4b601 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunFileLayoutOptimizer.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunFileLayoutOptimizer.cs @@ -22,13 +22,14 @@ namespace ILCompiler DefaultSort, ExclusiveWeight, HotCold, - HotWarmCold + HotWarmCold, + CallFrequency, } public enum ReadyToRunFileLayoutAlgorithm { DefaultSort, - MethodOrder + MethodOrder, } class ReadyToRunFileLayoutOptimizer @@ -63,45 +64,7 @@ namespace ILCompiler } } - if (_methodLayoutAlgorithm == ReadyToRunMethodLayoutAlgorithm.ExclusiveWeight) - { - methods.MergeSortAllowDuplicates(sortMethodWithGCInfoByWeight); - - int sortMethodWithGCInfoByWeight(MethodWithGCInfo left, MethodWithGCInfo right) - { - return -MethodWithGCInfoToWeight(left).CompareTo(MethodWithGCInfoToWeight(right)); - } - } - else if (_methodLayoutAlgorithm == ReadyToRunMethodLayoutAlgorithm.HotCold) - { - methods.MergeSortAllowDuplicates((MethodWithGCInfo left, MethodWithGCInfo right) => ComputeHotColdRegion(left).CompareTo(ComputeHotColdRegion(right))); - - int ComputeHotColdRegion(MethodWithGCInfo method) - { - return MethodWithGCInfoToWeight(method) > 0 ? 0 : 1; - } - } - else if (_methodLayoutAlgorithm == ReadyToRunMethodLayoutAlgorithm.HotWarmCold) - { - methods.MergeSortAllowDuplicates((MethodWithGCInfo left, MethodWithGCInfo right) => ComputeHotWarmColdRegion(left).CompareTo(ComputeHotWarmColdRegion(right))); - - int ComputeHotWarmColdRegion(MethodWithGCInfo method) - { - double weight = MethodWithGCInfoToWeight(method); - - // If weight is greater than 128 its probably signicantly used at runtime - if (weight > 128) - return 0; - - // If weight is less than 128 but greater than 0, then its probably used at startup - // or some at runtime, but is less critical than the hot code - if (weight > 0) - return 1; - - // Methods without weight are probably relatively rarely used - return 2; - }; - } + methods = ApplyMethodSort(methods); int sortOrder = 0; @@ -126,18 +89,6 @@ namespace ILCompiler newNodesArray.MergeSortAllowDuplicates(new SortableDependencyNode.ObjectNodeComparer(new CompilerComparer())); return newNodesArray.ToImmutableArray(); - double MethodWithGCInfoToWeight(MethodWithGCInfo method) - { - var profileData = _profileData[method.Method]; - double weight = 0; - - if (profileData != null) - { - weight = profileData.ExclusiveWeight; - } - return weight; - } - void ApplySortToDependencies(DependencyNodeCore node, int depth) { if (depth > 5) @@ -155,5 +106,141 @@ namespace ILCompiler } } } + + private List ApplyMethodSort(List methods) + { + switch (_methodLayoutAlgorithm) + { + case ReadyToRunMethodLayoutAlgorithm.DefaultSort: + break; + + case ReadyToRunMethodLayoutAlgorithm.ExclusiveWeight: + methods.MergeSortAllowDuplicates(sortMethodWithGCInfoByWeight); + + int sortMethodWithGCInfoByWeight(MethodWithGCInfo left, MethodWithGCInfo right) + { + return -MethodWithGCInfoToWeight(left).CompareTo(MethodWithGCInfoToWeight(right)); + } + break; + + case ReadyToRunMethodLayoutAlgorithm.HotCold: + methods.MergeSortAllowDuplicates((MethodWithGCInfo left, MethodWithGCInfo right) => ComputeHotColdRegion(left).CompareTo(ComputeHotColdRegion(right))); + + int ComputeHotColdRegion(MethodWithGCInfo method) + { + return MethodWithGCInfoToWeight(method) > 0 ? 0 : 1; + } + break; + + case ReadyToRunMethodLayoutAlgorithm.HotWarmCold: + methods.MergeSortAllowDuplicates((MethodWithGCInfo left, MethodWithGCInfo right) => ComputeHotWarmColdRegion(left).CompareTo(ComputeHotWarmColdRegion(right))); + + int ComputeHotWarmColdRegion(MethodWithGCInfo method) + { + double weight = MethodWithGCInfoToWeight(method); + + // If weight is greater than 128 its probably signicantly used at runtime + if (weight > 128) + return 0; + + // If weight is less than 128 but greater than 0, then its probably used at startup + // or some at runtime, but is less critical than the hot code + if (weight > 0) + return 1; + + // Methods without weight are probably relatively rarely used + return 2; + }; + break; + + case ReadyToRunMethodLayoutAlgorithm.CallFrequency: + methods = MethodCallFrequencySort(methods); + break; + + default: + throw new NotImplementedException(_methodLayoutAlgorithm.ToString()); + } + + return methods; + } + + private double MethodWithGCInfoToWeight(MethodWithGCInfo method) + { + var profileData = _profileData[method.Method]; + double weight = 0; + + if (profileData != null) + { + weight = profileData.ExclusiveWeight; + } + return weight; + } + + private class CallerCalleeCount + { + public readonly MethodDesc Caller; + public readonly MethodDesc Callee; + public readonly int Count; + + public CallerCalleeCount(MethodDesc caller, MethodDesc callee, int count) + { + Caller = caller; + Callee = callee; + Count = count; + } + } + + /// + /// Use callchain profile information to generate method ordering. We place + /// callers and callees by traversing the caller-callee pairs in the callchain + /// profile in the order of descending hit count. All methods not present + /// (or not matched) in the callchain profile go last. + /// + /// List of methods to place + private List MethodCallFrequencySort(List methodsToPlace) + { + if (_profileData.CallChainProfile == null) + { + return methodsToPlace; + } + + Dictionary methodMap = new Dictionary(); + foreach (MethodWithGCInfo methodWithGCInfo in methodsToPlace) + { + methodMap.Add(methodWithGCInfo.Method, methodWithGCInfo); + } + + List callList = new List(); + foreach (KeyValuePair> methodProfile in _profileData.CallChainProfile.ResolvedProfileData.Where(kvp => methodMap.ContainsKey(kvp.Key))) + { + foreach (KeyValuePair callee in methodProfile.Value.Where(kvp => methodMap.ContainsKey(kvp.Key))) + { + callList.Add(new CallerCalleeCount(methodProfile.Key, callee.Key, callee.Value)); + } + } + callList.Sort((a, b) => b.Count.CompareTo(a.Count)); + + List outputMethods = new List(); + outputMethods.Capacity = methodsToPlace.Count; + + foreach (CallerCalleeCount call in callList) + { + if (methodMap.TryGetValue(call.Caller, out MethodWithGCInfo callerWithGCInfo) && callerWithGCInfo != null) + { + outputMethods.Add(callerWithGCInfo); + methodMap[call.Caller] = null; + } + if (methodMap.TryGetValue(call.Callee, out MethodWithGCInfo calleeWithGCInfo) && calleeWithGCInfo != null) + { + outputMethods.Add(calleeWithGCInfo); + methodMap[call.Callee] = null; + } + } + + // Methods unknown to the callchain profile go last + outputMethods.AddRange(methodMap.Values.Where(m => m != null)); + Debug.Assert(outputMethods.Count == methodsToPlace.Count); + return outputMethods; + } } } diff --git a/src/coreclr/tools/aot/crossgen2/Program.cs b/src/coreclr/tools/aot/crossgen2/Program.cs index 260053c..43de684 100644 --- a/src/coreclr/tools/aot/crossgen2/Program.cs +++ b/src/coreclr/tools/aot/crossgen2/Program.cs @@ -136,6 +136,7 @@ namespace ILCompiler "exclusiveweight" => ReadyToRunMethodLayoutAlgorithm.ExclusiveWeight, "hotcold" => ReadyToRunMethodLayoutAlgorithm.HotCold, "hotwarmcold" => ReadyToRunMethodLayoutAlgorithm.HotWarmCold, + "callfrequency" => ReadyToRunMethodLayoutAlgorithm.CallFrequency, _ => throw new CommandLineException(SR.InvalidMethodLayout) }; } diff --git a/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx b/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx index d7c4ba1..9c84701 100644 --- a/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx +++ b/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx @@ -148,7 +148,7 @@ Method layout must be either DefaultSort or MethodOrder. - Method layout must be either DefaultSort, ExclusiveWeight, HotCold, or HotWarmCold. + Method layout must be either DefaultSort, ExclusiveWeight, HotCold, HotWarmCold, or CallFrequency. True to skip compiling methods into the R2R image (default = false) @@ -330,4 +330,4 @@ Explicit specification of the PerfMap file path - + \ No newline at end of file -- 2.7.4