From: Jakob Botsch Nielsen Date: Thu, 29 Jul 2021 19:08:03 +0000 (+0200) Subject: Add Pettis-Hansen sort based on call graph data from .mibc file (#56472) X-Git-Tag: accepted/tizen/unified/20220110.054933~803 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a70bd64505b89f5dd51c7039ddcd9393fa93f930;p=platform%2Fupstream%2Fdotnet%2Fruntime.git Add Pettis-Hansen sort based on call graph data from .mibc file (#56472) Can be used by specifying --methodorder PettisHansen when a .mibc file is passed that has the necessary call graph information. By default, create-mibc adds this information when kernel call stacks are included in the trace. --- diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/MethodWithGCInfo.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/MethodWithGCInfo.cs index a03e6a1..9dc2907 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/MethodWithGCInfo.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/MethodWithGCInfo.cs @@ -325,5 +325,7 @@ namespace ILCompiler.DependencyAnalysis.ReadyToRun public int Offset => 0; public override bool IsShareable => throw new NotImplementedException(); public override bool ShouldSkipEmittingObjectNode(NodeFactory factory) => IsEmpty; + + public override string ToString() => _method.ToString(); } } diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/PettisHansenSort/CallGraphNode.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/PettisHansenSort/CallGraphNode.cs new file mode 100644 index 0000000..d17b8d7 --- /dev/null +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/PettisHansenSort/CallGraphNode.cs @@ -0,0 +1,28 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+
+using System.Collections.Generic;
+
+namespace ILCompiler.PettisHansenSort
+{
+    public class CallGraphNode
+    {
+        public CallGraphNode(int index)
+        {
+            Index = index;
+        }
+
+        public int Index { get; }
+        public Dictionary<CallGraphNode, long> OutgoingEdges { get; } = new Dictionary<CallGraphNode, long>(); // callee -> accumulated call count
+
+        public void IncreaseEdge(CallGraphNode callee, long count)
+        {
+            if (OutgoingEdges.TryGetValue(callee, out long curCount))
+                OutgoingEdges[callee] = curCount + count;
+            else
+                OutgoingEdges.Add(callee, count);
+        }
+
+        public override string ToString() => Index.ToString();
+    }
+}
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/PettisHansenSort/DisjointSetForest.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/PettisHansenSort/DisjointSetForest.cs
new file mode 100644
index 0000000..44b7a48
--- /dev/null
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/PettisHansenSort/DisjointSetForest.cs
@@ -0,0 +1,125 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+
+namespace ILCompiler.PettisHansenSort
+{
+    /// <summary>
+    /// Disjoint-set forest over integer node indices. Sets are merged by rank
+    /// and lookups compress the path to the set representative.
+    /// </summary>
+    public class DisjointSetForest
+    {
+        // Smallest capacity the backing array grows to, so that a forest
+        // constructed with zero nodes can still accept new nodes.
+        private const int MinimumCapacity = 4;
+
+        private Node[] _nodes;
+
+        /// <summary>
+        /// Construct a new forest with the specified number of disjoint sets.
+        /// </summary>
+        public DisjointSetForest(int numNodes)
+        {
+            _nodes = new Node[numNodes];
+            for (int i = 0; i < _nodes.Length; i++)
+                _nodes[i].Parent = i;
+
+            NumNodes = numNodes;
+            NumDisjointSets = numNodes;
+        }
+
+        /// <summary>
+        /// Gets the count of disjoint sets that are currently entered in this forest.
+        /// </summary>
+        public int NumDisjointSets { get; private set; }
+
+        /// <summary>
+        /// Gets the count of nodes that are currently entered in this forest.
+        /// </summary>
+        public int NumNodes { get; private set; }
+
+        /// <summary>
+        /// Add a new disjoint set holding a single new node.
+        /// </summary>
+        /// <returns>The index of the new node.</returns>
+        public int Add()
+        {
+            // Grow from the array length; doubling NumNodes would leave an
+            // initially empty forest stuck at zero capacity.
+            if (NumNodes >= _nodes.Length)
+                Array.Resize(ref _nodes, Math.Max(MinimumCapacity, _nodes.Length * 2));
+
+            int index = NumNodes;
+            _nodes[index].Parent = index;
+            NumDisjointSets++;
+            NumNodes++;
+
+            return index;
+        }
+
+        /// <summary>
+        /// Find the representative of the set that contains <paramref name="node"/>.
+        /// </summary>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// <paramref name="node"/> is not the index of a node in this forest.
+        /// </exception>
+        public int FindSet(int node)
+        {
+            // Check against NumNodes rather than the array length: slots past
+            // NumNodes are spare capacity and do not describe real nodes.
+            if (node < 0 || node >= NumNodes)
+                throw new ArgumentOutOfRangeException(nameof(node), node,
+                    "Node must be non-negative and less than number of nodes");
+
+            return FindSetInternal(node);
+        }
+
+        // Walks up to the representative and re-parents every visited node
+        // directly to it (path compression).
+        private int FindSetInternal(int node)
+        {
+            int parent = _nodes[node].Parent;
+            if (parent != node)
+                _nodes[node].Parent = parent = FindSetInternal(parent);
+
+            return parent;
+        }
+
+        /// <summary>
+        /// Merge the sets that contain <paramref name="x"/> and <paramref name="y"/>.
+        /// </summary>
+        /// <returns>
+        /// true if two different sets were merged; false if both nodes were
+        /// already in the same set.
+        /// </returns>
+        public bool Union(int x, int y)
+        {
+            x = FindSet(x);
+            y = FindSet(y);
+
+            if (x == y)
+                return false;
+
+            // Make smallest a child of the largest
+            if (_nodes[y].Rank > _nodes[x].Rank)
+                _nodes[x].Parent = y;
+            else
+            {
+                _nodes[y].Parent = x;
+                if (_nodes[x].Rank == _nodes[y].Rank)
+                    _nodes[x].Rank++;
+            }
+
+            NumDisjointSets--;
+            return true;
+        }
+
+        private struct Node
+        {
+            public int Parent;
+            public int Rank;
+        }
+    }
+}
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/PettisHansenSort/PettisHansen.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/PettisHansenSort/PettisHansen.cs
new file mode 100644
index 0000000..2880c82
--- /dev/null
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/PettisHansenSort/PettisHansen.cs
@@ -0,0 +1,199 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace ILCompiler.PettisHansenSort
+{
+    /// <summary>
+    /// Pettis-Hansen style method ordering driven by call graph edge weights.
+    /// </summary>
+    public static class PettisHansen
+    {
+        /// <summary>
+        /// Order the nodes of a call graph by repeatedly merging the two
+        /// components joined by the heaviest remaining edge.
+        /// </summary>
+        /// <param name="graph">
+        /// Call graph nodes. Node indices are used as positions in this list,
+        /// so graph[i].Index is expected to equal i.
+        /// </param>
+        /// <returns>
+        /// The merged components, largest first, as lists of node indices.
+        /// </returns>
+        public static List<List<int>> Sort(List<CallGraphNode> graph)
+        {
+            // Create initial graph with a node for each method.
+            DisjointSetForest unionFind = new DisjointSetForest(graph.Count);
+            var phNodes = new List<int>[graph.Count];
+            // Undirected edges, stored in both directions.
+            var phEdges = new Dictionary<int, long>[graph.Count];
+            // Construct initial graph with nodes for each method.
+            for (int i = 0; i < phNodes.Length; i++)
+            {
+                CallGraphNode node = graph[i];
+                phNodes[i] = new List<int>(1) { i };
+                var dict = new Dictionary<int, long>(node.OutgoingEdges.Count);
+                phEdges[i] = dict;
+            }
+
+            void AddEdge(int a, int b, long weight)
+            {
+                if (a == b)
+                    return;
+
+                if (phEdges[a].TryGetValue(b, out long curWeight))
+                    phEdges[a][b] = curWeight + weight;
+                else
+                    phEdges[a].Add(b, weight);
+            }
+            // Now add edges.
+            for (int i = 0; i < phNodes.Length; i++)
+            {
+                foreach (var kvp in graph[i].OutgoingEdges)
+                {
+                    AddEdge(i, kvp.Key.Index, kvp.Value);
+                    AddEdge(kvp.Key.Index, i, kvp.Value);
+                }
+            }
+
+#if DEBUG
+            for (int i = 0; i < phNodes.Length; i++)
+            {
+                foreach (var kvp in phEdges[i])
+                    Debug.Assert(phEdges[kvp.Key][i] == phEdges[i][kvp.Key]);
+            }
+#endif
+
+            var queue = new PriorityQueue<(int from, int to), long>();
+            for (int i = 0; i < phEdges.Length; i++)
+            {
+                foreach (var kvp in phEdges[i])
+                {
+                    if (kvp.Key > i)
+                    {
+                        queue.Enqueue((i, kvp.Key), -kvp.Value); // PriorityQueue gives lowest prio first
+                    }
+                }
+            }
+
+            while (queue.Count > 0)
+            {
+                (int from, int to) = queue.Dequeue();
+                from = unionFind.FindSet(from);
+                to = unionFind.FindSet(to);
+
+                if (from == to)
+                    continue; // Already unioned through a different path
+
+                Debug.Assert(phEdges[from][to] == phEdges[to][from]);
+
+                bool unioned = unionFind.Union(from, to);
+                Trace.Assert(unioned);
+
+                int winner = unionFind.FindSet(from);
+                int loser = winner == from ? to : from;
+
+                long OrigWeight(int a, int b)
+                {
+                    graph[a].OutgoingEdges.TryGetValue(graph[b], out long ab);
+                    graph[b].OutgoingEdges.TryGetValue(graph[a], out long ba);
+                    return ab + ba;
+                }
+
+                // Transfer all method names from loser to winner, preferring highest weight between endpoints
+                long wff = OrigWeight(phNodes[winner].First(), phNodes[loser].First());
+                long wfl = OrigWeight(phNodes[winner].First(), phNodes[loser].Last());
+                long wlf = OrigWeight(phNodes[winner].Last(), phNodes[loser].First());
+                long wll = OrigWeight(phNodes[winner].Last(), phNodes[loser].Last());
+                if (wlf >= wff && wlf >= wfl && wlf >= wll)
+                {
+                    // Already in right order
+                }
+                else if (wll >= wff && wll >= wfl && wll >= wlf)
+                {
+                    phNodes[loser].Reverse();
+                }
+                else if (wff >= wfl && wff >= wlf && wff >= wll)
+                {
+                    phNodes[winner].Reverse();
+                }
+                else
+                {
+                    Debug.Assert(wfl >= wff && wfl >= wlf && wfl >= wll);
+                    phNodes[winner].Reverse();
+                    phNodes[loser].Reverse();
+                }
+
+                phNodes[winner].AddRange(phNodes[loser]);
+                phNodes[loser].Clear();
+
+                // Verify that there is exactly one edge between winner's set and loser's set
+                Debug.Assert(phEdges[loser].Count(e => unionFind.FindSet(e.Key) == winner) == 1);
+
+                // Get rid of unifying edge
+                phEdges[winner].Remove(loser);
+                phEdges[loser].Remove(winner);
+
+                // Transfer all edges from loser to winner, coalescing when there are multiple.
+                foreach (var edge in phEdges[loser])
+                {
+                    // Remove counter edge.
+                    bool removed = phEdges[edge.Key].Remove(loser);
+                    Debug.Assert(removed);
+
+                    // Add edge and counter edge, coalescing when necessary.
+                    AddEdge(winner, edge.Key, edge.Value);
+                    AddEdge(edge.Key, winner, edge.Value);
+                    // Add a new entry in the queue as the edge could have changed weight from coalescing.
+                    long weight = phEdges[winner][edge.Key];
+                    queue.Enqueue((winner, edge.Key), -weight); // Priority queue gives lowest priority first
+                }
+
+                phEdges[loser].Clear();
+
+#if DEBUG
+                // Assert that there are only edges between representatives.
+                for (int i = 0; i < phEdges.Length; i++)
+                {
+                    foreach (var edge in phEdges[i])
+                    {
+                        Debug.Assert(unionFind.FindSet(i) == i && unionFind.FindSet(edge.Key) == edge.Key);
+                        Debug.Assert(phEdges[edge.Key][i] == phEdges[i][edge.Key]);
+                    }
+                }
+#endif
+            }
+
+            // Order by component size as we return. Note that we rely on the
+            // stability of the sort here to keep trivial components of only a
+            // single method (meaning that we did not see any call edges) in
+            // the same order as it was in the input (i.e. increasing indices,
+            // asserted below).
+            List<List<int>> components =
+                phNodes
+                .Where(n => n.Count != 0)
+                .OrderByDescending(n => n.Count)
+                .ToList();
+
+            // We also expect to see a permutation.
+            Debug.Assert(components.SelectMany(l => l).OrderBy(i => i).SequenceEqual(Enumerable.Range(0, graph.Count)));
+
+#if DEBUG
+            int prev = -1;
+            foreach (List<int> component in components.SkipWhile(l => l.Count != 1))
+            {
+                Debug.Assert(component[0] > prev);
+                prev = component[0];
+            }
+#endif
+
+            return components;
+        }
+    }
+}
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs
index ac7058b..de7ada8 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs
@@ -329,7 +329,7 @@ namespace ILCompiler
 
             _profileData = profileData;
 
-            _fileLayoutOptimizer = new ReadyToRunFileLayoutOptimizer(methodLayoutAlgorithm, fileLayoutAlgorithm, profileData, _nodeFactory);
+            _fileLayoutOptimizer = new ReadyToRunFileLayoutOptimizer(logger, methodLayoutAlgorithm, fileLayoutAlgorithm, profileData, 
_nodeFactory);
         }
 
         private readonly static string s_folderUpPrefix = ".." + Path.DirectorySeparatorChar;
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunFileLayoutOptimizer.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunFileLayoutOptimizer.cs
index 2e4b601..d8d6d36 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunFileLayoutOptimizer.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunFileLayoutOptimizer.cs
@@ -14,6 +14,7 @@ using System.Linq;
 using System.Collections.Immutable;
 using System.Text;
 using System.Reflection.Metadata.Ecma335;
+using ILCompiler.PettisHansenSort;
 
 namespace ILCompiler
 {
@@ -24,6 +25,7 @@ namespace ILCompiler
         HotCold,
         HotWarmCold,
         CallFrequency,
+        PettisHansen,
     }
 
     public enum ReadyToRunFileLayoutAlgorithm
@@ -34,17 +36,20 @@ namespace ILCompiler
 
     class ReadyToRunFileLayoutOptimizer
     {
-        public ReadyToRunFileLayoutOptimizer (ReadyToRunMethodLayoutAlgorithm methodAlgorithm,
+        public ReadyToRunFileLayoutOptimizer (Logger logger,
+            ReadyToRunMethodLayoutAlgorithm methodAlgorithm,
             ReadyToRunFileLayoutAlgorithm fileAlgorithm,
             ProfileDataManager profileData,
             NodeFactory nodeFactory)
         {
+            _logger = logger;
             _methodLayoutAlgorithm = methodAlgorithm;
             _fileLayoutAlgorithm = fileAlgorithm;
             _profileData = profileData;
             _nodeFactory = nodeFactory;
         }
 
+        private Logger _logger;
         private ReadyToRunMethodLayoutAlgorithm _methodLayoutAlgorithm = ReadyToRunMethodLayoutAlgorithm.DefaultSort;
         private ReadyToRunFileLayoutAlgorithm _fileLayoutAlgorithm = ReadyToRunFileLayoutAlgorithm.DefaultSort;
         private ProfileDataManager _profileData;
@@ -157,6 +162,10 @@ namespace ILCompiler
                     methods = MethodCallFrequencySort(methods);
                     break;
 
+                case ReadyToRunMethodLayoutAlgorithm.PettisHansen:
+                    methods = PettisHansenSort(methods);
+                    break;
+
                 default:
                     throw new NotImplementedException(_methodLayoutAlgorithm.ToString());
             }
@@ -242,5 +251,64 @@ namespace ILCompiler
             Debug.Assert(outputMethods.Count == methodsToPlace.Count);
             return outputMethods;
         }
+
+        /// <summary>
+        /// Sort methods with Pettis-Hansen using call graph data from profile.
+        /// </summary>
+        /// <param name="methodsToPlace">The methods to order.</param>
+        /// <returns>
+        /// The methods in Pettis-Hansen order, or <paramref name="methodsToPlace"/>
+        /// itself when the profile records no call from one of them to one of them.
+        /// </returns>
+        private List<MethodWithGCInfo> PettisHansenSort(List<MethodWithGCInfo> methodsToPlace)
+        {
+            var graphNodes = new List<CallGraphNode>(methodsToPlace.Count);
+            var mdToIndex = new Dictionary<MethodDesc, int>(methodsToPlace.Count);
+            for (int i = 0; i < methodsToPlace.Count; i++)
+            {
+                mdToIndex.Add(methodsToPlace[i].Method, i);
+                graphNodes.Add(new CallGraphNode(i));
+            }
+
+            bool any = false;
+            for (int i = 0; i < methodsToPlace.Count; i++)
+            {
+                MethodProfileData data = _profileData[methodsToPlace[i].Method];
+                if (data == null || data.CallWeights == null)
+                    continue;
+
+                // Node i belongs to methodsToPlace[i], so the caller needs no
+                // dictionary lookup.
+                CallGraphNode caller = graphNodes[i];
+                foreach ((MethodDesc other, int count) in data.CallWeights)
+                {
+                    // Skip callees that are not among the methods being placed.
+                    if (!mdToIndex.TryGetValue(other, out int otherIndex))
+                        continue;
+
+                    caller.IncreaseEdge(graphNodes[otherIndex], count);
+                    any = true;
+                }
+            }
+
+            if (!any)
+            {
+                _logger.Writer.WriteLine("Warning: no call graph data was found or a .mibc file was not specified. Skipping Pettis Hansen method ordering.");
+                return methodsToPlace;
+            }
+
+            List<List<int>> components = PettisHansen.Sort(graphNodes);
+            // We expect to see a permutation.
+            Debug.Assert(components.SelectMany(l => l).OrderBy(i => i).SequenceEqual(Enumerable.Range(0, methodsToPlace.Count)));
+
+            List<MethodWithGCInfo> result = new List<MethodWithGCInfo>(methodsToPlace.Count);
+            foreach (List<int> component in components)
+            {
+                foreach (int node in component)
+                    result.Add(methodsToPlace[node]);
+            }
+
+            return result;
+        }
     }
 }
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ILCompiler.ReadyToRun.csproj b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ILCompiler.ReadyToRun.csproj
index c1a8ea5..4e3aaa4 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ILCompiler.ReadyToRun.csproj
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/ILCompiler.ReadyToRun.csproj
@@ -188,6 +188,9 @@
+
+
+
diff --git a/src/coreclr/tools/aot/crossgen2/Program.cs b/src/coreclr/tools/aot/crossgen2/Program.cs
index cc8e910..36b5bdd 100644
--- a/src/coreclr/tools/aot/crossgen2/Program.cs
+++ b/src/coreclr/tools/aot/crossgen2/Program.cs
@@ -155,6 +155,7 @@ namespace ILCompiler
                     "hotcold" => ReadyToRunMethodLayoutAlgorithm.HotCold,
                     "hotwarmcold" => ReadyToRunMethodLayoutAlgorithm.HotWarmCold,
                     "callfrequency" => ReadyToRunMethodLayoutAlgorithm.CallFrequency,
+                    "pettishansen" => ReadyToRunMethodLayoutAlgorithm.PettisHansen,
                     _ => throw new CommandLineException(SR.InvalidMethodLayout)
                 };
             }
diff --git a/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx b/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx
index ab668d1..670f214 100644
--- a/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx
+++ b/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx
@@ -148,7 +148,7 @@
     Method layout must be either DefaultSort or MethodOrder.
-    Method layout must be either DefaultSort, ExclusiveWeight, HotCold, HotWarmCold, or CallFrequency.
+    Method layout must be either DefaultSort, ExclusiveWeight, HotCold, HotWarmCold, CallFrequency or PettisHansen.
     True to skip compiling methods into the R2R image (default = false)