From 763671f387fdd00329c5b715d3ec6c62680f3f8e Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Tue, 21 Jul 2020 13:46:11 -0700 Subject: [PATCH] [COFF] Port CallGraphSort to COFF from ELF --- lld/COFF/CMakeLists.txt | 1 + lld/COFF/CallGraphSort.cpp | 245 +++++++++++++++++++++++++++++++++ lld/COFF/CallGraphSort.h | 22 +++ lld/COFF/Config.h | 11 ++ lld/COFF/Driver.cpp | 94 ++++++++++++- lld/COFF/InputFiles.cpp | 5 + lld/COFF/InputFiles.h | 2 + lld/COFF/Options.td | 11 ++ lld/COFF/Writer.cpp | 20 ++- lld/ELF/CallGraphSort.cpp | 6 +- lld/test/COFF/cgprofile-bad-clusters.s | 61 ++++++++ lld/test/COFF/cgprofile-err.s | 11 ++ lld/test/COFF/cgprofile-icf.s | 45 ++++++ lld/test/COFF/cgprofile-obj.s | 45 ++++++ lld/test/COFF/cgprofile-print.s | 34 +++++ lld/test/COFF/cgprofile-txt.s | 43 ++++++ 16 files changed, 646 insertions(+), 10 deletions(-) create mode 100644 lld/COFF/CallGraphSort.cpp create mode 100644 lld/COFF/CallGraphSort.h create mode 100644 lld/test/COFF/cgprofile-bad-clusters.s create mode 100644 lld/test/COFF/cgprofile-err.s create mode 100644 lld/test/COFF/cgprofile-icf.s create mode 100644 lld/test/COFF/cgprofile-obj.s create mode 100644 lld/test/COFF/cgprofile-print.s create mode 100644 lld/test/COFF/cgprofile-txt.s diff --git a/lld/COFF/CMakeLists.txt b/lld/COFF/CMakeLists.txt index 796f7a8..bbcd337 100644 --- a/lld/COFF/CMakeLists.txt +++ b/lld/COFF/CMakeLists.txt @@ -3,6 +3,7 @@ tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(COFFOptionsTableGen) add_lld_library(lldCOFF + CallGraphSort.cpp Chunks.cpp DebugTypes.cpp DLL.cpp diff --git a/lld/COFF/CallGraphSort.cpp b/lld/COFF/CallGraphSort.cpp new file mode 100644 index 0000000..d3e5312 --- /dev/null +++ b/lld/COFF/CallGraphSort.cpp @@ -0,0 +1,245 @@ +//===- CallGraphSort.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details +/// about the algorithm. +/// +//===----------------------------------------------------------------------===// + +#include "CallGraphSort.h" +#include "InputFiles.h" +#include "SymbolTable.h" +#include "Symbols.h" +#include "lld/Common/ErrorHandler.h" + +#include + +using namespace llvm; +using namespace lld; +using namespace lld::coff; + +namespace { +struct Edge { + int from; + uint64_t weight; +}; + +struct Cluster { + Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {} + + double getDensity() const { + if (size == 0) + return 0; + return double(weight) / double(size); + } + + int next; + int prev; + uint64_t size; + uint64_t weight = 0; + uint64_t initialWeight = 0; + Edge bestPred = {-1, 0}; +}; + +class CallGraphSort { +public: + CallGraphSort(); + + DenseMap run(); + +private: + std::vector clusters; + std::vector sections; +}; + +// Maximum amount the combined cluster density can be worse than the original +// cluster to consider merging. +constexpr int MAX_DENSITY_DEGRADATION = 8; + +// Maximum cluster size in bytes. +constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024; +} // end anonymous namespace + +using SectionPair = std::pair; + +// Take the edge list in Config->CallGraphProfile, resolve symbol names to +// Symbols, and generate a graph between InputSections with the provided +// weights. +CallGraphSort::CallGraphSort() { + MapVector &profile = config->callGraphProfile; + DenseMap secToCluster; + + auto getOrCreateNode = [&](const SectionChunk *isec) -> int { + auto res = secToCluster.try_emplace(isec, clusters.size()); + if (res.second) { + sections.push_back(isec); + clusters.emplace_back(clusters.size(), isec->getSize()); + } + return res.first->second; + }; + + // Create the graph. + for (std::pair &c : profile) { + const auto *fromSec = cast(c.first.first->repl); + const auto *toSec = cast(c.first.second->repl); + uint64_t weight = c.second; + + // Ignore edges between input sections belonging to different output + // sections. This is done because otherwise we would end up with clusters + // containing input sections that can't actually be placed adjacently in the + // output. This messes with the cluster size and density calculations. We + // would also end up moving input sections in other output sections without + // moving them closer to what calls them. + if (fromSec->getOutputSection() != toSec->getOutputSection()) + continue; + + int from = getOrCreateNode(fromSec); + int to = getOrCreateNode(toSec); + + clusters[to].weight += weight; + + if (from == to) + continue; + + // Remember the best edge. + Cluster &toC = clusters[to]; + if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) { + toC.bestPred.from = from; + toC.bestPred.weight = weight; + } + } + for (Cluster &c : clusters) + c.initialWeight = c.weight; +} + +// It's bad to merge clusters which would degrade the density too much. +static bool isNewDensityBad(Cluster &a, Cluster &b) { + double newDensity = double(a.weight + b.weight) / double(a.size + b.size); + return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION; +} + +// Find the leader of V's belonged cluster (represented as an equivalence +// class). We apply union-find path-halving technique (simple to implement) in +// the meantime as it decreases depths and the time complexity. +static int getLeader(std::vector &leaders, int v) { + while (leaders[v] != v) { + leaders[v] = leaders[leaders[v]]; + v = leaders[v]; + } + return v; +} + +static void mergeClusters(std::vector &cs, Cluster &into, int intoIdx, + Cluster &from, int fromIdx) { + int tail1 = into.prev, tail2 = from.prev; + into.prev = tail2; + cs[tail2].next = intoIdx; + from.prev = tail1; + cs[tail1].next = fromIdx; + into.size += from.size; + into.weight += from.weight; + from.size = 0; + from.weight = 0; +} + +// Group InputSections into clusters using the Call-Chain Clustering heuristic +// then sort the clusters by density. +DenseMap CallGraphSort::run() { + std::vector sorted(clusters.size()); + std::vector leaders(clusters.size()); + + std::iota(leaders.begin(), leaders.end(), 0); + std::iota(sorted.begin(), sorted.end(), 0); + llvm::stable_sort(sorted, [&](int a, int b) { + return clusters[a].getDensity() > clusters[b].getDensity(); + }); + + for (int l : sorted) { + // The cluster index is the same as the index of its leader here because + // clusters[L] has not been merged into another cluster yet. + Cluster &c = clusters[l]; + + // Don't consider merging if the edge is unlikely. + if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight) + continue; + + int predL = getLeader(leaders, c.bestPred.from); + if (l == predL) + continue; + + Cluster *predC = &clusters[predL]; + if (c.size + predC->size > MAX_CLUSTER_SIZE) + continue; + + if (isNewDensityBad(*predC, c)) + continue; + + leaders[l] = predL; + mergeClusters(clusters, *predC, predL, c, l); + } + + // Sort remaining non-empty clusters by density. + sorted.clear(); + for (int i = 0, e = (int)clusters.size(); i != e; ++i) + if (clusters[i].size > 0) + sorted.push_back(i); + llvm::stable_sort(sorted, [&](int a, int b) { + return clusters[a].getDensity() > clusters[b].getDensity(); + }); + + DenseMap orderMap; + // Sections will be sorted by increasing order. Absent sections will have + // priority 0 and be placed at the end of sections. + int curOrder = INT_MIN; + for (int leader : sorted) { + for (int i = leader;;) { + orderMap[sections[i]] = curOrder++; + i = clusters[i].next; + if (i == leader) + break; + } + } + if (!config->printSymbolOrder.empty()) { + std::error_code ec; + raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None); + if (ec) { + error("cannot open " + config->printSymbolOrder + ": " + ec.message()); + return orderMap; + } + // Print the symbols ordered by C3, in the order of increasing curOrder + // Instead of sorting all the orderMap, just repeat the loops above. + for (int leader : sorted) + for (int i = leader;;) { + const SectionChunk *sc = sections[i]; + + // Search all the symbols in the file of the section + // and find out a DefinedCOFF symbol with name that is within the + // section. + for (Symbol *sym : sc->file->getSymbols()) + if (auto *d = dyn_cast_or_null(sym)) + // Filter out non-COMDAT symbols and section symbols. + if (d->isCOMDAT && !d->getCOFFSymbol().isSection() && + sc == d->getChunk()) + os << sym->getName() << "\n"; + i = clusters[i].next; + if (i == leader) + break; + } + } + + return orderMap; +} + +// Sort sections by the profile data provided by /call-graph-ordering-file +// +// This first builds a call graph based on the profile data then merges sections +// according to the C³ heuristic. All clusters are then sorted by a density +// metric to further improve locality. +DenseMap coff::computeCallGraphProfileOrder() { + return CallGraphSort().run(); +} diff --git a/lld/COFF/CallGraphSort.h b/lld/COFF/CallGraphSort.h new file mode 100644 index 0000000..e4f3721 --- /dev/null +++ b/lld/COFF/CallGraphSort.h @@ -0,0 +1,22 @@ +//===- CallGraphSort.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_CALL_GRAPH_SORT_H +#define LLD_COFF_CALL_GRAPH_SORT_H + +#include "llvm/ADT/DenseMap.h" + +namespace lld { +namespace coff { +class SectionChunk; + +llvm::DenseMap computeCallGraphProfileOrder(); +} // namespace coff +} // namespace lld + +#endif diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h index 7c43917..286b67b 100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -9,6 +9,7 @@ #ifndef LLD_COFF_CONFIG_H #define LLD_COFF_CONFIG_H +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Object/COFF.h" @@ -29,6 +30,7 @@ class DefinedRelative; class StringChunk; class Symbol; class InputFile; +class SectionChunk; // Short aliases. static const auto AMD64 = llvm::COFF::IMAGE_FILE_MACHINE_AMD64; @@ -201,6 +203,15 @@ struct Configuration { // Used for /lto-obj-path: llvm::StringRef ltoObjPath; + // Used for /call-graph-ordering-file: + llvm::MapVector, + uint64_t> + callGraphProfile; + bool callGraphProfileSort = false; + + // Used for /print-symbol-order: + StringRef printSymbolOrder; + uint64_t align = 4096; uint64_t imageBase = -1; uint64_t fileAlign = 512; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 9ceccef..55e97d5 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -34,6 +34,7 @@ #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" +#include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/LEB128.h" @@ -924,6 +925,75 @@ static void parseOrderFile(StringRef arg) { } } +static void parseCallGraphFile(StringRef path) { + std::unique_ptr mb = CHECK( + MemoryBuffer::getFile(path, -1, false, true), "could not open " + path); + + // Build a map from symbol name to section. + DenseMap map; + for (ObjFile *file : ObjFile::instances) + for (Symbol *sym : file->getSymbols()) + if (sym) + map[sym->getName()] = sym; + + auto findSection = [&](StringRef name) -> SectionChunk * { + Symbol *sym = map.lookup(name); + if (!sym) { + if (config->warnMissingOrderSymbol) + warn(path + ": no such symbol: " + name); + return nullptr; + } + + if (DefinedCOFF *dr = dyn_cast_or_null(sym)) + return dyn_cast_or_null(dr->getChunk()); + return nullptr; + }; + + for (StringRef line : args::getLines(*mb)) { + SmallVector fields; + line.split(fields, ' '); + uint64_t count; + + if (fields.size() != 3 || !to_integer(fields[2], count)) { + error(path + ": parse error"); + return; + } + + if (SectionChunk *from = findSection(fields[0])) + if (SectionChunk *to = findSection(fields[1])) + config->callGraphProfile[{from, to}] += count; + } +} + +static void readCallGraphsFromObjectFiles() { + for (ObjFile *obj : ObjFile::instances) { + if (obj->callgraphSec) { + ArrayRef contents; + cantFail( + obj->getCOFFObj()->getSectionContents(obj->callgraphSec, contents)); + BinaryStreamReader reader(contents, support::little); + while (!reader.empty()) { + uint32_t fromIndex, toIndex; + uint64_t count; + if (Error err = reader.readInteger(fromIndex)) + fatal(toString(obj) + ": Expected 32-bit integer"); + if (Error err = reader.readInteger(toIndex)) + fatal(toString(obj) + ": Expected 32-bit integer"); + if (Error err = reader.readInteger(count)) + fatal(toString(obj) + ": Expected 64-bit integer"); + auto *fromSym = dyn_cast_or_null(obj->getSymbol(fromIndex)); + auto *toSym = dyn_cast_or_null(obj->getSymbol(toIndex)); + if (!fromSym || !toSym) + continue; + auto *from = dyn_cast_or_null(fromSym->getChunk()); + auto *to = dyn_cast_or_null(toSym->getChunk()); + if (from && to) + config->callGraphProfile[{from, to}] += count; + } + } + } +} + static void markAddrsig(Symbol *s) { if (auto *d = dyn_cast_or_null(s)) if (SectionChunk *c = dyn_cast_or_null(d->getChunk())) @@ -1587,9 +1657,11 @@ void LinkerDriver::link(ArrayRef argsArr) { args.hasFlag(OPT_auto_import, OPT_auto_import_no, config->mingw); config->pseudoRelocs = args.hasFlag( OPT_runtime_pseudo_reloc, OPT_runtime_pseudo_reloc_no, config->mingw); + config->callGraphProfileSort = args.hasFlag( + OPT_call_graph_profile_sort, OPT_call_graph_profile_sort_no, true); - // Don't warn about long section names, such as .debug_info, for mingw or when - // -debug:dwarf is requested. + // Don't warn about long section names, such as .debug_info, for mingw or + // when -debug:dwarf is requested. if (config->mingw || config->debugDwarf) config->warnLongSectionNames = false; @@ -2024,8 +2096,24 @@ void LinkerDriver::link(ArrayRef argsArr) { // Handle /order. We want to do this at this moment because we // need a complete list of comdat sections to warn on nonexistent // functions. - if (auto *arg = args.getLastArg(OPT_order)) + if (auto *arg = args.getLastArg(OPT_order)) { + if (args.hasArg(OPT_call_graph_ordering_file)) + error("/order and /call-graph-order-file may not be used together"); parseOrderFile(arg->getValue()); + config->callGraphProfileSort = false; + } + + // Handle /call-graph-ordering-file and /call-graph-profile-sort (default on). + if (config->callGraphProfileSort) { + if (auto *arg = args.getLastArg(OPT_call_graph_ordering_file)) { + parseCallGraphFile(arg->getValue()); + } + readCallGraphsFromObjectFiles(); + } + + // Handle /print-symbol-order. + if (auto *arg = args.getLastArg(OPT_print_symbol_order)) + config->printSymbolOrder = arg->getValue(); // Identify unreferenced COMDAT sections. if (config->doGC) diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index 4346b3a..0bcc6c9 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -249,6 +249,11 @@ SectionChunk *ObjFile::readSection(uint32_t sectionNumber, return nullptr; } + if (name == ".llvm.call-graph-profile") { + callgraphSec = sec; + return nullptr; + } + // Object files may have DWARF debug info or MS CodeView debug info // (or both). // diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h index 50323f5..1e0b97a 100644 --- a/lld/COFF/InputFiles.h +++ b/lld/COFF/InputFiles.h @@ -191,6 +191,8 @@ public: const coff_section *addrsigSec = nullptr; + const coff_section *callgraphSec = nullptr; + // When using Microsoft precompiled headers, this is the PCH's key. // The same key is used by both the precompiled object, and objects using the // precompiled object. Any difference indicates out-of-date objects. diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td index 087d53b..d1badf0 100644 --- a/lld/COFF/Options.td +++ b/lld/COFF/Options.td @@ -235,6 +235,17 @@ def dash_dash_version : Flag<["--"], "version">, def threads : P<"threads", "Number of threads. '1' disables multi-threading. By " "default all available hardware threads are used">; +def call_graph_ordering_file: P< + "call-graph-ordering-file", + "Layout sections to optimize the given callgraph">; +defm call_graph_profile_sort: B< + "call-graph-profile-sort", + "Reorder sections with call graph profile (default)", + "Do not reorder sections with call graph profile">; +def print_symbol_order: P< + "print-symbol-order", + "Print a symbol order specified by /call-graph-ordering-file and " + "/call-graph-profile-sort into the specified file">; // Flags for debugging def lldmap : F<"lldmap">; diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 082de5b..36ecdcd 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "Writer.h" +#include "CallGraphSort.h" #include "Config.h" #include "DLL.h" #include "InputFiles.h" @@ -229,6 +230,7 @@ private: void setSectionPermissions(); void writeSections(); void writeBuildId(); + void sortSections(); void sortExceptionTable(); void sortCRTSectionChunks(std::vector &chunks); void addSyntheticIdata(); @@ -798,6 +800,19 @@ static bool shouldStripSectionSuffix(SectionChunk *sc, StringRef name) { name.startswith(".xdata$") || name.startswith(".eh_frame$"); } +void Writer::sortSections() { + if (!config->callGraphProfile.empty()) { + DenseMap order = computeCallGraphProfileOrder(); + for (auto it : order) { + if (DefinedRegular *sym = it.first->sym) + config->order[sym->getName()] = it.second; + } + } + if (!config->order.empty()) + for (auto it : partialSections) + sortBySectionOrder(it.second->chunks); +} + // Create output section objects and add them to OutputSections. void Writer::createSections() { // First, create the builtin sections. @@ -861,10 +876,7 @@ void Writer::createSections() { if (hasIdata) addSyntheticIdata(); - // Process an /order option. - if (!config->order.empty()) - for (auto it : partialSections) - sortBySectionOrder(it.second->chunks); + sortSections(); if (hasIdata) locateImportTables(); diff --git a/lld/ELF/CallGraphSort.cpp b/lld/ELF/CallGraphSort.cpp index 21c641b..15da4d2 100644 --- a/lld/ELF/CallGraphSort.cpp +++ b/lld/ELF/CallGraphSort.cpp @@ -68,7 +68,7 @@ struct Cluster { int next; int prev; - size_t size = 0; + uint64_t size; uint64_t weight = 0; uint64_t initialWeight = 0; Edge bestPred = {-1, 0}; @@ -223,14 +223,14 @@ DenseMap CallGraphSort::run() { DenseMap orderMap; int curOrder = 1; - for (int leader : sorted) + for (int leader : sorted) { for (int i = leader;;) { orderMap[sections[i]] = curOrder++; i = clusters[i].next; if (i == leader) break; } - + } if (!config->printSymbolOrder.empty()) { std::error_code ec; raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None); diff --git a/lld/test/COFF/cgprofile-bad-clusters.s b/lld/test/COFF/cgprofile-bad-clusters.s new file mode 100644 index 0000000..12c0542 --- /dev/null +++ b/lld/test/COFF/cgprofile-bad-clusters.s @@ -0,0 +1,61 @@ +# REQUIRES: x86 +# This test checks that CallGraphSort ignores edges that would form "bad" +# clusters. + +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-win32 %s -o %t +# RUN: echo "A C 1" > %t.call_graph +# RUN: echo "E B 4" >> %t.call_graph +# RUN: echo "C D 2" >> %t.call_graph +# RUN: echo "B D 1" >> %t.call_graph +# RUN: echo "F G 6" >> %t.call_graph +# RUN: echo "G H 5" >> %t.call_graph +# RUN: echo "H I 4" >> %t.call_graph +# RUN: lld-link /subsystem:console /entry:A %t /call-graph-ordering-file:%t.call_graph /out:%t2 /debug:symtab +# RUN: llvm-nm --numeric-sort %t2 | FileCheck %s + + .section .text,"ax",one_only,A + .globl A +A: + retq + + .section .text,"ax",one_only,D +D: + .fill 1000, 1, 0 + + .section .text,"ax",one_only,E +E: + retq + + .section .text,"ax",one_only,C +C: + retq + + .section .text,"ax",one_only,B +B: + .fill 1000, 1, 0 + + .section .text,"ax",one_only,F +F: + .fill (1024 * 1024) - 1, 1, 0 + + .section .text,"ax",one_only,G +G: + retq + + .section .text,"ax",one_only,H +H: + retq + + .section .text,"ax",one_only,I +I: + .fill 13, 1, 0 + +# CHECK: 140001000 t H +# CHECK: 140001001 t I +# CHECK: 14000100e T A +# CHECK: 14000100f t C +# CHECK: 140001010 t E +# CHECK: 140001011 t B +# CHECK: 1400013f9 t D +# CHECK: 1400017e1 t F +# CHECK: 1401017e0 t G diff --git a/lld/test/COFF/cgprofile-err.s b/lld/test/COFF/cgprofile-err.s new file mode 100644 index 0000000..94c1c2a --- /dev/null +++ b/lld/test/COFF/cgprofile-err.s @@ -0,0 +1,11 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-win32 %s -o %t + +# RUN: echo "A B C 100" > %t.call_graph +# RUN: not lld-link /dll /noentry /subsystem:console %t /call-graph-ordering-file:%t.call_graph /out:/dev/null 2>&1 | FileCheck %s + +# CHECK: {{.*}}.call_graph: parse error + +# RUN: echo "A B C" > %t.call_graph +# RUN: not lld-link /dll /noentry /subsystem:console %t /call-graph-ordering-file:%t.call_graph /out:/dev/null 2>&1 | FileCheck %s diff --git a/lld/test/COFF/cgprofile-icf.s b/lld/test/COFF/cgprofile-icf.s new file mode 100644 index 0000000..19cdd0f --- /dev/null +++ b/lld/test/COFF/cgprofile-icf.s @@ -0,0 +1,45 @@ +# REQUIRES: x86 +# Test the compatibility of ICF and cgprofile. + +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-win32 %s -o %t +# RUN: echo "A B 100" > %t.call_graph +# RUN: echo "A C 40" >> %t.call_graph +# RUN: echo "C D 61" >> %t.call_graph +# RUN: lld-link /subsystem:console /entry:A %t /call-graph-ordering-file:%t.call_graph /out:%t2 /debug:symtab /opt:icf +# RUN: llvm-nm --numeric-sort %t2 | FileCheck %s +# RUN: lld-link /subsystem:console /entry:A %t /call-graph-ordering-file:%t.call_graph /out:%t2 /debug:symtab +# RUN: llvm-nm --numeric-sort %t2 | FileCheck %s --check-prefix=NOICF + + .section .text,"x",one_only,D + .globl D +D: + mov $60, %rax + retq + + .section .text,"x",one_only,C + .globl C +C: + mov $60, %rax + retq + + .section .text,"x",one_only,B + .globl B +B: + mov $2, %rax + retq + + .section .text,"x",one_only,A + .globl A +A: + mov $42, %rax + retq + +# CHECK: 140001000 T A +# CHECK: 140001008 T C +# CHECK: 140001008 T D +# CHECK: 140001010 T B + +# NOICF: 140001000 T A +# NOICF: 140001008 T B +# NOICF: 140001010 T C +# NOICF: 140001018 T D diff --git a/lld/test/COFF/cgprofile-obj.s b/lld/test/COFF/cgprofile-obj.s new file mode 100644 index 0000000..b267850 --- /dev/null +++ b/lld/test/COFF/cgprofile-obj.s @@ -0,0 +1,45 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-win32 %s -o %t +# RUN: lld-link /subsystem:console /entry:A %t /out:%t2 /debug:symtab +# RUN: llvm-nm --numeric-sort %t2 | FileCheck %s +# RUN: lld-link /call-graph-profile-sort:no /subsystem:console /entry:A %t /out:%t3 /debug:symtab +# RUN: llvm-nm --numeric-sort %t3 | FileCheck %s --check-prefix=NO-CG + + .section .text,"ax", one_only, D +D: + retq + + .section .text,"ax", one_only, C + .globl C +C: + retq + + .section .text,"ax", one_only, B + .globl B +B: + retq + + .section .text,"ax", one_only, A + .globl A +A: +Aa: + retq + + .cg_profile A, B, 10 + .cg_profile A, B, 10 + .cg_profile Aa, B, 80 + .cg_profile A, C, 40 + .cg_profile B, C, 30 + .cg_profile C, D, 90 + +# CHECK: 140001000 T A +# CHECK: 140001001 T B +# CHECK: 140001002 T C +# CHECK: 140001003 t D + + +# NO-CG: 140001000 t D +# NO-CG: 140001001 T C +# NO-CG: 140001002 T B +# NO-CG: 140001003 T A diff --git a/lld/test/COFF/cgprofile-print.s b/lld/test/COFF/cgprofile-print.s new file mode 100644 index 0000000..e82185c --- /dev/null +++ b/lld/test/COFF/cgprofile-print.s @@ -0,0 +1,34 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-win32 %s -o %t +# RUN: echo "A B 5" > %t.call_graph +# RUN: echo "B C 50" >> %t.call_graph +# RUN: echo "C D 40" >> %t.call_graph +# RUN: echo "D B 10" >> %t.call_graph +# RUN: lld-link /subsystem:console /entry:A %t /call-graph-ordering-file:%t.call_graph /out:%t2 /print-symbol-order:%t3 +# RUN: FileCheck %s --input-file %t3 + +# CHECK: B +# CHECK-NEXT: C +# CHECK-NEXT: D +# CHECK-NEXT: A + +.section .text, "x", one_only, A +.globl A +A: + nop + +.section .text, "x", one_only, B +.globl B +B: + nop + +.section .text, "x", one_only, C +.globl C +C: + nop + +.section .text, "x", one_only, D +.globl D +D: + nop diff --git a/lld/test/COFF/cgprofile-txt.s b/lld/test/COFF/cgprofile-txt.s new file mode 100644 index 0000000..49cade9 --- /dev/null +++ b/lld/test/COFF/cgprofile-txt.s @@ -0,0 +1,43 @@ +# REQUIRES: x86 +# Test correctness of call graph ordering. + +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-win32 %s -o %t +# RUN: lld-link /subsystem:console /entry:A %t /out:%t2 /debug:symtab +# RUN: llvm-nm --numeric-sort %t2 | FileCheck %s --check-prefix=NOSORT + +# RUN: echo "A B 5" > %t.call_graph +# RUN: echo "B C 50" >> %t.call_graph +# RUN: echo "C D 40" >> %t.call_graph +# RUN: echo "D B 10" >> %t.call_graph +# RUN: lld-link /subsystem:console /entry:A %t /call-graph-ordering-file:%t.call_graph /out:%t2 /debug:symtab +# RUN: llvm-nm --numeric-sort %t2 | FileCheck %s + +# NOSORT: 140001000 T A +# NOSORT: 140001001 T B +# NOSORT: 140001002 T C +# NOSORT: 140001003 T D + +# CHECK: 140001000 T B +# CHECK: 140001001 T C +# CHECK: 140001002 T D +# CHECK: 140001003 T A + +.section .text, "x", one_only, A +.globl A +A: + nop + +.section .text, "x", one_only, B +.globl B +B: + nop + +.section .text, "x", one_only, C +.globl C +C: + nop + +.section .text, "x", one_only, D +.globl D +D: + nop -- 2.7.4