From 025a5b22c848364be0009a630e7fb53f40515e68 Mon Sep 17 00:00:00 2001
From: Daniel Bertalan
Date: Fri, 9 Sep 2022 17:18:10 +0200
Subject: [PATCH] [lld-macho] Sort data-in-code entries

Previously, we would add entries to DataInCodeSection in the order they
appeared in input files. Because of this, entries would not be sorted if
sections were reordered due to e.g. `-order_file` or call graph profile
sorting. ld64 always keeps data-in-code information sorted.

This commit also fixes an incorrect assertion. The original assertion
from D103006 used to check that data-in-code entries are sorted in the
input objects -- likely because we use binary search on that data. In
D115556, the assertion was moved into `collectDataInCodeEntries`, but
the checked variable's name was not changed, so it ended up checking the
final contents of the DataInCodeSection.

We no longer crash when building LLVM with PGO using an asserts build of
LLD as the linker.

Fixes https://bugs.chromium.org/p/chromium/issues/detail?id=1265937

Numbers for linking the Chromium Framework reproducer from #48001, which
has 6829 data-in-code entries:

x before
+ after
    N           Min           Max        Median           Avg        Stddev
x  20     2.1076453     2.3059683     2.1132485     2.1350302   0.049905767
+  20     2.1069031     2.3915262       2.14465     2.1728429   0.084065898
No difference proven at 95.0% confidence

Differential Revision: https://reviews.llvm.org/D133581
---
 lld/MachO/SyntheticSections.cpp                | 10 +++++--
 lld/MachO/SyntheticSections.h                  |  5 ++--
 lld/test/MachO/data-in-code-section-ordering.s | 39 ++++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 4 deletions(-)
 create mode 100644 lld/test/MachO/data-in-code-section-ordering.s

diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index e51dde5..6a63982 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -933,8 +933,8 @@ static std::vector collectDataInCodeEntries() {
     if (entries.empty())
       continue;
 
-    assert(is_sorted(dataInCodeEntries, [](const data_in_code_entry &lhs,
-                                           const data_in_code_entry &rhs) {
+    assert(is_sorted(entries, [](const data_in_code_entry &lhs,
+                                 const data_in_code_entry &rhs) {
       return lhs.offset < rhs.offset;
     }));
     // For each code subsection find 'data in code' entries residing in it.
@@ -963,6 +963,12 @@ static std::vector collectDataInCodeEntries() {
       }
     }
   }
+
+  // ld64 emits the table in sorted order too.
+  llvm::sort(dataInCodeEntries,
+             [](const data_in_code_entry &lhs, const data_in_code_entry &rhs) {
+               return lhs.offset < rhs.offset;
+             });
   return dataInCodeEntries;
 }
 
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 176e842..20695dc 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -380,8 +380,9 @@ private:
   size_t size = 0;
 };
 
-// Stores 'data in code' entries that describe the locations of
-// data regions inside code sections.
+// Stores 'data in code' entries that describe the locations of data regions
+// inside code sections. This is used by llvm-objdump to distinguish jump tables
+// and stop them from being disassembled as instructions.
 class DataInCodeSection final : public LinkEditSection {
 public:
   DataInCodeSection();
diff --git a/lld/test/MachO/data-in-code-section-ordering.s b/lld/test/MachO/data-in-code-section-ordering.s
new file mode 100644
index 0000000..91302a5
--- /dev/null
+++ b/lld/test/MachO/data-in-code-section-ordering.s
@@ -0,0 +1,39 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; split-file %s %t
+
+## Check that data-in-code information is sorted even if
+## sections are reordered compared to the input order.
+
+# RUN: sed -e s/SYM/_first/ %t/input.s | \
+# RUN: llvm-mc -filetype=obj --triple=x86_64-apple-darwin -o %t/first.o
+# RUN: sed -e s/SYM/_second/ %t/input.s | \
+# RUN: llvm-mc -filetype=obj --triple=x86_64-apple-darwin -o %t/second.o
+# RUN: sed -e s/SYM/_third/ %t/input.s | \
+# RUN: llvm-mc -filetype=obj --triple=x86_64-apple-darwin -o %t/third.o
+# RUN: %lld -dylib -lSystem -order_file %t/order.txt %t/first.o %t/second.o %t/third.o -o %t/out
+# RUN: llvm-objdump --macho --syms %t/out > %t/dump.txt
+# RUN: llvm-objdump --macho --data-in-code %t/out >> %t/dump.txt
+# RUN: FileCheck %s < %t/dump.txt
+
+# CHECK-LABEL: SYMBOL TABLE:
+# CHECK-DAG: [[#%x, SECOND:]] g F __TEXT,__text _second
+# CHECK-DAG: [[#%x, FIRST:]] g F __TEXT,__text _first
+# CHECK-DAG: [[#%x, THIRD:]] g F __TEXT,__text _third
+
+# CHECK-LABEL: Data in code table (3 entries)
+# CHECK-NEXT: offset length kind
+# CHECK-NEXT: 0x[[#%.8x, SECOND]] 4 JUMP_TABLE32
+# CHECK-NEXT: 0x[[#%.8x, FIRST]] 4 JUMP_TABLE32
+# CHECK-NEXT: 0x[[#%.8x, THIRD]] 4 JUMP_TABLE32
+
+#--- order.txt
+_second
+_first
+_third
+
+#--- input.s
+.globl SYM
+SYM:
+.data_region jt32
+.long 0
+.end_data_region
-- 
2.7.4