[BOLT] Don't use section relocations when computing hash for data from other section
authorDenis Revunov <revunov.denis@huawei-partners.com>
Fri, 24 Mar 2023 18:50:07 +0000 (21:50 +0300)
committerDenis Revunov <rnovds@gmail.com>
Fri, 24 Mar 2023 18:59:50 +0000 (21:59 +0300)
When computing symbol hashes in BinarySection::hash, we try to find relocations
in the section which reference the passed BinaryData. We do so by doing
lower_bound on data begin offset and upper_bound on data end offset. Since
offsets are relative to the current section, if it is a data from the previous
section, we get underflow when computing offset and lower_bound returns
Relocations.end(). If this data also ends where current section begins,
upper_bound on zero offset will return some valid iterator if we have any
relocations after the first byte. Then we'll try to iterate from lower_bound to
upper_bound, since they're not equal, which in that case means we'll dereference
Relocations.end(), increment it, and try to do so until we reach the second
valid iterator. Of course we reach segfault earlier. In this patch we stop BOLT
from searching relocations for symbols outside of the current section.

Reviewed By: rafauler

Differential Revision: https://reviews.llvm.org/D146620

bolt/lib/Core/BinarySection.cpp
bolt/test/AArch64/Inputs/symbol-hashes.yaml [new file with mode: 0644]
bolt/test/AArch64/symbol-hashes.test [new file with mode: 0644]

index 1e28da4..846515c 100644 (file)
@@ -39,7 +39,13 @@ BinarySection::hash(const BinaryData &BD,
   if (Itr != Cache.end())
     return Itr->second;
 
-  Cache[&BD] = 0;
+  hash_code Hash =
+      hash_combine(hash_value(BD.getSize()), hash_value(BD.getSectionName()));
+
+  Cache[&BD] = Hash;
+
+  if (!containsRange(BD.getAddress(), BD.getSize()))
+    return Hash;
 
   uint64_t Offset = BD.getAddress() - getAddress();
   const uint64_t EndOffset = BD.getEndAddress() - getAddress();
@@ -47,9 +53,6 @@ BinarySection::hash(const BinaryData &BD,
   auto End = Relocations.upper_bound(Relocation{EndOffset, 0, 0, 0, 0});
   const StringRef Contents = getContents();
 
-  hash_code Hash =
-      hash_combine(hash_value(BD.getSize()), hash_value(BD.getSectionName()));
-
   while (Begin != End) {
     const Relocation &Rel = *Begin++;
     Hash = hash_combine(
diff --git a/bolt/test/AArch64/Inputs/symbol-hashes.yaml b/bolt/test/AArch64/Inputs/symbol-hashes.yaml
new file mode 100644 (file)
index 0000000..ce7f592
--- /dev/null
@@ -0,0 +1,83 @@
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_EXEC
+  Machine:         EM_AARCH64
+  Entry:           0x90
+ProgramHeaders:
+  - Type:            PT_LOAD
+    Flags:           [ PF_X, PF_R ]
+    FirstSec:        .rodata
+    LastSec:         .text
+    Align:           0x10000
+    Offset:          0x0
+Sections:
+  - Name:            .rodata
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x78
+    AddressAlign:    0x1
+    Content:         '7800000000000000'
+  - Name:            .dummy
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x80
+    AddressAlign:    0x1
+    Content:         '78000000000000009000000000000000'
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x90
+    AddressAlign:    0x4
+    Content:         FF4300D11F2003D508FFFF10080140F9E80700F9A80B8052010000D4FF430091C0035FD6
+  - Name:            .rela.text
+    Type:            SHT_RELA
+    Flags:           [ SHF_INFO_LINK ]
+    Link:            .symtab
+    AddressAlign:    0x8
+    Info:            .text
+    Relocations:
+      - Offset:          0x94
+        Symbol:          Symbol
+        Type:            R_AARCH64_ADR_GOT_PAGE
+      - Offset:          0x98
+        Symbol:          Symbol
+        Type:            R_AARCH64_LD64_GOT_LO12_NC
+  - Name:            .rela.dummy
+    Type:            SHT_RELA
+    Flags:           [ SHF_INFO_LINK ]
+    Link:            .symtab
+    AddressAlign:    0x8
+    Info:            .dummy
+    Relocations:
+      - Offset:          0x80
+        Symbol:          Symbol
+        Type:            R_AARCH64_ABS64
+      - Offset:          0x88
+        Symbol:          _start
+        Type:            R_AARCH64_ABS64
+Symbols:
+  - Name:            tmp.c
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            '$x.0'
+    Section:         .text
+    Value:           0x90
+  - Name:            '$d.1'
+    Index:           SHN_ABS
+  - Name:            .text
+    Type:            STT_SECTION
+    Section:         .text
+    Value:           0x90
+  - Name:            _start
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x90
+    Size:            0x24
+  - Name:            Symbol
+    Section:         .rodata
+    Binding:         STB_GLOBAL
+    Value:           0x78
+...
diff --git a/bolt/test/AArch64/symbol-hashes.test b/bolt/test/AArch64/symbol-hashes.test
new file mode 100644 (file)
index 0000000..3f2d869
--- /dev/null
@@ -0,0 +1,6 @@
+// This test checks that we don't try to use symbols outside of section when
+// generating symbol hashes
+
+RUN: yaml2obj %p/Inputs/symbol-hashes.yaml -o %t.exe
+RUN: llvm-bolt %t.exe -force-data-relocations -o %t.exe.bolt
+