/// +-------+
#include "MachONormalizedFile.h"
+#include "MachONormalizedFileBinaryUtils.h"
#include "File.h"
#include "Atoms.h"
namespace mach_o {
namespace normalized {
+/// Records, per section, how processSymbol() treats a symbol whose n_sect
+/// refers to that section.
+enum SymbolsInSection {
+ /// processSymbol() goes on to create an atom for the symbol.
+ symbolsOk,
+ /// processSymbol() returns success without creating an atom.
+ symbolsIgnored,
+ /// processSymbol() returns an "is not legal in section" error.
+ symbolsIllegal
+};
+
static uint64_t nextSymbolAddress(const NormalizedFile &normalizedFile,
- const Symbol &symbol) {
+ const Symbol &symbol) {
uint64_t symbolAddr = symbol.value;
uint8_t symbolSectionIndex = symbol.sect;
const Section §ion = normalizedFile.sections[symbolSectionIndex - 1];
return DefinedAtom::typeCode;
}
-static void processSymbol(const NormalizedFile &normalizedFile, MachOFile &file,
-                          const Symbol &sym, bool copyRefs) {
-  // Mach-O symbol table does have size in it, so need to scan ahead
-  // to find symbol with next highest address.
+/// Creates a defined atom in \p file for the symbol \p sym, unless the
+/// section holding the symbol has been marked as not taking symbols.
+///
+/// \param normalizedFile  file whose section table \p sym indexes into.
+/// \param file            receives the new atom.
+/// \param sym             symbol to convert; sym.sect is a 1-based index.
+/// \param copyRefs        forwarded unchanged to the atom-creation calls.
+/// \param symbolsInSect   per-section policy, indexed by (sym.sect - 1).
+/// \returns success when an atom was added or the symbol was deliberately
+///          ignored; an error when n_sect is out of range or the section
+///          does not allow symbols.
+static error_code
+processSymbol(const NormalizedFile &normalizedFile, MachOFile &file,
+              const Symbol &sym, bool copyRefs,
+              const SmallVector<SymbolsInSection, 32> &symbolsInSect) {
+  // n_sect is 1-based, so zero would index before the first section below.
+  if (sym.sect == 0) {
+    return make_dynamic_error_code(Twine("Symbol '") + sym.name
+                                   + "' has n_sect (0) which is not a valid "
+                                     "section index");
+  }
+  if (sym.sect > normalizedFile.sections.size()) {
+    int sectionIndex = sym.sect;
+    return make_dynamic_error_code(Twine("Symbol '") + sym.name
+                                   + "' has n_sect ("
+                                   + Twine(sectionIndex)
+                                   + ") which is too large");
+  }
   const Section &section = normalizedFile.sections[sym.sect - 1];
+  switch (symbolsInSect[sym.sect - 1]) {
+  case symbolsOk:
+    break;
+  case symbolsIgnored:
+    return error_code::success();
+  case symbolsIllegal:
+    return make_dynamic_error_code(Twine("Symbol '") + sym.name
+                                   + "' is not legal in section "
+                                   + section.segmentName + "/"
+                                   + section.sectionName);
+  }
+
   uint64_t offset = sym.value - section.address;
+  // Mach-O symbol table does not have size in it, so need to scan ahead
+  // to find symbol with next highest address.
   uint64_t size = nextSymbolAddress(normalizedFile, sym) - sym.value;
   if (section.type == llvm::MachO::S_ZEROFILL) {
     file.addZeroFillDefinedAtom(sym.name, atomScope(sym.scope), size, copyRefs);
-  }
-  else if ((section.type == llvm::MachO::S_CSTRING_LITERALS) &&
-             (sym.name[0] == 'L')) {
-    // Ignore L labels on cstrings.
   } else {
     ArrayRef<uint8_t> atomContent = section.content.slice(offset, size);
     DefinedAtom::Merge m = DefinedAtom::mergeNo;
     file.addDefinedAtom(sym.name, atomScope(sym.scope),
                         atomTypeFromSection(section), m, atomContent, copyRefs);
   }
+  return error_code::success();
 }
return error_code::success();
}
+
+// A __TEXT/__eh_frame section contains dwarf unwind CFIs (either CIE or FDE).
+// Atom boundaries are determined by looking at the length content header
+// in each CFI.
+//
+// Each CFI becomes one unnamed, translation-unit-scoped atom of type
+// DefinedAtom::typeCFI.  Returns an error if fewer than 16 bytes remain where
+// a CFI should start, or if a CFI's length runs past the end of the section.
+// 'swap' is passed to read32() when decoding the length word; 'is64' is
+// currently unused.
+static error_code processCFISection(MachOFile &file, const Section &section,
+                                    bool is64, bool swap, bool copyRefs) {
+  const unsigned char *buffer = section.content.data();
+  for (size_t offset = 0, end = section.content.size(); offset < end; ) {
+    const size_t remaining = end - offset;
+    if (remaining < 16) {
+      return make_dynamic_error_code(Twine("Section __TEXT/__eh_frame is "
+                                           "malformed. Not enough room left for "
+                                           "a CFI starting at offset ("
+                                           + Twine(offset)
+                                           + ")"));
+    }
+    const uint32_t *cfi = reinterpret_cast<const uint32_t *>(&buffer[offset]);
+    // Widen before adding the 4 bytes of the length word itself, so a length
+    // field near UINT32_MAX cannot wrap around to a tiny size.
+    const uint64_t len = static_cast<uint64_t>(read32(swap, *cfi)) + 4;
+    // Compare against the bytes left rather than offset+len, which could
+    // overflow size_t on a 32-bit host.
+    if (len > remaining) {
+      return make_dynamic_error_code(Twine("Section __TEXT/__eh_frame is "
+                                           "malformed. Size of CFI starting at "
+                                           "offset ("
+                                           + Twine(offset)
+                                           + ") is past end of section."));
+    }
+    // Safe narrowing: len <= remaining, which is itself a size_t.
+    const size_t cfiSize = static_cast<size_t>(len);
+    ArrayRef<uint8_t> bytes = section.content.slice(offset, cfiSize);
+    file.addDefinedAtom(StringRef(), DefinedAtom::scopeTranslationUnit,
+                        DefinedAtom::typeCFI, DefinedAtom::mergeNo,
+                        bytes, copyRefs);
+    offset += cfiSize;
+  }
+  return error_code::success();
+}
+
static error_code processSection(MachOFile &file, const Section §ion,
- bool is64, bool copyRefs) {
+ bool is64, bool swap, bool copyRefs,
+ SymbolsInSection &symbolsInSect) {
unsigned offset = 0;
const unsigned pointerSize = (is64 ? 8 : 4);
switch (section.type) {
case llvm::MachO::S_REGULAR:
if (section.segmentName.equals("__TEXT") &&
section.sectionName.equals("__ustring")) {
+ symbolsInSect = symbolsIgnored;
return processUTF16Section(file, section, is64, copyRefs);
- }
- else if (section.segmentName.equals("__DATA") &&
+ } else if (section.segmentName.equals("__DATA") &&
section.sectionName.equals("__cfstring")) {
+ symbolsInSect = symbolsIllegal;
return processCFStringSection(file, section, is64, copyRefs);
}
break;
case llvm::MachO::S_COALESCED:
+ if (section.segmentName.equals("__TEXT") &&
+ section.sectionName.equals("__eh_frame")) {
+ symbolsInSect = symbolsIgnored;
+ return processCFISection(file, section, is64, swap, copyRefs);
+ }
case llvm::MachO::S_ZEROFILL:
// These sections are broken into atoms based on symbols.
break;
bytes, copyRefs);
offset += pointerSize;
}
+ symbolsInSect = symbolsIllegal;
break;
case S_MOD_TERM_FUNC_POINTERS:
if ((section.content.size() % pointerSize) != 0) {
bytes, copyRefs);
offset += pointerSize;
}
+ symbolsInSect = symbolsIllegal;
break;
case S_NON_LAZY_SYMBOL_POINTERS:
if ((section.content.size() % pointerSize) != 0) {
bytes, copyRefs);
offset += pointerSize;
}
+ symbolsInSect = symbolsIllegal;
break;
case llvm::MachO::S_CSTRING_LITERALS:
for (size_t i = 0, e = section.content.size(); i != e; ++i) {
"last string in the section is not zero "
"terminated.");
}
+ symbolsInSect = symbolsIgnored;
break;
case llvm::MachO::S_4BYTE_LITERALS:
if ((section.content.size() % 4) != 0)
DefinedAtom::mergeByContent, byteContent, copyRefs);
offset += 4;
}
+ symbolsInSect = symbolsIllegal;
break;
case llvm::MachO::S_8BYTE_LITERALS:
if ((section.content.size() % 8) != 0)
DefinedAtom::mergeByContent, byteContent, copyRefs);
offset += 8;
}
+ symbolsInSect = symbolsIllegal;
break;
case llvm::MachO::S_16BYTE_LITERALS:
if ((section.content.size() % 16) != 0)
DefinedAtom::mergeByContent, byteContent, copyRefs);
offset += 16;
}
+ symbolsInSect = symbolsIllegal;
break;
default:
llvm_unreachable("mach-o section type not supported yet");
bool copyRefs) {
std::unique_ptr<MachOFile> file(new MachOFile(path));
+ // Create atoms from sections that don't have symbols.
+ bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
+ bool swap = !MachOLinkingContext::isHostEndian(normalizedFile.arch);
+ SmallVector<SymbolsInSection, 32> symbolsInSect;
+ for (auto § : normalizedFile.sections) {
+ symbolsInSect.push_back(symbolsOk);
+ if (error_code ec = processSection(*file, sect, is64, swap, copyRefs,
+ symbolsInSect.back()))
+ return ec;
+ }
// Create atoms from global symbols.
for (const Symbol &sym : normalizedFile.globalSymbols) {
- processSymbol(normalizedFile, *file, sym, copyRefs);
+ if (error_code ec = processSymbol(normalizedFile, *file, sym, copyRefs,
+ symbolsInSect))
+ return ec;
}
// Create atoms from local symbols.
for (const Symbol &sym : normalizedFile.localSymbols) {
- processSymbol(normalizedFile, *file, sym, copyRefs);
+ if (error_code ec = processSymbol(normalizedFile, *file, sym, copyRefs,
+ symbolsInSect))
+ return ec;
}
- // Create atoms from undefinded symbols.
+ // Create atoms from undefined symbols.
for (auto &sym : normalizedFile.undefinedSymbols) {
processUndefindeSymbol(*file, sym, copyRefs);
}
- // Create atoms from sections that don't have symbols.
- bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
- for (auto § : normalizedFile.sections) {
- if (error_code ec = processSection(*file, sect, is64, copyRefs))
- return ec;
- }
return std::unique_ptr<File>(std::move(file));
}
--- /dev/null
+# RUN: lld -flavor darwin -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s
+#
+# Test parsing of __eh_frame (dwarf unwind) section.
+#
+
+--- !mach-o
+arch: x86_64
+file-type: MH_OBJECT
+flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]
+has-UUID: false
+OS: unknown
+sections:
+ - segment: __TEXT
+ section: __text
+ type: S_REGULAR
+ attributes: [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ]
+ address: 0x0000000000000000
+ content: [ 0x55, 0x48, 0x89, 0xE5, 0xB8, 0x09, 0x00, 0x00,
+ 0x00, 0x5D, 0xC3, 0x55, 0x48, 0x89, 0xE5, 0xB8,
+ 0x0A, 0x00, 0x00, 0x00, 0x5D, 0xC3 ]
+ - segment: __TEXT
+ section: __eh_frame
+ type: S_COALESCED
+ attributes: [ ]
+ alignment: 3
+ address: 0x0000000000000058
+ content: [ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x7A, 0x52, 0x00, 0x01, 0x78, 0x10, 0x01,
+ 0x10, 0x0C, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00,
+ 0x88, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x41, 0x0E, 0x10, 0x86, 0x02, 0x43, 0x0D,
+ 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x24, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00,
+ 0x6B, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x41, 0x0E, 0x10, 0x86, 0x02, 0x43, 0x0D,
+ 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ]
+global-symbols:
+ - name: __Z3barv
+ type: N_SECT
+ scope: [ N_EXT ]
+ sect: 1
+ value: 0x0000000000000000
+ - name: __Z3foov
+ type: N_SECT
+ scope: [ N_EXT ]
+ sect: 1
+ value: 0x000000000000000B
+...
+
+# CHECK: defined-atoms:
+# CHECK: - type: unwind-cfi
+# CHECK: content: [ 14, 00, 00, 00, 00, 00, 00, 00, 01, 7A, 52, 00,
+# CHECK: 01, 78, 10, 01, 10, 0C, 07, 08, 90, 01, 00, 00 ]
+# CHECK: - type: unwind-cfi
+# CHECK: content: [ 24, 00, 00, 00, 1C, 00, 00, 00, 88, FF, FF, FF,
+# CHECK: FF, FF, FF, FF, 0B, 00, 00, 00, 00, 00, 00, 00,
+# CHECK: 00, 41, 0E, 10, 86, 02, 43, 0D, 06, 00, 00, 00,
+# CHECK: 00, 00, 00, 00 ]
+# CHECK: - type: unwind-cfi
+# CHECK: content: [ 24, 00, 00, 00, 44, 00, 00, 00, 6B, FF, FF, FF,
+# CHECK: FF, FF, FF, FF, 0B, 00, 00, 00, 00, 00, 00, 00,
+# CHECK: 00, 41, 0E, 10, 86, 02, 43, 0D, 06, 00, 00, 00,
+# CHECK: 00, 00, 00, 00 ]
+# CHECK: - name: __Z3barv
+# CHECK: scope: global
+# CHECK: content: [ 55, 48, 89, E5, B8, 09, 00, 00, 00, 5D, C3 ]
+# CHECK: - name: __Z3foov
+# CHECK: scope: global
+# CHECK: content: [ 55, 48, 89, E5, B8, 0A, 00, 00, 00, 5D, C3 ]
+