From 9ede702bc4ef97df625c0e69be225f1fd5a6a55c Mon Sep 17 00:00:00 2001 From: Nick Kledzik Date: Thu, 29 May 2014 20:44:21 +0000 Subject: [PATCH] [mach-o] Add support for parsing CFString sections llvm-svn: 209844 --- .../MachO/MachONormalizedFileFromAtoms.cpp | 3 + .../MachO/MachONormalizedFileToAtoms.cpp | 89 +++++++++++++++------ lld/test/mach-o/parse-cfstring32.yaml | 78 +++++++++++++++++++ lld/test/mach-o/parse-cfstring64.yaml | 91 ++++++++++++++++++++++ 4 files changed, 236 insertions(+), 25 deletions(-) create mode 100644 lld/test/mach-o/parse-cfstring32.yaml create mode 100644 lld/test/mach-o/parse-cfstring64.yaml diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp index 1b548d9..6531455 100644 --- a/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp @@ -197,6 +197,9 @@ SectionInfo *Util::makeSection(DefinedAtom::ContentType type) { case DefinedAtom::typeUTF16String: return new (_allocator) SectionInfo("__TEXT", "__ustring", S_REGULAR); + case DefinedAtom::typeCFString: + return new (_allocator) SectionInfo("__DATA", "__cfstring", + S_REGULAR); default: llvm_unreachable("TO DO: add support for more sections"); break; diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp index 4a8d4b7..c06289a 100644 --- a/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp +++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp @@ -87,8 +87,12 @@ static void processSymbol(const NormalizedFile &normalizedFile, MachOFile &file, const Section &section = normalizedFile.sections[sym.sect - 1]; uint64_t offset = sym.value - section.address; uint64_t size = nextSymbolAddress(normalizedFile, sym) - sym.value; - if (section.type == llvm::MachO::S_ZEROFILL){ + if (section.type == llvm::MachO::S_ZEROFILL) { file.addZeroFillDefinedAtom(sym.name, 
atomScope(sym.scope), size, copyRefs); + } + else if ((section.type == llvm::MachO::S_CSTRING_LITERALS) && + (sym.name[0] == 'L')) { + // Ignore L labels on cstrings. } else { ArrayRef atomContent = section.content.slice(offset, size); DefinedAtom::Merge m = DefinedAtom::mergeNo; @@ -111,37 +115,72 @@ static void processUndefindeSymbol(MachOFile &file, const Symbol &sym, } } +// A __TEXT/__ustring section contains UTF16 strings. Atom boundaries are +// determined by finding the terminating 0x0000 in each string. +static error_code processUTF16Section(MachOFile &file, const Section &section, + bool is64, bool copyRefs) { + if ((section.content.size() % 4) != 0) + return make_dynamic_error_code(Twine("Section ") + section.segmentName + + "/" + section.sectionName + + " has a size that is not even"); + unsigned offset = 0; + for (size_t i = 0, e = section.content.size(); i != e; i +=2) { + if ((section.content[i] == 0) && (section.content[i+1] == 0)) { + unsigned size = i - offset + 2; + ArrayRef utf16Content = section.content.slice(offset, size); + file.addDefinedAtom(StringRef(), DefinedAtom::scopeLinkageUnit, + DefinedAtom::typeUTF16String, + DefinedAtom::mergeByContent, utf16Content, + copyRefs); + offset = i + 2; + } + } + if (offset != section.content.size()) { + return make_dynamic_error_code(Twine("Section ") + section.segmentName + + "/" + section.sectionName + + " is supposed to contain 0x0000 " + "terminated UTF16 strings, but the " + "last string in the section is not zero " + "terminated."); + } + return error_code::success(); +} + +// A __DATA/__cfstring section contains NS/CFString objects. Atom boundaries +// are determined because each object is known to be 4 pointers in size. +static error_code processCFStringSection(MachOFile &file,const Section &section, + bool is64, bool copyRefs) { + const uint32_t cfsObjSize = (is64 ? 
32 : 16); + if ((section.content.size() % cfsObjSize) != 0) { + return make_dynamic_error_code(Twine("Section __DATA/__cfstring has a size " + "(" + Twine(section.content.size()) + + ") that is not a multiple of " + + Twine(cfsObjSize))); + } + unsigned offset = 0; + for (size_t i = 0, e = section.content.size(); i != e; i += cfsObjSize) { + ArrayRef byteContent = section.content.slice(offset, cfsObjSize); + file.addDefinedAtom(StringRef(), DefinedAtom::scopeLinkageUnit, + DefinedAtom::typeCFString, + DefinedAtom::mergeByContent, byteContent, copyRefs); + offset += cfsObjSize; + } + return error_code::success(); +} + static error_code processSection(MachOFile &file, const Section &section, bool is64, bool copyRefs) { unsigned offset = 0; const unsigned pointerSize = (is64 ? 8 : 4); switch (section.type) { case llvm::MachO::S_REGULAR: - if (section.segmentName.equals("__TEXT") && + if (section.segmentName.equals("__TEXT") && section.sectionName.equals("__ustring")) { - if ((section.content.size() % 4) != 0) - return make_dynamic_error_code(Twine("Section ") + section.segmentName - + "/" + section.sectionName - + " has a size that is not even"); - for (size_t i = 0, e = section.content.size(); i != e; i +=2) { - if ((section.content[i] == 0) && (section.content[i+1] == 0)) { - unsigned size = i - offset + 2; - ArrayRef utf16Content = section.content.slice(offset, size); - file.addDefinedAtom(StringRef(), DefinedAtom::scopeLinkageUnit, - DefinedAtom::typeUTF16String, - DefinedAtom::mergeByContent, utf16Content, - copyRefs); - offset = i + 2; - } - } - if (offset != section.content.size()) { - return make_dynamic_error_code(Twine("Section ") + section.segmentName - + "/" + section.sectionName - + " is supposed to contain 0x0000 " - "terminated UTF16 strings, but the " - "last string in the section is not zero " - "terminated."); - } + return processUTF16Section(file, section, is64, copyRefs); + } + else if (section.segmentName.equals("__DATA") && + 
section.sectionName.equals("__cfstring")) { + return processCFStringSection(file, section, is64, copyRefs); } break; case llvm::MachO::S_COALESCED: diff --git a/lld/test/mach-o/parse-cfstring32.yaml b/lld/test/mach-o/parse-cfstring32.yaml new file mode 100644 index 0000000..8f4b041 --- /dev/null +++ b/lld/test/mach-o/parse-cfstring32.yaml @@ -0,0 +1,78 @@ +# RUN: lld -flavor darwin -arch i386 -r -print_atoms %s -o %t | FileCheck %s +# +# Test parsing of CFString constants. +# + +--- !mach-o +arch: x86 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __cstring + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x0000000000000000 + content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x00, 0x74, 0x68, + 0x65, 0x72, 0x65, 0x00 ] + - segment: __DATA + section: __cfstring + type: S_REGULAR + attributes: [ ] + alignment: 3 + address: 0x0000000000000010 + content: [ 0x00, 0x00, 0x00, 0x00, 0xC8, 0x07, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xC8, 0x07, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000018 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000010 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + extern: true + symbol: 1 + - offset: 0x00000008 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + extern: false + symbol: 1 + - offset: 0x00000000 + type: GENERIC_RELOC_VANILLA + length: 2 + pc-rel: false + extern: true + symbol: 1 +undefined-symbols: + - name: ___CFConstantStringClassReference + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... 
+ +# CHECK: defined-atoms: +# CHECK: - scope: hidden +# CHECK: type: c-string +# CHECK: content: [ 68, 65, 6C, 6C, 6F, 00 ] +# CHECK: merge: by-content +# CHECK: - scope: hidden +# CHECK: type: c-string +# CHECK: content: [ 74, 68, 65, 72, 65, 00 ] +# CHECK: merge: by-content +# CHECK: - scope: hidden +# CHECK: type: cfstring +# CHECK: merge: by-content +# CHECK: - scope: hidden +# CHECK: type: cfstring +# CHECK: merge: by-content +# CHECK:undefined-atoms: +# CHECK: - name: ___CFConstantStringClassReference diff --git a/lld/test/mach-o/parse-cfstring64.yaml b/lld/test/mach-o/parse-cfstring64.yaml new file mode 100644 index 0000000..ebb02a7 --- /dev/null +++ b/lld/test/mach-o/parse-cfstring64.yaml @@ -0,0 +1,91 @@ +# RUN: lld -flavor darwin -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s +# +# Test parsing of CFString constants. +# + +--- !mach-o +arch: x86_64 +file-type: MH_OBJECT +flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ] +has-UUID: false +OS: unknown +sections: + - segment: __TEXT + section: __cstring + type: S_CSTRING_LITERALS + attributes: [ ] + address: 0x0000000000000000 + content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x00, 0x74, 0x68, + 0x65, 0x72, 0x65, 0x00 ] + - segment: __DATA + section: __cfstring + type: S_REGULAR + attributes: [ ] + alignment: 4 + address: 0x0000000000000010 + content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xC8, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xC8, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ] + relocations: + - offset: 0x00000030 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 1 + - offset: 0x00000020 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 2 + - offset: 0x00000010 + type: 
X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 0 + - offset: 0x00000000 + type: X86_64_RELOC_UNSIGNED + length: 3 + pc-rel: false + extern: true + symbol: 2 +local-symbols: + - name: Lstr1 + type: N_SECT + sect: 1 + value: 0x0000000000000000 + - name: Lstr2 + type: N_SECT + sect: 1 + value: 0x0000000000000006 +undefined-symbols: + - name: ___CFConstantStringClassReference + type: N_UNDF + scope: [ N_EXT ] + value: 0x0000000000000000 +... + +# CHECK: defined-atoms: +# CHECK: - scope: hidden +# CHECK: type: c-string +# CHECK: content: [ 68, 65, 6C, 6C, 6F, 00 ] +# CHECK: merge: by-content +# CHECK: - scope: hidden +# CHECK: type: c-string +# CHECK: content: [ 74, 68, 65, 72, 65, 00 ] +# CHECK: merge: by-content +# CHECK: - scope: hidden +# CHECK: type: cfstring +# CHECK: merge: by-content +# CHECK: - scope: hidden +# CHECK: type: cfstring +# CHECK: merge: by-content +# CHECK:undefined-atoms: +# CHECK: - name: ___CFConstantStringClassReference -- 2.7.4