From f9649d123db64e38bce7bcb4cc5791239820f3e2 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 9 Jun 2021 09:01:35 +1000 Subject: [PATCH] [JITLink][MachO] Split C-string literal sections on null-terminators. MachO C-string literal sections should be split on null-terminator boundaries, rather than the usual symbol boundaries. This patch updates MachOLinkGraphBuilder to do that. --- .../JITLink/MachOLinkGraphBuilder.cpp | 161 +++++++++++++++++---- .../JITLink/MachOLinkGraphBuilder.h | 12 ++ 2 files changed, 148 insertions(+), 25 deletions(-) diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp index 59db2ad..904f38a 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp @@ -411,10 +411,16 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { << " as it has a custom parser.\n"; }); continue; + } else if ((NSec.Flags & MachO::SECTION_TYPE) == + MachO::S_CSTRING_LITERALS) { + if (auto Err = graphifyCStringSection( + NSec, std::move(SecIndexToSymbols[SecIndex]))) + return Err; + continue; } else LLVM_DEBUG({ - dbgs() << " Processing section " << NSec.GraphSection->getName() - << "...\n"; + dbgs() << " Graphifying regular section " + << NSec.GraphSection->getName() << "...\n"; }); bool SectionIsNoDeadStrip = NSec.Flags & MachO::S_ATTR_NO_DEAD_STRIP; @@ -525,34 +531,14 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { bool SymLive = (NSym.Desc & MachO::N_NO_DEAD_STRIP) || SectionIsNoDeadStrip; - LLVM_DEBUG({ - dbgs() << " " << formatv("{0:x16}", NSym.Value) << " -- " - << formatv("{0:x16}", SymEnd) << ": "; - if (!NSym.Name) - dbgs() << ""; - else - dbgs() << NSym.Name; - if (SymLive) - dbgs() << " [no-dead-strip]"; - if (LastCanonicalAddr == NSym.Value) - dbgs() << " [non-canonical]"; - dbgs() << "\n"; - }); + auto &Sym = createStandardGraphSymbol(NSym, B, SymEnd - NSym.Value, + SectionIsText, SymLive, + LastCanonicalAddr != NSym.Value); - auto &Sym = - NSym.Name - ? G->addDefinedSymbol(B, NSym.Value - BlockStart, *NSym.Name, - SymEnd - NSym.Value, NSym.L, NSym.S, - SectionIsText, SymLive) - : G->addAnonymousSymbol(B, NSym.Value - BlockStart, - SymEnd - NSym.Value, SectionIsText, - SymLive); - NSym.GraphSymbol = &Sym; if (LastCanonicalAddr != Sym.getAddress()) { if (LastCanonicalAddr) SymEnd = *LastCanonicalAddr; LastCanonicalAddr = Sym.getAddress(); - setCanonicalSymbol(Sym); } } } @@ -561,6 +547,41 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { return Error::success(); } +Symbol &MachOLinkGraphBuilder::createStandardGraphSymbol(NormalizedSymbol &NSym, + Block &B, size_t Size, + bool IsText, + bool IsNoDeadStrip, + bool IsCanonical) { + + LLVM_DEBUG({ + dbgs() << " " << formatv("{0:x16}", NSym.Value) << " -- " + << formatv("{0:x16}", NSym.Value + Size) << ": "; + if (!NSym.Name) + dbgs() << ""; + else + dbgs() << NSym.Name; + if (IsText) + dbgs() << " [text]"; + if (IsNoDeadStrip) + dbgs() << " [no-dead-strip]"; + if (!IsCanonical) + dbgs() << " [non-canonical]"; + dbgs() << "\n"; + }); + + auto &Sym = NSym.Name ? G->addDefinedSymbol(B, NSym.Value - B.getAddress(), + *NSym.Name, Size, NSym.L, NSym.S, + IsText, IsNoDeadStrip) + : G->addAnonymousSymbol(B, NSym.Value - B.getAddress(), + Size, IsText, IsNoDeadStrip); + NSym.GraphSymbol = &Sym; + + if (IsCanonical) + setCanonicalSymbol(Sym); + + return Sym; +} + Error MachOLinkGraphBuilder::graphifySectionsWithCustomParsers() { // Graphify special sections. for (auto &KV : IndexToSection) { @@ -581,5 +602,95 @@ Error MachOLinkGraphBuilder::graphifySectionsWithCustomParsers() { return Error::success(); } +Error MachOLinkGraphBuilder::graphifyCStringSection( + NormalizedSection &NSec, std::vector NSyms) { + + assert(NSec.GraphSection && "C string literal section missing graph section"); + assert(NSec.Data && "C string literal section has no data"); + + LLVM_DEBUG({ + dbgs() << " Graphifying C-string literal section " + << NSec.GraphSection->getName() << "\n"; + }); + + if (NSec.Data[NSec.Size - 1] != '\0') + return make_error("C string literal section " + + NSec.GraphSection->getName() + + " does not end with null terminator"); + + /// Sort into reverse order to use as a stack. + llvm::sort(NSyms, + [](const NormalizedSymbol *LHS, const NormalizedSymbol *RHS) { + if (LHS->Value != RHS->Value) + return LHS->Value > RHS->Value; + if (LHS->L != RHS->L) + return LHS->L > RHS->L; + if (LHS->S != RHS->S) + return LHS->S > RHS->S; + if (RHS->Name) { + if (!LHS->Name) + return true; + return *LHS->Name > *RHS->Name; + } + return false; + }); + + bool SectionIsNoDeadStrip = NSec.Flags & MachO::S_ATTR_NO_DEAD_STRIP; + bool SectionIsText = NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS; + JITTargetAddress BlockStart = 0; + + // Scan section for null characters. + for (size_t I = 0; I != NSec.Size; ++I) + if (NSec.Data[I] == '\0') { + JITTargetAddress BlockEnd = I + 1; + size_t BlockSize = BlockEnd - BlockStart; + // Create a block for this null terminated string. + auto &B = G->createContentBlock(*NSec.GraphSection, + {NSec.Data + BlockStart, BlockSize}, + NSec.Address + BlockStart, 1, 0); + + LLVM_DEBUG({ + dbgs() << " Created block " << formatv("{0:x}", B.getAddress()) + << " -- " << formatv("{0:x}", B.getAddress() + B.getSize()) + << " for \"" << StringRef(B.getContent().data()) << "\"\n"; + }); + + // Process any symbols that point into this block. + JITTargetAddress LastCanonicalAddr = BlockEnd; + Symbol *BlockStartSymbol = nullptr; + while (!NSyms.empty() && + NSyms.back()->Value < (B.getAddress() + BlockSize)) { + auto &NSym = *NSyms.back(); + size_t SymSize = (B.getAddress() + BlockSize) - NSyms.back()->Value; + bool SymLive = + (NSym.Desc & MachO::N_NO_DEAD_STRIP) || SectionIsNoDeadStrip; + + auto &S = + createStandardGraphSymbol(NSym, B, SymSize, SectionIsText, SymLive, + NSym.Value != LastCanonicalAddr); + + if (S.getAddress() == B.getAddress() && !BlockStartSymbol) + BlockStartSymbol = &S; + + NSyms.pop_back(); + } + + if (!BlockStartSymbol) { + auto &S = G->addAnonymousSymbol(B, 0, BlockSize, false, false); + setCanonicalSymbol(S); + LLVM_DEBUG({ + dbgs() << " Adding anonymous symbol for " + << formatv("{0:x16} -- {1:x16}", S.getAddress(), + S.getAddress() + BlockSize) + << "\n"; + }); + } + + BlockStart += BlockSize; + } + + return Error::success(); +} + } // end namespace jitlink } // end namespace llvm diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h index 73a1f40..a6df445 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h @@ -200,9 +200,21 @@ private: /// all defined symbols in sections without custom parsers. Error graphifyRegularSymbols(); + /// Create and return a graph symbol for the given normalized symbol. + /// + /// NSym's GraphSymbol member will be updated to point at the newly created + /// symbol. + Symbol &createStandardGraphSymbol(NormalizedSymbol &Sym, Block &B, + size_t Size, bool IsText, + bool IsNoDeadStrip, bool IsCanonical); + /// Create graph blocks and symbols for all sections. Error graphifySectionsWithCustomParsers(); + /// Graphify cstring section. + Error graphifyCStringSection(NormalizedSection &NSec, + std::vector NSyms); + // Put the BumpPtrAllocator first so that we don't free any of the underlying // memory until the Symbol/Addressable destructors have been run. BumpPtrAllocator Allocator; -- 2.7.4