From 6b2a96285b9bbe92d2c5e21830f21458f8be976d Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 21 Jul 2021 17:38:35 +1000 Subject: [PATCH] Re-re-apply "[ORC][ORC-RT] Add initial native-TLV support to MachOPlatform." This reapplies commit a7733e9556b5a6334c910f88bcd037e84e17e3fc ("Re-apply [ORC][ORC-RT] Add initial native-TLV support to MachOPlatform."), and d4abdefc998a1ee19d5edc79ec233774cbf64f6a ("[ORC-RT] Rename macho_tlv.x86-64.s to macho_tlv.x86-64.S (uppercase suffix)"). These patches were reverted in 48aa82cacbff10e1c5395a03f86488bf449ba4da while I investigated bot failures (e.g. https://lab.llvm.org/buildbot/#/builders/109/builds/18981). The fix was to disable building of the ORC runtime on builders using ccache (which is the same fix used for other compiler-rt projects containing assembly code). This fix was committed to llvm-zorg in 490633945677656ba75d42ff1ca9d4a400b7b243. --- compiler-rt/lib/orc/CMakeLists.txt | 1 + compiler-rt/lib/orc/macho_platform.cpp | 121 +++++++++++++++++++++ compiler-rt/lib/orc/macho_platform.h | 10 +- compiler-rt/lib/orc/macho_tlv.x86-64.S | 68 ++++++++++++ .../test/orc/TestCases/Darwin/x86-64/trivial-tlv.S | 63 +++++++++++ .../llvm/ExecutionEngine/Orc/MachOPlatform.h | 20 +++- llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp | 11 +- llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp | 120 +++++++++++++++++++- 8 files changed, 395 insertions(+), 19 deletions(-) create mode 100644 compiler-rt/lib/orc/macho_tlv.x86-64.S create mode 100644 compiler-rt/test/orc/TestCases/Darwin/x86-64/trivial-tlv.S diff --git a/compiler-rt/lib/orc/CMakeLists.txt b/compiler-rt/lib/orc/CMakeLists.txt index 0a83787..22381d8 100644 --- a/compiler-rt/lib/orc/CMakeLists.txt +++ b/compiler-rt/lib/orc/CMakeLists.txt @@ -11,6 +11,7 @@ set(ORC_SOURCES # Implementation files for all ORC architectures. set(x86_64_SOURCES # x86-64 specific assembly files will go here. 
+ macho_tlv.x86-64.S ) set(ORC_IMPL_HEADERS diff --git a/compiler-rt/lib/orc/macho_platform.cpp b/compiler-rt/lib/orc/macho_platform.cpp index 8a3f8d9..c84ff78 100644 --- a/compiler-rt/lib/orc/macho_platform.cpp +++ b/compiler-rt/lib/orc/macho_platform.cpp @@ -85,6 +85,12 @@ Error runModInits(const std::vector &ModInitsSections, return Error::success(); } +struct TLVDescriptor { + void *(*Thunk)(TLVDescriptor *) = nullptr; + unsigned long Key = 0; + unsigned long DataAddress = 0; +}; + class MachOPlatformRuntimeState { private: struct AtExitEntry { @@ -126,11 +132,17 @@ public: int registerAtExit(void (*F)(void *), void *Arg, void *DSOHandle); void runAtExits(void *DSOHandle); + /// Returns the base address of the section containing ThreadData. + Expected> + getThreadDataSectionFor(const char *ThreadData); + private: PerJITDylibState *getJITDylibStateByHeaderAddr(void *DSOHandle); PerJITDylibState *getJITDylibStateByName(string_view Path); PerJITDylibState &getOrCreateJITDylibState(MachOJITDylibInitializers &MOJDIs); + Error registerThreadDataSection(span ThreadDataSec); + Expected lookupSymbolInJITDylib(void *DSOHandle, string_view Symbol); @@ -153,6 +165,9 @@ private: std::recursive_mutex JDStatesMutex; std::unordered_map JDStates; std::unordered_map JDNameToHeader; + + std::mutex ThreadDataSectionsMutex; + std::map ThreadDataSections; }; MachOPlatformRuntimeState *MachOPlatformRuntimeState::MOPS = nullptr; @@ -178,6 +193,12 @@ Error MachOPlatformRuntimeState::registerObjectSections( walkEHFrameSection(POSR.EHFrameSection.toSpan(), __register_frame); + if (POSR.ThreadDataSection.StartAddress) { + if (auto Err = registerThreadDataSection( + POSR.ThreadDataSection.toSpan())) + return Err; + } + return Error::success(); } @@ -256,6 +277,19 @@ void MachOPlatformRuntimeState::runAtExits(void *DSOHandle) { } } +Expected> +MachOPlatformRuntimeState::getThreadDataSectionFor(const char *ThreadData) { + std::lock_guard Lock(ThreadDataSectionsMutex); + auto I = 
ThreadDataSections.upper_bound(ThreadData); + // Check that we have a valid entry covering this address. + if (I == ThreadDataSections.begin()) + return make_error("No thread local data section for key"); + I = std::prev(I); + if (ThreadData >= I->first + I->second) + return make_error("No thread local data section for key"); + return *I; +} + MachOPlatformRuntimeState::PerJITDylibState * MachOPlatformRuntimeState::getJITDylibStateByHeaderAddr(void *DSOHandle) { auto I = JDStates.find(DSOHandle); @@ -295,6 +329,20 @@ MachOPlatformRuntimeState::getOrCreateJITDylibState( return JDS; } +Error MachOPlatformRuntimeState::registerThreadDataSection( + span ThreadDataSection) { + std::lock_guard Lock(ThreadDataSectionsMutex); + auto I = ThreadDataSections.upper_bound(ThreadDataSection.data()); + if (I != ThreadDataSections.begin()) { + auto J = std::prev(I); + if (J->first + J->second > ThreadDataSection.data()) + return make_error("Overlapping __thread_data sections"); + } + ThreadDataSections.insert( + I, std::make_pair(ThreadDataSection.data(), ThreadDataSection.size())); + return Error::success(); +} + Expected MachOPlatformRuntimeState::lookupSymbolInJITDylib(void *DSOHandle, string_view Sym) { @@ -367,6 +415,45 @@ Error MachOPlatformRuntimeState::initializeJITDylib( return Error::success(); } +class MachOPlatformRuntimeTLVManager { +public: + void *getInstance(const char *ThreadData); + +private: + std::unordered_map Instances; + std::unordered_map> AllocatedSections; +}; + +void *MachOPlatformRuntimeTLVManager::getInstance(const char *ThreadData) { + auto I = Instances.find(ThreadData); + if (I != Instances.end()) + return I->second; + + auto TDS = + MachOPlatformRuntimeState::get().getThreadDataSectionFor(ThreadData); + if (!TDS) { + __orc_rt_log_error(toString(TDS.takeError()).c_str()); + return nullptr; + } + + auto &Allocated = AllocatedSections[TDS->first]; + if (!Allocated) { + Allocated = std::make_unique(TDS->second); + memcpy(Allocated.get(), TDS->first, 
TDS->second); + } + + size_t ThreadDataDelta = ThreadData - TDS->first; + assert(ThreadDataDelta <= TDS->second && "ThreadData outside section bounds"); + + char *Instance = Allocated.get() + ThreadDataDelta; + Instances[ThreadData] = Instance; + return Instance; +} + +void destroyMachOTLVMgr(void *MachOTLVMgr) { + delete static_cast(MachOTLVMgr); +} + } // end anonymous namespace //------------------------------------------------------------------------------ @@ -410,6 +497,40 @@ __orc_rt_macho_deregister_object_sections(char *ArgData, size_t ArgSize) { } //------------------------------------------------------------------------------ +// TLV support +//------------------------------------------------------------------------------ + +ORC_RT_INTERFACE void *__orc_rt_macho_tlv_get_addr_impl(TLVDescriptor *D) { + auto *TLVMgr = static_cast( + pthread_getspecific(D->Key)); + if (!TLVMgr) { + TLVMgr = new MachOPlatformRuntimeTLVManager(); + if (pthread_setspecific(D->Key, TLVMgr)) { + __orc_rt_log_error("Call to pthread_setspecific failed"); + return nullptr; + } + } + + return TLVMgr->getInstance( + reinterpret_cast(static_cast(D->DataAddress))); +} + +ORC_RT_INTERFACE __orc_rt_CWrapperFunctionResult +__orc_rt_macho_create_pthread_key(char *ArgData, size_t ArgSize) { + return WrapperFunction(void)>::handle( + ArgData, ArgSize, + []() -> Expected { + pthread_key_t Key; + if (int Err = pthread_key_create(&Key, destroyMachOTLVMgr)) { + __orc_rt_log_error("Call to pthread_key_create failed"); + return make_error(strerror(Err)); + } + return static_cast(Key); + }) + .release(); +} + +//------------------------------------------------------------------------------ // cxa_atexit support //------------------------------------------------------------------------------ diff --git a/compiler-rt/lib/orc/macho_platform.h b/compiler-rt/lib/orc/macho_platform.h index b4abb50..e097c15 100644 --- a/compiler-rt/lib/orc/macho_platform.h +++ b/compiler-rt/lib/orc/macho_platform.h @@ 
-33,6 +33,7 @@ namespace macho { struct MachOPerObjectSectionsToRegister { ExecutorAddressRange EHFrameSection; + ExecutorAddressRange ThreadDataSection; }; struct MachOJITDylibInitializers { @@ -66,7 +67,8 @@ enum dlopen_mode : int { } // end namespace macho -using SPSMachOPerObjectSectionsToRegister = SPSTuple; +using SPSMachOPerObjectSectionsToRegister = + SPSTuple; template <> class SPSSerializationTraits createPThreadKey(); + ExecutionSession &ES; ObjectLinkingLayer &ObjLinkingLayer; ExecutorProcessControl &EPC; @@ -223,6 +228,7 @@ private: ExecutorAddress orc_rt_macho_platform_bootstrap; ExecutorAddress orc_rt_macho_platform_shutdown; ExecutorAddress orc_rt_macho_register_object_sections; + ExecutorAddress orc_rt_macho_create_pthread_key; DenseMap RegisteredInitSymbols; @@ -233,11 +239,13 @@ private: std::vector BootstrapPOSRs; DenseMap HeaderAddrToJITDylib; + DenseMap JITDylibToPThreadKey; }; namespace shared { -using SPSMachOPerObjectSectionsToRegister = SPSTuple; +using SPSMachOPerObjectSectionsToRegister = + SPSTuple; template <> class SPSSerializationTraitsGraphSymbol; + else + return TargetSymbolOrErr.takeError(); + Addend = *(const little32_t *)FixupContent; + Kind = x86_64::RequestTLVPAndTransformToPCRel32TLVPLoadRelaxable; + break; case MachOPointer32: if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) TargetSymbol = TargetSymbolOrErr->GraphSymbol; @@ -392,9 +400,6 @@ private: assert(TargetSymbol && "No target symbol from parsePairRelocation?"); break; } - case MachOPCRel32TLV: - return make_error( - "MachO TLV relocations not yet supported"); } LLVM_DEBUG({ diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp index 2a65832..2d4848c 100644 --- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp @@ -128,6 +128,9 @@ constexpr MachOHeaderMaterializationUnit::HeaderSymbol StringRef EHFrameSectionName = "__TEXT,__eh_frame"; StringRef 
ModInitFuncSectionName = "__DATA,__mod_init_func"; +StringRef ThreadBSSSectionName = "__DATA,__thread_bss"; +StringRef ThreadDataSectionName = "__DATA,__thread_data"; +StringRef ThreadVarsSectionName = "__DATA,__thread_vars"; StringRef InitSectionNames[] = {ModInitFuncSectionName}; @@ -467,7 +470,8 @@ Error MachOPlatform::bootstrapMachORuntime(JITDylib &PlatformJD) { {"___orc_rt_macho_platform_bootstrap", &orc_rt_macho_platform_bootstrap}, {"___orc_rt_macho_platform_shutdown", &orc_rt_macho_platform_shutdown}, {"___orc_rt_macho_register_object_sections", - &orc_rt_macho_register_object_sections}}; + &orc_rt_macho_register_object_sections}, + {"___orc_rt_macho_create_pthread_key", &orc_rt_macho_create_pthread_key}}; SymbolLookupSet RuntimeSymbols; std::vector> AddrsToRecord; @@ -562,6 +566,20 @@ Error MachOPlatform::registerPerObjectSections( return ErrResult; } +Expected MachOPlatform::createPThreadKey() { + if (!orc_rt_macho_create_pthread_key) + return make_error( + "Attempting to create pthread key in target, but runtime support has " + "not been loaded yet", + inconvertibleErrorCode()); + + Expected Result(0); + if (auto Err = EPC.runSPSWrapper(void)>( + orc_rt_macho_create_pthread_key.getValue(), Result)) + return std::move(Err); + return Result; +} + void MachOPlatform::MachOPlatformPlugin::modifyPassConfig( MaterializationResponsibility &MR, jitlink::LinkGraph &LG, jitlink::PassConfiguration &Config) { @@ -579,8 +597,8 @@ void MachOPlatform::MachOPlatformPlugin::modifyPassConfig( if (MR.getInitializerSymbol()) addInitializerSupportPasses(MR, Config); - // Add passes for eh-frame support. - addEHSupportPasses(MR, Config); + // Add passes for eh-frame and TLV support. 
+ addEHAndTLVSupportPasses(MR, Config); } ObjectLinkingLayer::Plugin::SyntheticSymbolDependenciesMap @@ -634,10 +652,18 @@ void MachOPlatform::MachOPlatformPlugin::addMachOHeaderSupportPasses( }); } -void MachOPlatform::MachOPlatformPlugin::addEHSupportPasses( +void MachOPlatform::MachOPlatformPlugin::addEHAndTLVSupportPasses( MaterializationResponsibility &MR, jitlink::PassConfiguration &Config) { - // Add a pass to register the final addresses of the eh-frame sections + // Insert TLV lowering at the start of the PostPrunePasses, since we want + // it to run before GOT/PLT lowering. + Config.PostPrunePasses.insert( + Config.PostPrunePasses.begin(), + [this, &JD = MR.getTargetJITDylib()](jitlink::LinkGraph &G) { + return fixTLVSectionsAndEdges(G, JD); + }); + + // Add a pass to register the final addresses of the eh-frame and TLV sections // with the runtime. Config.PostFixupPasses.push_back([this](jitlink::LinkGraph &G) -> Error { MachOPerObjectSectionsToRegister POSR; @@ -649,7 +675,33 @@ void MachOPlatform::MachOPlatformPlugin::addEHSupportPasses( ExecutorAddress(R.getEnd())}; } - if (POSR.EHFrameSection.StartAddress) { + // Get a pointer to the thread data section if there is one. It will be used + // below. + jitlink::Section *ThreadDataSection = + G.findSectionByName(ThreadDataSectionName); + + // Handle thread BSS section if there is one. + if (auto *ThreadBSSSection = G.findSectionByName(ThreadBSSSectionName)) { + // If there's already a thread data section in this graph then merge the + // thread BSS section content into it, otherwise just treat the thread + // BSS section as the thread data section. + if (ThreadDataSection) + G.mergeSections(*ThreadDataSection, *ThreadBSSSection); + else + ThreadDataSection = ThreadBSSSection; + } + + // Having merged thread BSS (if present) and thread data (if present), + // record the resulting section range. 
+ if (ThreadDataSection) { + jitlink::SectionRange R(*ThreadDataSection); + if (!R.empty()) + POSR.ThreadDataSection = {ExecutorAddress(R.getStart()), + ExecutorAddress(R.getEnd())}; + } + + if (POSR.EHFrameSection.StartAddress || + POSR.ThreadDataSection.StartAddress) { // If we're still bootstrapping the runtime then just record this // frame for now. @@ -727,5 +779,61 @@ Error MachOPlatform::MachOPlatformPlugin::registerInitSections( return MP.registerInitInfo(JD, InitSections); } +Error MachOPlatform::MachOPlatformPlugin::fixTLVSectionsAndEdges( + jitlink::LinkGraph &G, JITDylib &JD) { + + // Rename external references to __tlv_bootstrap to ___orc_rt_tlv_get_addr. + for (auto *Sym : G.external_symbols()) + if (Sym->getName() == "__tlv_bootstrap") { + Sym->setName("___orc_rt_macho_tlv_get_addr"); + break; + } + + // Store key in __thread_vars struct fields. + if (auto *ThreadDataSec = G.findSectionByName(ThreadVarsSectionName)) { + Optional Key; + { + std::lock_guard Lock(MP.PlatformMutex); + auto I = MP.JITDylibToPThreadKey.find(&JD); + if (I != MP.JITDylibToPThreadKey.end()) + Key = I->second; + } + + if (!Key) { + if (auto KeyOrErr = MP.createPThreadKey()) + Key = *KeyOrErr; + else + return KeyOrErr.takeError(); + } + + uint64_t PlatformKeyBits = + support::endian::byte_swap(*Key, G.getEndianness()); + + for (auto *B : ThreadDataSec->blocks()) { + if (B->getSize() != 3 * G.getPointerSize()) + return make_error("__thread_vars block at " + + formatv("{0:x}", B->getAddress()) + + " has unexpected size", + inconvertibleErrorCode()); + + auto NewBlockContent = G.allocateBuffer(B->getSize()); + llvm::copy(B->getContent(), NewBlockContent.data()); + memcpy(NewBlockContent.data() + G.getPointerSize(), &PlatformKeyBits, + G.getPointerSize()); + B->setContent(NewBlockContent); + } + } + + // Transform any TLV edges into GOT edges. 
+ for (auto *B : G.blocks()) + for (auto &E : B->edges()) + if (E.getKind() == + jitlink::x86_64::RequestTLVPAndTransformToPCRel32TLVPLoadRelaxable) + E.setKind( + jitlink::x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable); + + return Error::success(); +} + } // End namespace orc. } // End namespace llvm. -- 2.7.4