From f74e93b60004ecc23832492264116c38249a87c7 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Fri, 29 Apr 2016 21:32:00 +0000 Subject: [PATCH] [Orc] Add ORC lazy-compilation support for AArch64. The ORC compile callbacks and indirect stubs APIs will now work for AArch64, allowing functions to be lazily compiled and/or updated. llvm-svn: 268112 --- .../ExecutionEngine/Orc/OrcArchitectureSupport.h | 31 +++++ .../ExecutionEngine/Orc/OrcArchitectureSupport.cpp | 144 +++++++++++++++++++++ 2 files changed, 175 insertions(+) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h index c8e4e26..3adee86 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h @@ -107,6 +107,37 @@ private: sys::OwningMemoryBlock StubsMem; }; +class OrcAArch64 { +public: + static const unsigned PointerSize = 8; + static const unsigned TrampolineSize = 12; + static const unsigned ResolverCodeSize = 0x6C; + + typedef GenericIndirectStubsInfo<8> IndirectStubsInfo; + + typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, void *TrampolineId); + + /// @brief Write the resolver code into the given memory. The user is + /// responsible for allocating the memory and setting permissions. + static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, + void *CallbackMgr); + + /// @brief Write the requested number of trampolines into the given memory, + /// which must be big enough to hold 1 pointer, plus NumTrampolines + /// trampolines. + static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, + unsigned NumTrampolines); + + /// @brief Emit at least MinStubs worth of indirect call stubs, rounded out to + /// the nearest page size. + /// + /// E.g. Asking for 4 stubs on x86-64, where stubs are 8-bytes, with 4k + /// pages will return a block of 512 stubs (4096 / 8 = 512). 
Asking for 513 + /// will return a block of 1024 (2-pages worth). + static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, + unsigned MinStubs, void *InitialPtrVal); +}; + /// @brief X86_64 support. /// /// X86_64 supports lazy JITing. diff --git a/llvm/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp index b51e3cd..c92a8d2 100644 --- a/llvm/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp @@ -14,6 +14,150 @@ namespace llvm { namespace orc { +void OrcAArch64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, + void *CallbackMgr) { + + const uint32_t ResolverCode[] = { + // resolver_entry: + 0xa9bf47fd, // 0x00: stp x29, x17, [sp, #-16]! + 0x910003fd, // 0x04: mov x29, sp + 0xa9bf73fb, // 0x08: stp x27, x28, [sp, #-16]! + 0xa9bf6bf9, // 0x0C: stp x25, x26, [sp, #-16]! + 0xa9bf63f7, // 0x10: stp x23, x24, [sp, #-16]! + 0xa9bf5bf5, // 0x14: stp x21, x22, [sp, #-16]! + 0xa9bf53f3, // 0x18: stp x19, x20, [sp, #-16]! + 0xa9bf3fee, // 0x1C: stp x14, x15, [sp, #-16]! + 0xa9bf37ec, // 0x20: stp x12, x13, [sp, #-16]! + 0xa9bf2fea, // 0x24: stp x10, x11, [sp, #-16]! + 0xa9bf27e8, // 0x28: stp x8, x9, [sp, #-16]! + 0xa9bf1fe6, // 0x2C: stp x6, x7, [sp, #-16]! + 0xa9bf17e4, // 0x30: stp x4, x5, [sp, #-16]! + 0xa9bf0fe2, // 0x34: stp x2, x3, [sp, #-16]! + 0xa9bf07e0, // 0x38: stp x0, x1, [sp, #-16]! 
+ 0x580002e0, // 0x3C: ldr x0, Lcallback_mgr + 0xaa1e03e1, // 0x40: mov x1, x30 + 0xd1003021, // 0x44: sub x1, x1, #12 + 0x58000242, // 0x48: ldr x2, Lreentry_fn + 0xd63f0040, // 0x4C: blr x2 + 0xaa0003f1, // 0x50: mov x17, x0 + 0xa8c107e0, // 0x54: ldp x0, x1, [sp], #16 + 0xa8c10fe2, // 0x58: ldp x2, x3, [sp], #16 + 0xa8c117e4, // 0x5C: ldp x4, x5, [sp], #16 + 0xa8c11fe6, // 0x60: ldp x6, x7, [sp], #16 + 0xa8c127e8, // 0x64: ldp x8, x9, [sp], #16 + 0xa8c12fea, // 0x68: ldp x10, x11, [sp], #16 + 0xa8c137ec, // 0x6C: ldp x12, x13, [sp], #16 + 0xa8c13fee, // 0x70: ldp x14, x15, [sp], #16 + 0xa8c153f3, // 0x74: ldp x19, x20, [sp], #16 + 0xa8c15bf5, // 0x78: ldp x21, x22, [sp], #16 + 0xa8c163f7, // 0x7C: ldp x23, x24, [sp], #16 + 0xa8c16bf9, // 0x80: ldp x25, x26, [sp], #16 + 0xa8c173fb, // 0x84: ldp x27, x28, [sp], #16 + 0xa8c17bfd, // 0x88: ldp x29, x30, [sp], #16 + 0xd65f0220, // 0x8C: ret x17 + 0x00000000, // 0x90: Lreentry_fn: + 0x00000000, // .quad reentry_fn + 0x00000000, // 0x98: Lcallback_mgr: + 0x00000000, // .quad callback_mgr + }; + + const unsigned ReentryFnAddrOffset = 0x90; + const unsigned CallbackMgrAddrOffset = 0x98; + + memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode)); + memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn)); + memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr, + sizeof(CallbackMgr)); +} + +void OrcAArch64::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, + unsigned NumTrampolines) { + + unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8); + + memcpy(TrampolineMem + OffsetToPtr, &ResolverAddr, sizeof(void *)); + + // OffsetToPtr is actually the offset from the PC for the 2nd instruction, so + // subtract 32-bits. 
+ OffsetToPtr -= 4; + + uint32_t *Trampolines = reinterpret_cast<uint32_t *>(TrampolineMem); + + for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) { + Trampolines[3 * I + 0] = 0xaa1e03f1; // mov x17, x30 + Trampolines[3 * I + 1] = 0x58000010 | (OffsetToPtr << 3); // ldr x16, Lptr + Trampolines[3 * I + 2] = 0xd63f0200; // blr x16 + } + +} + +Error OrcAArch64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, + unsigned MinStubs, + void *InitialPtrVal) { + // Stub format is: + // + // .section __orc_stubs + // stub1: + // ldr x0, ptr1 ; PC-rel load of ptr1 + // br x0 ; Jump to resolver + // stub2: + // ldr x0, ptr2 ; PC-rel load of ptr2 + // br x0 ; Jump to resolver + // + // ... + // + // .section __orc_ptrs + // ptr1: + // .quad 0x0 + // ptr2: + // .quad 0x0 + // + // ... + + const unsigned StubSize = IndirectStubsInfo::StubSize; + + // Emit at least MinStubs, rounded up to fill the pages allocated. + unsigned PageSize = sys::Process::getPageSize(); + unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize; + unsigned NumStubs = (NumPages * PageSize) / StubSize; + + // Allocate memory for stubs and pointers in one call. + std::error_code EC; + auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( + 2 * NumPages * PageSize, nullptr, + sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); + + if (EC) + return errorCodeToError(EC); + + // Create separate MemoryBlocks representing the stubs and pointers. + sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize); + sys::MemoryBlock PtrsBlock(static_cast<char *>(StubsMem.base()) + + NumPages * PageSize, + NumPages * PageSize); + + // Populate the stubs page with stubs and mark it executable. 
+ uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlock.base()); + uint64_t PtrOffsetField = static_cast<uint64_t>(NumPages * PageSize) + << 3; + + for (unsigned I = 0; I < NumStubs; ++I) + Stub[I] = 0xd61f020058000010 | PtrOffsetField; + + if (auto EC = sys::Memory::protectMappedMemory( + StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC)) + return errorCodeToError(EC); + + // Initialize all pointers to point at InitialPtrVal. + void **Ptr = reinterpret_cast<void **>(PtrsBlock.base()); + for (unsigned I = 0; I < NumStubs; ++I) + Ptr[I] = InitialPtrVal; + + StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem)); + + return Error::success(); +} + void OrcX86_64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, void *CallbackMgr) { -- 2.7.4