From b05ba93578dd366f7cd1a5181470566cd523528a Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Thu, 28 Feb 2019 02:47:34 +0000 Subject: [PATCH] [Memory] Add basic support for large/huge memory pages This patch introduces Memory::MF_HUGE_HINT which indicates that allocateMappedMemory() shall return a pointer to a large memory page. However, the flag is a hint because we're not guaranteed in any way that we will get back a large memory page. There are several restrictions: - Large/huge memory pages aren't enabled by default on modern OSes (Windows 10 and Linux at least), and should be manually enabled/reserved. - Once enabled, it should be kept in mind that large pages are physical only; they can't be swapped. - Memory fragmentation can affect the availability of large pages, especially after running the OS for a long time and/or running alongside many other applications. Memory::allocateMappedMemory() will fall back to 4KB pages if it can't allocate 2MB large pages (if Memory::MF_HUGE_HINT is provided). Currently, Memory::MF_HUGE_HINT only works on Windows. The hint will be ignored on Linux; 4KB pages will always be returned.
Differential Revision: https://reviews.llvm.org/D58718 llvm-svn: 355065 --- llvm/include/llvm/Support/Memory.h | 7 +++- llvm/lib/Support/Unix/Memory.inc | 3 +- llvm/lib/Support/Windows/Memory.inc | 72 ++++++++++++++++++++++++++--------- llvm/unittests/Support/MemoryTest.cpp | 16 ++++++++ 4 files changed, 77 insertions(+), 21 deletions(-) diff --git a/llvm/include/llvm/Support/Memory.h b/llvm/include/llvm/Support/Memory.h index 2f4cafe..926a762 100644 --- a/llvm/include/llvm/Support/Memory.h +++ b/llvm/include/llvm/Support/Memory.h @@ -35,6 +35,7 @@ namespace sys { private: void *Address; ///< Address of first byte of memory area size_t Size; ///< Size, in bytes of the memory area + unsigned Flags = 0; friend class Memory; }; @@ -45,9 +46,11 @@ namespace sys { class Memory { public: enum ProtectionFlags { - MF_READ = 0x1000000, + MF_READ = 0x1000000, MF_WRITE = 0x2000000, - MF_EXEC = 0x4000000 + MF_EXEC = 0x4000000, + MF_RWE_MASK = 0x7000000, + MF_HUGE_HINT = 0x0000001 }; /// This method allocates a block of memory that is suitable for loading diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc index 3c4d324..3473b48 100644 --- a/llvm/lib/Support/Unix/Memory.inc +++ b/llvm/lib/Support/Unix/Memory.inc @@ -45,7 +45,7 @@ extern "C" void __clear_cache(void *, void*); namespace { int getPosixProtectionFlags(unsigned Flags) { - switch (Flags) { + switch (Flags & llvm::sys::Memory::MF_RWE_MASK) { case llvm::sys::Memory::MF_READ: return PROT_READ; case llvm::sys::Memory::MF_WRITE: @@ -114,6 +114,7 @@ Memory::allocateMappedMemory(size_t NumBytes, if (Start && Start % PageSize) Start += PageSize - Start % PageSize; + // FIXME: Handle huge page requests (MF_HUGE_HINT). 
void *Addr = ::mmap(reinterpret_cast(Start), NumBytes, Protect, MMFlags, fd, 0); if (Addr == MAP_FAILED) { diff --git a/llvm/lib/Support/Windows/Memory.inc b/llvm/lib/Support/Windows/Memory.inc index c9340c1..7153bf9 100644 --- a/llvm/lib/Support/Windows/Memory.inc +++ b/llvm/lib/Support/Windows/Memory.inc @@ -22,7 +22,7 @@ namespace { DWORD getWindowsProtectionFlags(unsigned Flags) { - switch (Flags) { + switch (Flags & llvm::sys::Memory::MF_RWE_MASK) { // Contrary to what you might expect, the Windows page protection flags // are not a bitwise combination of RWX values case llvm::sys::Memory::MF_READ: @@ -47,6 +47,9 @@ DWORD getWindowsProtectionFlags(unsigned Flags) { return PAGE_NOACCESS; } +// While we'd be happy to allocate single pages, the Windows allocation +// granularity may be larger than a single page (in practice, it is 64K) +// so mapping less than that will create an unreachable fragment of memory. size_t getAllocationGranularity() { SYSTEM_INFO Info; ::GetSystemInfo(&Info); @@ -56,6 +59,38 @@ size_t getAllocationGranularity() { return Info.dwAllocationGranularity; } +// Large/huge memory pages need explicit process permissions in order to be +// used. See https://blogs.msdn.microsoft.com/oldnewthing/20110128-00/?p=11643 +// Also large pages need to be manually enabled on your OS. If all this is +// successful, we return the minimal large memory page size. 
+static size_t enableProcessLargePages() { + HANDLE Token = 0; + size_t LargePageMin = GetLargePageMinimum(); + if (LargePageMin) + OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, + &Token); + if (!Token) + return 0; + LUID Luid; + if (!LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &Luid)) { + CloseHandle(Token); + return 0; + } + TOKEN_PRIVILEGES TP{}; + TP.PrivilegeCount = 1; + TP.Privileges[0].Luid = Luid; + TP.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + if (!AdjustTokenPrivileges(Token, FALSE, &TP, 0, 0, 0)) { + CloseHandle(Token); + return 0; + } + DWORD E = GetLastError(); + CloseHandle(Token); + if (E == ERROR_SUCCESS) + return LargePageMin; + return 0; +} + } // namespace namespace llvm { @@ -74,19 +109,20 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes, if (NumBytes == 0) return MemoryBlock(); - // While we'd be happy to allocate single pages, the Windows allocation - // granularity may be larger than a single page (in practice, it is 64K) - // so mapping less than that will create an unreachable fragment of memory. - // Avoid using one-time initialization of static locals here, since they - // aren't thread safe with MSVC. - static volatile size_t GranularityCached; - size_t Granularity = GranularityCached; - if (Granularity == 0) { - Granularity = getAllocationGranularity(); - GranularityCached = Granularity; + static size_t DefaultGranularity = getAllocationGranularity(); + static Optional LargePageGranularity = enableProcessLargePages(); + + DWORD AllocType = MEM_RESERVE | MEM_COMMIT; + bool HugePages = false; + size_t Granularity = DefaultGranularity; + + if ((Flags & MF_HUGE_HINT) && LargePageGranularity.hasValue()) { + AllocType |= MEM_LARGE_PAGES; + HugePages = true; + Granularity = *LargePageGranularity; } - const size_t NumBlocks = (NumBytes+Granularity-1)/Granularity; + size_t NumBlocks = (NumBytes + Granularity - 1) / Granularity; uintptr_t Start = NearBlock ? 
reinterpret_cast(NearBlock->base()) + NearBlock->size() @@ -99,13 +135,12 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes, DWORD Protect = getWindowsProtectionFlags(Flags); - void *PA = ::VirtualAlloc(reinterpret_cast(Start), - NumBlocks*Granularity, - MEM_RESERVE | MEM_COMMIT, Protect); + void *PA = ::VirtualAlloc(reinterpret_cast(Start), + NumBlocks * Granularity, AllocType, Protect); if (PA == NULL) { - if (NearBlock) { - // Try again without the NearBlock hint - return allocateMappedMemory(NumBytes, NULL, Flags, EC); + if (NearBlock || HugePages) { + // Try again without the NearBlock hint and without large memory pages + return allocateMappedMemory(NumBytes, NULL, Flags & ~MF_HUGE_HINT, EC); } EC = mapWindowsError(::GetLastError()); return MemoryBlock(); @@ -114,6 +149,7 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes, MemoryBlock Result; Result.Address = PA; Result.Size = NumBlocks*Granularity; + Result.Flags = (Flags & ~MF_HUGE_HINT) | (HugePages ? MF_HUGE_HINT : 0); if (Flags & MF_EXEC) Memory::InvalidateInstructionCache(Result.Address, Result.Size); diff --git a/llvm/unittests/Support/MemoryTest.cpp b/llvm/unittests/Support/MemoryTest.cpp index 473a0da..33cc3ed 100644 --- a/llvm/unittests/Support/MemoryTest.cpp +++ b/llvm/unittests/Support/MemoryTest.cpp @@ -105,6 +105,22 @@ TEST_P(MappedMemoryTest, AllocAndRelease) { EXPECT_FALSE(Memory::releaseMappedMemory(M1)); } +TEST_P(MappedMemoryTest, AllocAndReleaseHuge) { + CHECK_UNSUPPORTED(); + std::error_code EC; + MemoryBlock M1 = Memory::allocateMappedMemory( + sizeof(int), nullptr, Flags | Memory::MF_HUGE_HINT, EC); + EXPECT_EQ(std::error_code(), EC); + + // Test large/huge memory pages. In the worst case, 4kb pages should be + // returned, if large pages aren't available. 
+ + EXPECT_NE((void *)nullptr, M1.base()); + EXPECT_LE(sizeof(int), M1.size()); + + EXPECT_FALSE(Memory::releaseMappedMemory(M1)); +} + TEST_P(MappedMemoryTest, MultipleAllocAndRelease) { CHECK_UNSUPPORTED(); std::error_code EC; -- 2.7.4