From 0dd40cf28d2ea2a8b18a1379a10019f343c51d74 Mon Sep 17 00:00:00 2001
From: Kostya Kortchinsky
Date: Thu, 25 May 2017 16:19:57 +0000
Subject: [PATCH] [sanitizer] Change the 32-bit Primary AllocateRegion to reduce fragmentation

Summary:
Currently, AllocateRegion has a tendency to fragment memory: it allocates
`2*kRegionSize`, and if the memory is aligned, will unmap `kRegionSize` bytes,
thus creating a hole that can't itself be reused for another region. This is
exacerbated by the fact that if 2 regions get allocated one after another
without any `mmap` in between, the second one will be aligned due to mappings
generally being contiguous.

An idea, suggested by @alekseyshl, to prevent such behavior is to keep a
stash of regions: if the `2*kRegionSize` allocation is properly aligned,
split it in two, and stash the second part to be returned the next time a
region is requested.

I considered a couple of ways to implement this:
- an `IntrusiveList` of region candidates, storing `next` at the beginning
  of each region;
- a small array of region candidates living in the Primary.

While the second option is more constrained in terms of size, it offers
several advantages:
- security-wise, the intrusive list stores a pointer at the beginning of each
  region candidate, which could be overflowed into and abused when popping an
  element; the array keeps no pointers in the regions;
- we do not dirty the first page of the region by storing something in it;
- unless several threads request regions simultaneously from different size
  classes, the stash rarely goes above 1 entry.

I am not certain about the Windows impact of this change, as sanitizer_win.cc
has its own version of MmapAlignedOrDie; maybe someone could chime in on
this. MmapAlignedOrDie is effectively unused after this change and could be
removed at a later point. I didn't notice any sizeable performance gain, even
though we are saving a few mmap/munmap syscalls.

Reviewers: alekseyshl, kcc, dvyukov

Reviewed By: alekseyshl

Subscribers: llvm-commits, kubamracek

Differential Revision: https://reviews.llvm.org/D33454

llvm-svn: 303879
---
 .../sanitizer_allocator_primary32.h | 63 ++++++++++++++++++----
 1 file changed, 53 insertions(+), 10 deletions(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h
index 0f6f4f7..d006e64 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h
@@ -24,7 +24,7 @@ template <class SizeClassAllocator> struct SizeClassAllocator32LocalCache;
 // be returned by MmapOrDie().
 //
 // Region:
-// a result of a single call to MmapAlignedOrDie(kRegionSize, kRegionSize).
+// a result of an allocation of kRegionSize bytes aligned on kRegionSize.
 // Since the regions are aligned by kRegionSize, there are exactly
 // kNumPossibleRegions possible regions in the address space and so we keep
 // a ByteMap possible_regions to store the size classes of each Region.
@@ -106,6 +106,7 @@ class SizeClassAllocator32 {
   void Init(s32 release_to_os_interval_ms) {
     possible_regions.TestOnlyInit();
     internal_memset(size_class_info_array, 0, sizeof(size_class_info_array));
+    num_stashed_regions = 0;
   }
 
   s32 ReleaseToOSIntervalMs() const {
@@ -275,15 +276,52 @@ class SizeClassAllocator32 {
     return mem & ~(kRegionSize - 1);
   }
 
+  // Allocates a region of kRegionSize bytes, aligned on kRegionSize, by first
+  // allocating 2 * kRegionSize. If the result of the initial allocation is
+  // aligned, split it in two, and attempt to store the second part into a
+  // stash. In the event the stash is full, just unmap the superfluous memory.
+  // If the initial allocation is not aligned, trim the memory before and after.
+  uptr AllocateRegionSlow(AllocatorStats *stat) {
+    uptr map_size = 2 * kRegionSize;
+    uptr map_res = (uptr)MmapOrDie(map_size, "SizeClassAllocator32");
+    uptr region = map_res;
+    bool trim_region = true;
+    if (IsAligned(region, kRegionSize)) {
+      // We are aligned, attempt to stash the second half.
+      SpinMutexLock l(&regions_stash_mutex);
+      if (num_stashed_regions < kMaxStashedRegions) {
+        regions_stash[num_stashed_regions++] = region + kRegionSize;
+        trim_region = false;
+      }
+    }
+    // Trim the superfluous memory in front of and behind us.
+    if (trim_region) {
+      // If map_res is already aligned on kRegionSize (in the event of a full
+      // stash), the following two lines amount to a no-op.
+      region = (map_res + kRegionSize - 1) & ~(kRegionSize - 1);
+      UnmapOrDie((void*)map_res, region - map_res);
+      uptr end = region + kRegionSize;
+      UnmapOrDie((void*)end, map_res + map_size - end);
+      map_size = kRegionSize;
+    }
+    MapUnmapCallback().OnMap(region, map_size);
+    stat->Add(AllocatorStatMapped, map_size);
+    return region;
+  }
+
   uptr AllocateRegion(AllocatorStats *stat, uptr class_id) {
     CHECK_LT(class_id, kNumClasses);
-    uptr res = reinterpret_cast<uptr>(MmapAlignedOrDie(kRegionSize, kRegionSize,
-                                                       "SizeClassAllocator32"));
-    MapUnmapCallback().OnMap(res, kRegionSize);
-    stat->Add(AllocatorStatMapped, kRegionSize);
-    CHECK_EQ(0U, (res & (kRegionSize - 1)));
-    possible_regions.set(ComputeRegionId(res), static_cast<u8>(class_id));
-    return res;
+    uptr region = 0;
+    {
+      SpinMutexLock l(&regions_stash_mutex);
+      if (num_stashed_regions > 0)
+        region = regions_stash[--num_stashed_regions];
+    }
+    if (!region)
+      region = AllocateRegionSlow(stat);
+    CHECK(IsAligned(region, kRegionSize));
+    possible_regions.set(ComputeRegionId(region), static_cast<u8>(class_id));
+    return region;
   }
 
   SizeClassInfo *GetSizeClassInfo(uptr class_id) {
@@ -316,8 +354,13 @@ class SizeClassAllocator32 {
     }
   }
 
+  // Unless several threads request regions simultaneously from different size
+  // classes, the stash rarely contains more than 1 entry.
+  static const uptr kMaxStashedRegions = 8;
+  SpinMutex regions_stash_mutex;
+  uptr num_stashed_regions;
+  uptr regions_stash[kMaxStashedRegions];
+
   ByteMap possible_regions;
   SizeClassInfo size_class_info_array[kNumClasses];
 };
-
-
-- 
2.7.4
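
To make the trimming arithmetic in AllocateRegionSlow concrete (a worked
example, not part of the patch): with kRegionSize = 0x100000 and an unaligned
map_res = 0x180000, the rounded-up region is (0x180000 + 0xfffff) & ~0xfffff =
0x200000, so 0x80000 bytes are unmapped in front of the region; end is then
0x300000 while map_res + map_size is 0x380000, so another 0x80000 bytes are
unmapped behind it, leaving exactly one aligned kRegionSize region mapped.
When map_res is already aligned but the stash is full, the round-up and the
front unmap are no-ops and only the trailing kRegionSize is released.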
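
For readers who want to see the stash behavior in isolation, here is a minimal
standalone sketch, not compiler-rt code: names such as FakeMmap, FakeUnmap and
the 1 MiB kRegionSize are illustrative stand-ins, the single-threaded sketch
omits the SpinMutex that guards the real stash, and mmap contiguity is modeled
with a bump pointer that reuses a freed tail. It shows a misaligned allocation
taking the trimming path and an aligned one feeding the stash, which serves
the following request without any new mapping.

    // region_stash_sketch.cc -- illustrative only, not compiler-rt code.
    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    namespace {

    constexpr uint64_t kRegionSize = 1 << 20;  // 1 MiB regions, for illustration.
    constexpr uint64_t kMaxStashedRegions = 8;

    uint64_t regions_stash[kMaxStashedRegions];
    uint64_t num_stashed_regions = 0;

    bool IsAligned(uint64_t a, uint64_t alignment) {
      return (a & (alignment - 1)) == 0;
    }

    // Stand-ins for MmapOrDie/UnmapOrDie: a bump pointer that, like contiguous
    // kernel mappings, hands the next mapping the tail freed by an unmap.
    uint64_t next_addr = kRegionSize / 2;  // Start misaligned on purpose.
    uint64_t FakeMmap(uint64_t size) {
      uint64_t res = next_addr;
      next_addr += size;
      return res;
    }
    void FakeUnmap(uint64_t addr, uint64_t size) {
      std::printf("  unmap [%#llx, %#llx)\n", (unsigned long long)addr,
                  (unsigned long long)(addr + size));
      if (addr + size == next_addr) next_addr = addr;
    }

    uint64_t AllocateRegionSlow() {
      uint64_t map_size = 2 * kRegionSize;
      uint64_t map_res = FakeMmap(map_size);
      uint64_t region = map_res;
      bool trim_region = true;
      if (IsAligned(region, kRegionSize) &&
          num_stashed_regions < kMaxStashedRegions) {
        // Aligned: keep the first half, stash the second for the next caller.
        regions_stash[num_stashed_regions++] = region + kRegionSize;
        trim_region = false;
      }
      if (trim_region) {
        // Round up to the next kRegionSize boundary and release the slack on
        // both sides of the chosen region.
        region = (map_res + kRegionSize - 1) & ~(kRegionSize - 1);
        FakeUnmap(map_res, region - map_res);
        uint64_t end = region + kRegionSize;
        FakeUnmap(end, map_res + map_size - end);
      }
      return region;
    }

    uint64_t AllocateRegion() {
      // Fast path: reuse a previously stashed half, if any.
      if (num_stashed_regions > 0) return regions_stash[--num_stashed_regions];
      return AllocateRegionSlow();
    }

    }  // namespace

    int main() {
      for (int i = 0; i < 4; ++i) {
        uint64_t region = AllocateRegion();
        assert(IsAligned(region, kRegionSize));
        std::printf("region %d -> %#llx (stash depth %llu)\n", i,
                    (unsigned long long)region,
                    (unsigned long long)num_stashed_regions);
      }
      return 0;
    }

Running the sketch, the first (misaligned) request trims both ends, the second
lands aligned and stashes its second half, and the third is served straight
from the stash, which is the fragmentation-avoiding behavior the patch is
after.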