From 1ab649649d1ab15ca3381dfd364e3e79c009a441 Mon Sep 17 00:00:00 2001
From: Kostya Serebryany
Date: Tue, 9 Aug 2016 23:30:22 +0000
Subject: [PATCH] [sanitizer] use 32-bit offset instead of 64-bit pointers in
 the 64-bit allocator's transfer batches. This saves 2x memory for the
 transfer batches (up to ~1.5% overall in some cases)

llvm-svn: 278179
---
 compiler-rt/lib/msan/msan_allocator.cc        |  2 +-
 .../sanitizer_allocator_local_cache.h         | 12 +++---
 .../sanitizer_allocator_primary32.h           | 19 +++++++--
 .../sanitizer_allocator_primary64.h           | 45 ++++++++++++++++++----
 .../sanitizer_allocator_size_class_map.h      | 25 ++++++------
 .../tests/sanitizer_allocator_test.cc         |  2 +-
 6 files changed, 75 insertions(+), 30 deletions(-)

diff --git a/compiler-rt/lib/msan/msan_allocator.cc b/compiler-rt/lib/msan/msan_allocator.cc
index b7d3947..fdde4b4 100644
--- a/compiler-rt/lib/msan/msan_allocator.cc
+++ b/compiler-rt/lib/msan/msan_allocator.cc
@@ -56,7 +56,7 @@ struct MsanMapUnmapCallback {
 #else
   static const uptr kAllocatorSpace = 0x600000000000ULL;
 #endif
-  static const uptr kAllocatorSize = 0x80000000000;  // 8T.
+  static const uptr kAllocatorSize = 0x40000000000;  // 4T.
   static const uptr kMetadataSize = sizeof(Metadata);
   static const uptr kMaxAllowedMallocSize = 8UL << 30;

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_local_cache.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_local_cache.h
index 38d63a5..0ad22ba 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_local_cache.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_local_cache.h
@@ -74,7 +74,7 @@ struct SizeClassAllocatorLocalCache {
   struct PerClass {
     uptr count;
     uptr max_count;
-    void *batch[2 * SizeClassMap::kMaxNumCached];
+    void *batch[2 * TransferBatch::kMaxNumCached];
   };
   PerClass per_class_[kNumClasses];
   AllocatorStats stats_;
@@ -84,7 +84,7 @@ struct SizeClassAllocatorLocalCache {
       return;
    for (uptr i = 0; i < kNumClasses; i++) {
      PerClass *c = &per_class_[i];
-      c->max_count = 2 * SizeClassMap::MaxCached(i);
+      c->max_count = 2 * TransferBatch::MaxCached(i);
    }
  }
@@ -109,9 +109,8 @@ struct SizeClassAllocatorLocalCache {
            ?
            0 : SizeClassMap::kBatchClassID;
    if (Allocator::ClassIdToSize(class_id) <
-        sizeof(TransferBatch) -
-            sizeof(uptr) * (SizeClassMap::kMaxNumCached -
-                            SizeClassMap::MaxCached(class_id)))
+        TransferBatch::AllocationSizeRequiredForNElements(
+            TransferBatch::MaxCached(class_id)))
      return SizeClassMap::ClassID(sizeof(TransferBatch));
    return 0;
  }
@@ -152,7 +151,8 @@ struct SizeClassAllocatorLocalCache {
      uptr first_idx_to_drain = c->count - cnt;
      TransferBatch *b = CreateBatch(
          class_id, allocator, (TransferBatch *)c->batch[first_idx_to_drain]);
-      b->SetFromArray(&c->batch[first_idx_to_drain], cnt);
+      b->SetFromArray(allocator->GetRegionBeginBySizeClass(class_id),
+                      &c->batch[first_idx_to_drain], cnt);
      c->count -= cnt;
      allocator->DeallocateBatch(&stats_, class_id, b);
    }
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h
index 58237ca..e5cf56f 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h
@@ -42,8 +42,8 @@ template
    free_list.push_front(b);
  }

+  uptr GetRegionBeginBySizeClass(uptr class_id) { return 0; }
+
  bool PointerIsMine(const void *p) {
    uptr mem = reinterpret_cast<uptr>(p);
    if (mem < kSpaceBeg || mem >= kSpaceBeg + kSpaceSize)
@@ -262,7 +275,7 @@ class SizeClassAllocator32 {
    uptr size = ClassIdToSize(class_id);
    uptr reg = AllocateRegion(stat, class_id);
    uptr n_chunks = kRegionSize / (size + kMetadataSize);
-    uptr max_count = SizeClassMap::MaxCached(class_id);
+    uptr max_count = TransferBatch::MaxCached(class_id);
    TransferBatch *b = nullptr;
    for (uptr i = reg; i < reg + n_chunks * size; i += size) {
      if (!b) {
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h
index 37a34e6..393a019 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h
@@ -37,32 +37,55 @@ template
+        batch_[i] = static_cast<u32>((beg_offset + i * step) >> 4);
    }

-    void SetFromArray(void *batch[], uptr count) {
+    void SetFromArray(uptr region_beg, void *batch[], uptr count) {
      count_ = count;
      CHECK_LE(count_, kMaxNumCached);
+      region_beg_ = region_beg;
      for (uptr i = 0; i < count; i++)
-        batch_[i] = batch[i];
+        batch_[i] = static_cast<u32>(
+            ((reinterpret_cast<uptr>(batch[i])) - region_beg) >> 4);
    }
    void CopyToArray(void *to_batch[]) {
      for (uptr i = 0, n = Count(); i < n; i++)
-        to_batch[i] = batch_[i];
+        to_batch[i] = reinterpret_cast<void*>(Get(i));
    }
    uptr Count() const { return count_; }
+
+    // How much memory do we need for a batch containing n elements.
+    static uptr AllocationSizeRequiredForNElements(uptr n) {
+      return sizeof(uptr) * 2 + sizeof(u32) * n;
+    }
+    static uptr MaxCached(uptr class_id) {
+      return Min(kMaxNumCached, SizeClassMap::MaxCachedHint(class_id));
+    }
+
+    TransferBatch *next;

   private:
-    uptr count_;
-    void *batch_[kMaxNumCached];
+    uptr Get(uptr i) {
+      return region_beg_ + (static_cast<uptr>(batch_[i]) << 4);
+    }
+    // Instead of storing 64-bit pointers we store 32-bit offsets from the
+    // region start divided by 16. This imposes two limitations:
+    // * all allocations are 16-aligned,
+    // * regions are not larger than 2^36.
+    uptr region_beg_ : SANITIZER_WORDSIZE - 10;  // Region-beg is 4096-aligned.
+    uptr count_ : 10;
+    u32 batch_[kMaxNumCached];
  };
  static const uptr kBatchSize = sizeof(TransferBatch);
  COMPILER_CHECK((kBatchSize & (kBatchSize - 1)) == 0);
+  COMPILER_CHECK(sizeof(TransferBatch) ==
+                 SizeClassMap::kMaxNumCachedHint * sizeof(u32));
+  COMPILER_CHECK(TransferBatch::kMaxNumCached < 1024);  // count_ uses 10 bits.

  static uptr ClassIdToSize(uptr class_id) {
    return class_id == SizeClassMap::kBatchClassID
@@ -136,6 +159,10 @@ class SizeClassAllocator64 {
                          space_beg;
  }

+  uptr GetRegionBeginBySizeClass(uptr class_id) {
+    return SpaceBeg() + kRegionSize * class_id;
+  }
+
  uptr GetSizeClass(const void *p) {
    if (kUsingConstantSpaceBeg && (kSpaceBeg % kSpaceSize) == 0)
      return ((reinterpret_cast<uptr>(p)) / kRegionSize) % kNumClassesRounded;
@@ -277,6 +304,8 @@ class SizeClassAllocator64 {
  uptr SpaceEnd() const { return SpaceBeg() + kSpaceSize; }
  // kRegionSize must be >= 2^32.
  COMPILER_CHECK((kRegionSize) >= (1ULL << (SANITIZER_WORDSIZE / 2)));
+  // kRegionSize must be <= 2^36, see TransferBatch.
+  COMPILER_CHECK((kRegionSize) <= (1ULL << (SANITIZER_WORDSIZE / 2 + 4)));
  // Call mmap for user memory with at least this size.
  static const uptr kUserMapSize = 1 << 16;
  // Call mmap for metadata memory with at least this size.
@@ -320,7 +349,7 @@ class SizeClassAllocator64 {
    if (b)
      return b;
    uptr size = ClassIdToSize(class_id);
-    uptr count = SizeClassMap::MaxCached(class_id);
+    uptr count = TransferBatch::MaxCached(class_id);
    uptr beg_idx = region->allocated_user;
    uptr end_idx = beg_idx + count * size;
    uptr region_beg = SpaceBeg() + kRegionSize * class_id;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_size_class_map.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_size_class_map.h
index 3979b9d..b8917a4 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_size_class_map.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_size_class_map.h
@@ -29,7 +29,8 @@
 //
 // This class also gives a hint to a thread-caching allocator about the amount
 // of chunks that need to be cached per-thread:
-//  - kMaxNumCached is the maximal number of chunks per size class.
+//  - kMaxNumCachedHint is a hint for maximal number of chunks per size class.
+//    The actual number is computed in TransferBatch.
 //  - (1 << kMaxBytesCachedLog) is the maximal number of bytes per size class.
 //
 // There is one extra size class kBatchClassID that is used for allocating
 //
 // c52 => s: 131072 diff: +16384 14% l 17 cached: 1 131072; id 52

-template <uptr kMaxSizeLog, uptr kMaxNumCachedT, uptr kMaxBytesCachedLog>
+template <uptr kMaxSizeLog, uptr kMaxNumCachedHintT, uptr kMaxBytesCachedLog>
 class SizeClassMap {
  static const uptr kMinSizeLog = 4;
  static const uptr kMidSizeLog = kMinSizeLog + 4;
@@ -89,8 +90,10 @@ class SizeClassMap {
  static const uptr M = (1 << S) - 1;

 public:
-  static const uptr kMaxNumCached = kMaxNumCachedT;
-  COMPILER_CHECK(((kMaxNumCached + 2) & (kMaxNumCached + 1)) == 0);
+  // kMaxNumCachedHintT is a power of two. It serves as a hint
+  // for the size of TransferBatch, the actual size could be a bit smaller.
+  static const uptr kMaxNumCachedHint = kMaxNumCachedHintT;
+  COMPILER_CHECK((kMaxNumCachedHint & (kMaxNumCachedHint - 1)) == 0);

  static const uptr kMaxSize = 1UL << kMaxSizeLog;
  static const uptr kNumClasses =
@@ -124,15 +127,15 @@ class SizeClassMap {
    return kMidClass + (l1 << S) + hbits + (lbits > 0);
  }

-  static uptr MaxCached(uptr class_id) {
+  static uptr MaxCachedHint(uptr class_id) {
    if (class_id == 0) return 0;
    // Estimate the result for kBatchClassID because this class
    // does not know the exact size of TransferBatch.
    // Moreover, we need to cache fewer batches than user chunks,
    // so this number could be small.
-    if (class_id == kBatchClassID) return Min((uptr)8, kMaxNumCached);
+    if (class_id == kBatchClassID) return 8;
    uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
-    return Max(1, Min(kMaxNumCached, n));
+    return Max(1, Min(kMaxNumCachedHint, n));
  }

  static void Print() {
@@ -145,12 +148,12 @@ class SizeClassMap {
      uptr d = s - prev_s;
      uptr p = prev_s ? (d * 100 / prev_s) : 0;
      uptr l = s ? MostSignificantSetBitIndex(s) : 0;
-      uptr cached = MaxCached(i) * s;
+      uptr cached = MaxCachedHint(i) * s;
      if (i == kBatchClassID) d = l = p = 0;
      Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
             "cached: %zd %zd; id %zd\n",
-             i, Size(i), d, p, l, MaxCached(i), cached, ClassID(s));
+             i, Size(i), d, p, l, MaxCachedHint(i), cached, ClassID(s));
      total_cached += cached;
      prev_s = s;
    }
@@ -183,6 +186,6 @@ class SizeClassMap {
  }
};

-typedef SizeClassMap<17, 126, 16> DefaultSizeClassMap;
-typedef SizeClassMap<17, 62, 14> CompactSizeClassMap;
+typedef SizeClassMap<17, 128, 16> DefaultSizeClassMap;
+typedef SizeClassMap<17, 64, 14> CompactSizeClassMap;
 template<class SizeClassAllocator> struct SizeClassAllocatorLocalCache;
diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cc b/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
index 39b177d..a558f08 100644
--- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
+++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cc
@@ -841,7 +841,7 @@ TEST(SanitizerCommon, LargeMmapAllocatorBlockBegin) {
 // Regression test for out-of-memory condition in PopulateFreeList().
 TEST(SanitizerCommon, SizeClassAllocator64PopulateFreeListOOM) {
   // In a world where regions are small and chunks are huge...
-  typedef SizeClassMap<63, 126, 16> SpecialSizeClassMap;
+  typedef SizeClassMap<63, 128, 16> SpecialSizeClassMap;
   typedef SizeClassAllocator64 SpecialAllocator64;
   const uptr kRegionSize =
-- 
2.7.4
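
The core idea of the patch, restated outside the diff: a transfer batch no longer stores raw 64-bit chunk pointers but 32-bit offsets from the start of the size-class region, shifted right by 4, and it packs the region base and the element count into a single word. The following is a minimal, self-contained sketch of that layout for a 64-bit target; the names (CompressedBatch, kShift) and the capacity are illustrative rather than the compiler-rt identifiers, and it assumes user-space addresses fit in 54 bits.

#include <cassert>
#include <cstddef>
#include <cstdint>

// Model of the compressed batch: each element is ((ptr - region_beg) >> 4)
// stored as a u32, so chunks must be 16-aligned and a region may span at
// most 2^32 * 16 = 2^36 bytes.
struct CompressedBatch {
  static constexpr std::size_t kShift = 4;
  static constexpr std::size_t kMaxNumCached = 124;  // illustrative capacity

  void SetFromArray(std::uintptr_t region_beg, void *batch[],
                    std::size_t count) {
    assert(count <= kMaxNumCached);
    count_ = count;
    region_beg_ = region_beg;
    for (std::size_t i = 0; i < count; i++)
      batch_[i] = static_cast<std::uint32_t>(
          (reinterpret_cast<std::uintptr_t>(batch[i]) - region_beg) >> kShift);
  }

  void CopyToArray(void *to_batch[]) const {
    for (std::size_t i = 0; i < count_; i++)
      to_batch[i] = reinterpret_cast<void *>(Get(i));
  }

  std::size_t Count() const { return count_; }

 private:
  std::uintptr_t Get(std::size_t i) const {
    // Undo the encoding: widen the 32-bit offset, shift back, add the base.
    return static_cast<std::uintptr_t>(
        region_beg_ + (static_cast<std::uint64_t>(batch_[i]) << kShift));
  }

  // The region base and the count share one 64-bit word, as in the patch:
  // 10 bits cover counts below 1024, the remaining 54 bits hold the base.
  std::uint64_t region_beg_ : 54;
  std::uint64_t count_ : 10;
  std::uint32_t batch_[kMaxNumCached];
};

int main() {
  // Round-trip three fake 16-aligned chunks through the batch.
  alignas(16) static char region[4096];
  CompressedBatch b;
  void *chunks[3] = {region, region + 16, region + 32};
  b.SetFromArray(reinterpret_cast<std::uintptr_t>(region), chunks, 3);

  void *out[3];
  b.CopyToArray(out);
  for (int i = 0; i < 3; i++)
    assert(out[i] == chunks[i]);
  assert(b.Count() == 3);
  return 0;
}

The round trip in main mirrors what the local cache does on drain and refill: encode against the region base on the way in, decode on the way out. The real TransferBatch additionally carries a free-list `next` pointer, omitted here.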
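
The batch-size bookkeeping behind the new COMPILER_CHECKs can be spelled out directly. This is a sketch under the assumption of a 64-bit target and the new DefaultSizeClassMap hint of 128; the helper names mirror the patch, while the derived capacity of 124 is inferred from the visible checks rather than quoted from the (truncated) diff.

#include <cstddef>
#include <cstdint>

// The new batch header is two uptr-sized fields: the free-list `next`
// pointer plus the word packing region_beg_ (54 bits) and count_ (10 bits).
constexpr std::size_t kHeaderSize = 2 * sizeof(std::uint64_t);  // 16 bytes

// DefaultSizeClassMap now passes 128 as kMaxNumCachedHint; the batch trims
// its capacity so its total size is exactly hint * sizeof(u32): the 16-byte
// header displaces four u32 slots, leaving room for 124 offsets.
constexpr std::size_t kMaxNumCachedHint = 128;
constexpr std::size_t kMaxNumCached =
    kMaxNumCachedHint - kHeaderSize / sizeof(std::uint32_t);  // 124

// Same formula as TransferBatch::AllocationSizeRequiredForNElements(n).
constexpr std::size_t AllocationSizeRequiredForNElements(std::size_t n) {
  return kHeaderSize + sizeof(std::uint32_t) * n;
}

constexpr std::size_t kBatchSize =
    AllocationSizeRequiredForNElements(kMaxNumCached);  // 512 bytes

// What the added COMPILER_CHECKs in sanitizer_allocator_primary64.h assert:
// the batch is exactly hint * sizeof(u32) bytes, a power of two, and the
// 10-bit count field is wide enough.
static_assert(kBatchSize == kMaxNumCachedHint * sizeof(std::uint32_t),
              "sizeof(TransferBatch) == kMaxNumCachedHint * sizeof(u32)");
static_assert((kBatchSize & (kBatchSize - 1)) == 0,
              "batch size is a power of two");
static_assert(kMaxNumCached < 1024, "count_ fits in 10 bits");

// The 2x saving in the subject line is per element: a cached chunk used to
// cost sizeof(void*) == 8 bytes and now costs sizeof(u32) == 4 bytes.
static_assert(sizeof(void *) == 2 * sizeof(std::uint32_t),
              "sketch assumes a 64-bit target");

int main() { return 0; }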
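
Finally, the reason for the new check that kRegionSize must not exceed 2^36: an offset is stored in 32 bits after being shifted right by 4, so the largest encodable distance from the region base is just under 2^32 * 16 bytes, and the shift itself relies on 16-byte-aligned chunks. A few lines make the bound explicit; kRegionSize here is an illustrative value, not the one computed inside the 64-bit allocator.

#include <cstdint>

// 32 offset bits plus a 4-bit shift bound the region at 2^36 bytes; with
// SANITIZER_WORDSIZE == 64 this is the 1ULL << (64 / 2 + 4) limit that the
// patch's COMPILER_CHECK enforces next to the existing 2^32 lower bound.
constexpr std::uint64_t kOffsetBits = 32;
constexpr std::uint64_t kShift = 4;  // offsets are stored divided by 16
constexpr std::uint64_t kMaxEncodableRegion = 1ULL << (kOffsetBits + kShift);

constexpr std::uint64_t kRegionSize = 1ULL << 32;  // illustrative value

static_assert(kMaxEncodableRegion == (1ULL << 36), "2^36 cap");
static_assert(kRegionSize >= (1ULL << 32), "lower bound kept from before");
static_assert(kRegionSize <= kMaxEncodableRegion,
              "every chunk offset fits in a shifted 32-bit value");

int main() { return 0; }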