From fc69967a4b98df34c03a3c57134940688b863dff Mon Sep 17 00:00:00 2001 From: Kostya Kortchinsky Date: Sun, 16 Feb 2020 15:29:46 -0800 Subject: [PATCH] [scudo][standalone] Shift some data from dynamic to static Summary: Most of our larger data is dynamically allocated (via `map`) but it became an hindrance with regard to init time, for a cost to benefit ratio that is not great. So change the `TSD`s, `RegionInfo`, `ByteMap` to be static. Additionally, for reclaiming, we used mapped & unmapped a buffer each time, which is costly. It turns out that we can have a static buffer, and that there isn't much contention on it. One of the other things changed here, is that we hard set the number of TSDs on Android to the maximum number, as there could be a situation where cores are put to sleep and we could miss some. Subscribers: mgorny, #sanitizers, llvm-commits Tags: #sanitizers, #llvm Differential Revision: https://reviews.llvm.org/D74696 --- compiler-rt/lib/scudo/standalone/CMakeLists.txt | 1 + compiler-rt/lib/scudo/standalone/bytemap.h | 10 +++----- compiler-rt/lib/scudo/standalone/primary32.h | 6 +++-- compiler-rt/lib/scudo/standalone/primary64.h | 24 ++++++------------ compiler-rt/lib/scudo/standalone/release.cpp | 16 ++++++++++++ compiler-rt/lib/scudo/standalone/release.h | 32 ++++++++++++++++++------ compiler-rt/lib/scudo/standalone/tsd_exclusive.h | 19 ++++++-------- compiler-rt/lib/scudo/standalone/tsd_shared.h | 11 +++----- compiler-rt/lib/scudo/standalone/wrappers_c.inc | 2 +- 9 files changed, 69 insertions(+), 52 deletions(-) create mode 100644 compiler-rt/lib/scudo/standalone/release.cpp diff --git a/compiler-rt/lib/scudo/standalone/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/CMakeLists.txt index 9b85256..91b48b7 100644 --- a/compiler-rt/lib/scudo/standalone/CMakeLists.txt +++ b/compiler-rt/lib/scudo/standalone/CMakeLists.txt @@ -88,6 +88,7 @@ set(SCUDO_SOURCES flags_parser.cpp fuchsia.cpp linux.cpp + release.cpp report.cpp string_utils.cpp ) diff --git a/compiler-rt/lib/scudo/standalone/bytemap.h b/compiler-rt/lib/scudo/standalone/bytemap.h index b3582a5..e0d54f4 100644 --- a/compiler-rt/lib/scudo/standalone/bytemap.h +++ b/compiler-rt/lib/scudo/standalone/bytemap.h @@ -17,12 +17,10 @@ namespace scudo { template class FlatByteMap { public: - void initLinkerInitialized() { - Map = reinterpret_cast(map(nullptr, Size, "scudo:bytemap")); - } - void init() { initLinkerInitialized(); } + void initLinkerInitialized() {} + void init() { memset(Map, 0, sizeof(Map)); } - void unmapTestOnly() { unmap(reinterpret_cast(Map), Size); } + void unmapTestOnly() {} void set(uptr Index, u8 Value) { DCHECK_LT(Index, Size); @@ -38,7 +36,7 @@ public: void enable() {} private: - u8 *Map; + u8 Map[Size]; }; } // namespace scudo diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h index 79345cb..b50f91d 100644 --- a/compiler-rt/lib/scudo/standalone/primary32.h +++ b/compiler-rt/lib/scudo/standalone/primary32.h @@ -40,7 +40,8 @@ namespace scudo { template class SizeClassAllocator32 { + s32 MaxReleaseToOsIntervalMs = INT32_MAX> +class SizeClassAllocator32 { public: typedef SizeClassMapT SizeClassMap; // The bytemap can only track UINT8_MAX - 1 classes. @@ -49,7 +50,8 @@ public: static_assert((1UL << RegionSizeLog) >= SizeClassMap::MaxSize, ""); typedef SizeClassAllocator32 ThisT; + MaxReleaseToOsIntervalMs> + ThisT; typedef SizeClassAllocatorLocalCache CacheT; typedef typename CacheT::TransferBatch TransferBatch; static const bool SupportsMemoryTagging = false; diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h index bc31db8..188f308 100644 --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -46,10 +46,9 @@ template + typedef SizeClassAllocator64< + SizeClassMap, RegionSizeLog, MinReleaseToOsIntervalMs, + MaxReleaseToOsIntervalMs, MaySupportMemoryTagging> ThisT; typedef SizeClassAllocatorLocalCache CacheT; typedef typename CacheT::TransferBatch TransferBatch; @@ -69,11 +68,6 @@ public: PrimaryBase = reinterpret_cast( map(nullptr, PrimarySize, "scudo:primary", MAP_NOACCESS, &Data)); - RegionInfoArray = reinterpret_cast( - map(nullptr, sizeof(RegionInfo) * NumClasses, "scudo:regioninfo")); - DCHECK_EQ(reinterpret_cast(RegionInfoArray) % SCUDO_CACHE_LINE_SIZE, - 0); - u32 Seed; if (UNLIKELY(!getRandom(reinterpret_cast(&Seed), sizeof(Seed)))) Seed = static_cast(getMonotonicTime() ^ (PrimaryBase >> 12)); @@ -106,8 +100,6 @@ public: void unmapTestOnly() { unmap(reinterpret_cast(PrimaryBase), PrimarySize, UNMAP_ALL, &Data); - unmap(reinterpret_cast(RegionInfoArray), - sizeof(RegionInfo) * NumClasses); } TransferBatch *popBatch(CacheT *C, uptr ClassId) { @@ -156,7 +148,7 @@ public: } } - template void iterateOverBlocks(F Callback) const { + template void iterateOverBlocks(F Callback) { for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) continue; @@ -169,7 +161,7 @@ public: } } - void getStats(ScopedString *Str) const { + void getStats(ScopedString *Str) { // TODO(kostyak): get the RSS per region. uptr TotalMapped = 0; uptr PoppedBlocks = 0; @@ -252,12 +244,12 @@ private: static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); uptr PrimaryBase; - RegionInfo *RegionInfoArray; MapPlatformData Data; atomic_s32 ReleaseToOsIntervalMs; bool UseMemoryTagging; + RegionInfo RegionInfoArray[NumClasses]; - RegionInfo *getRegionInfo(uptr ClassId) const { + RegionInfo *getRegionInfo(uptr ClassId) { DCHECK_LT(ClassId, NumClasses); return &RegionInfoArray[ClassId]; } @@ -371,7 +363,7 @@ private: return B; } - void getStats(ScopedString *Str, uptr ClassId, uptr Rss) const { + void getStats(ScopedString *Str, uptr ClassId, uptr Rss) { RegionInfo *Region = getRegionInfo(ClassId); if (Region->MappedUser == 0) return; diff --git a/compiler-rt/lib/scudo/standalone/release.cpp b/compiler-rt/lib/scudo/standalone/release.cpp new file mode 100644 index 0000000..e144b35 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/release.cpp @@ -0,0 +1,16 @@ +//===-- release.cpp ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "release.h" + +namespace scudo { + +HybridMutex PackedCounterArray::Mutex = {}; +uptr PackedCounterArray::StaticBuffer[1024]; + +} // namespace scudo diff --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h index 37aa5d4..c4f6797 100644 --- a/compiler-rt/lib/scudo/standalone/release.h +++ b/compiler-rt/lib/scudo/standalone/release.h @@ -11,6 +11,7 @@ #include "common.h" #include "list.h" +#include "mutex.h" namespace scudo { @@ -39,11 +40,13 @@ private: }; // A packed array of Counters. Each counter occupies 2^N bits, enough to store -// counter's MaxValue. Ctor will try to allocate the required Buffer via map() -// and the caller is expected to check whether the initialization was successful -// by checking isAllocated() result. For the performance sake, none of the -// accessors check the validity of the arguments, It is assumed that Index is -// always in [0, N) range and the value is not incremented past MaxValue. +// counter's MaxValue. Ctor will try to use a static buffer first, and if that +// fails (the buffer is too small or already locked), will allocate the +// required Buffer via map(). The caller is expected to check whether the +// initialization was successful by checking isAllocated() result. For +// performance sake, none of the accessors check the validity of the arguments, +// It is assumed that Index is always in [0, N) range and the value is not +// incremented past MaxValue. class PackedCounterArray { public: PackedCounterArray(uptr NumCounters, uptr MaxValue) : N(NumCounters) { @@ -66,11 +69,20 @@ public: BufferSize = (roundUpTo(N, static_cast(1U) << PackingRatioLog) >> PackingRatioLog) * sizeof(*Buffer); - Buffer = reinterpret_cast( - map(nullptr, BufferSize, "scudo:counters", MAP_ALLOWNOMEM)); + if (BufferSize <= StaticBufferSize && Mutex.tryLock()) { + Buffer = &StaticBuffer[0]; + memset(Buffer, 0, BufferSize); + } else { + Buffer = reinterpret_cast( + map(nullptr, BufferSize, "scudo:counters", MAP_ALLOWNOMEM)); + } } ~PackedCounterArray() { - if (isAllocated()) + if (!isAllocated()) + return; + if (Buffer == &StaticBuffer[0]) + Mutex.unlock(); + else unmap(reinterpret_cast(Buffer), BufferSize); } @@ -110,6 +122,10 @@ private: uptr BufferSize; uptr *Buffer; + + static HybridMutex Mutex; + static const uptr StaticBufferSize = 1024U; + static uptr StaticBuffer[StaticBufferSize]; }; template class FreePagesRangeTracker { diff --git a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h index 69479ea..3492509 100644 --- a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h +++ b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h @@ -25,9 +25,7 @@ template struct TSDRegistryExT { void initLinkerInitialized(Allocator *Instance) { Instance->initLinkerInitialized(); CHECK_EQ(pthread_key_create(&PThreadKey, teardownThread), 0); - FallbackTSD = reinterpret_cast *>( - map(nullptr, sizeof(TSD), "scudo:tsd")); - FallbackTSD->initLinkerInitialized(Instance); + FallbackTSD.initLinkerInitialized(Instance); Initialized = true; } void init(Allocator *Instance) { @@ -35,9 +33,7 @@ template struct TSDRegistryExT { initLinkerInitialized(Instance); } - void unmapTestOnly() { - unmap(reinterpret_cast(FallbackTSD), sizeof(TSD)); - } + void unmapTestOnly() {} ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, bool MinimalInit) { if (LIKELY(State != ThreadState::NotInitialized)) @@ -51,23 +47,22 @@ template struct TSDRegistryExT { *UnlockRequired = false; return &ThreadTSD; } - DCHECK(FallbackTSD); - FallbackTSD->lock(); + FallbackTSD.lock(); *UnlockRequired = true; - return FallbackTSD; + return &FallbackTSD; } // To disable the exclusive TSD registry, we effectively lock the fallback TSD // and force all threads to attempt to use it instead of their local one. void disable() { Mutex.lock(); - FallbackTSD->lock(); + FallbackTSD.lock(); atomic_store(&Disabled, 1U, memory_order_release); } void enable() { atomic_store(&Disabled, 0U, memory_order_release); - FallbackTSD->unlock(); + FallbackTSD.unlock(); Mutex.unlock(); } @@ -96,7 +91,7 @@ private: pthread_key_t PThreadKey; bool Initialized; atomic_u8 Disabled; - TSD *FallbackTSD; + TSD FallbackTSD; HybridMutex Mutex; static THREADLOCAL ThreadState State; static THREADLOCAL TSD ThreadTSD; diff --git a/compiler-rt/lib/scudo/standalone/tsd_shared.h b/compiler-rt/lib/scudo/standalone/tsd_shared.h index cf5453d..038a590 100644 --- a/compiler-rt/lib/scudo/standalone/tsd_shared.h +++ b/compiler-rt/lib/scudo/standalone/tsd_shared.h @@ -19,10 +19,9 @@ template struct TSDRegistrySharedT { Instance->initLinkerInitialized(); CHECK_EQ(pthread_key_create(&PThreadKey, nullptr), 0); // For non-TLS const u32 NumberOfCPUs = getNumberOfCPUs(); - NumberOfTSDs = - (NumberOfCPUs == 0) ? MaxTSDCount : Min(NumberOfCPUs, MaxTSDCount); - TSDs = reinterpret_cast *>( - map(nullptr, sizeof(TSD) * NumberOfTSDs, "scudo:tsd")); + NumberOfTSDs = (SCUDO_ANDROID || NumberOfCPUs == 0) + ? MaxTSDCount + : Min(NumberOfCPUs, MaxTSDCount); for (u32 I = 0; I < NumberOfTSDs; I++) TSDs[I].initLinkerInitialized(Instance); // Compute all the coprimes of NumberOfTSDs. This will be used to walk the @@ -48,8 +47,6 @@ template struct TSDRegistrySharedT { } void unmapTestOnly() { - unmap(reinterpret_cast(TSDs), - sizeof(TSD) * NumberOfTSDs); setCurrentTSD(nullptr); pthread_key_delete(PThreadKey); } @@ -162,11 +159,11 @@ private: pthread_key_t PThreadKey; atomic_u32 CurrentIndex; u32 NumberOfTSDs; - TSD *TSDs; u32 NumberOfCoPrimes; u32 CoPrimes[MaxTSDCount]; bool Initialized; HybridMutex Mutex; + TSD TSDs[MaxTSDCount]; #if SCUDO_LINUX && !_BIONIC static THREADLOCAL TSD *ThreadTSD; #endif diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.inc b/compiler-rt/lib/scudo/standalone/wrappers_c.inc index 314a835..5a6c1a8d 100644 --- a/compiler-rt/lib/scudo/standalone/wrappers_c.inc +++ b/compiler-rt/lib/scudo/standalone/wrappers_c.inc @@ -195,7 +195,7 @@ INTERFACE WEAK int SCUDO_PREFIX(malloc_info)(UNUSED int options, FILE *stream) { decltype(SCUDO_ALLOCATOR)::PrimaryT::SizeClassMap::MaxSize; auto *sizes = static_cast( SCUDO_PREFIX(calloc)(max_size, sizeof(scudo::uptr))); - auto callback = [](uintptr_t, size_t size, void* arg) { + auto callback = [](uintptr_t, size_t size, void *arg) { auto *sizes = reinterpret_cast(arg); if (size < max_size) sizes[size]++; -- 2.7.4