// typedef SizeClassAllocator64<ExampleConfig> Primary;
// // Log2 of the size of a size class region, as used by the Primary.
// static const uptr PrimaryRegionSizeLog = 30U;
+// // Log2 of the size of a block group, as used by the Primary. Each group
+// // covers a range of memory addresses, and blocks within that range belong
+// // to the same group. In general, a single-region configuration uses a 1 MB
+// // or 2 MB group size, while multi-region configurations use a group size
+// // equal to the region size, because their regions are usually smaller than
+// // 1 MB.
+// // A smaller value gives finer-grained control of memory usage, but the
+// // trade-off is that deallocation may take longer.
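+// // For example, with PrimaryGroupSizeLog = 20U each group spans 1 MB:
+// // blocks whose compact pointers share the same value of
+// // (CompactPtr >> PrimaryGroupSizeLog) fall into the same group (the
+// // 64-bit Primary additionally accounts for PrimaryCompactPtrScale).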
+// static const uptr PrimaryGroupSizeLog = 20U;
// // Defines the type and scale of a compact pointer. A compact pointer can
// // be understood as the offset of a pointer within the region it belongs
// // to, in increments of a power-of-2 scale.
#if SCUDO_CAN_USE_PRIMARY64
typedef SizeClassAllocator64<DefaultConfig> Primary;
static const uptr PrimaryRegionSizeLog = 32U;
+ static const uptr PrimaryGroupSizeLog = 21U;
typedef uptr PrimaryCompactPtrT;
static const uptr PrimaryCompactPtrScale = 0;
static const bool PrimaryEnableRandomOffset = true;
#else
typedef SizeClassAllocator32<DefaultConfig> Primary;
static const uptr PrimaryRegionSizeLog = 19U;
+ static const uptr PrimaryGroupSizeLog = 19U;
typedef uptr PrimaryCompactPtrT;
#endif
static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
static const uptr PrimaryRegionSizeLog = 28U;
typedef u32 PrimaryCompactPtrT;
static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
+ static const uptr PrimaryGroupSizeLog = 20U;
static const bool PrimaryEnableRandomOffset = true;
static const uptr PrimaryMapSizeIncrement = 1UL << 18;
#else
typedef SizeClassAllocator32<AndroidConfig> Primary;
static const uptr PrimaryRegionSizeLog = 18U;
+ static const uptr PrimaryGroupSizeLog = 18U;
typedef uptr PrimaryCompactPtrT;
#endif
static const s32 PrimaryMinReleaseToOsIntervalMs = 1000;
static const uptr PrimaryRegionSizeLog = 27U;
typedef u32 PrimaryCompactPtrT;
static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
+ static const uptr PrimaryGroupSizeLog = 18U;
static const bool PrimaryEnableRandomOffset = true;
static const uptr PrimaryMapSizeIncrement = 1UL << 18;
#else
typedef SizeClassAllocator32<AndroidSvelteConfig> Primary;
static const uptr PrimaryRegionSizeLog = 16U;
+ static const uptr PrimaryGroupSizeLog = 16U;
typedef uptr PrimaryCompactPtrT;
#endif
static const s32 PrimaryMinReleaseToOsIntervalMs = 1000;
typedef SizeClassAllocator64<FuchsiaConfig> Primary;
static const uptr PrimaryRegionSizeLog = 30U;
+ static const uptr PrimaryGroupSizeLog = 30U;
typedef u32 PrimaryCompactPtrT;
static const bool PrimaryEnableRandomOffset = true;
static const uptr PrimaryMapSizeIncrement = 1UL << 18;
typedef SizeClassAllocator64<TrustyConfig> Primary;
// Some apps have 1 page of heap total so small regions are necessary.
static const uptr PrimaryRegionSizeLog = 10U;
+ static const uptr PrimaryGroupSizeLog = 10U;
typedef u32 PrimaryCompactPtrT;
static const bool PrimaryEnableRandomOffset = false;
// Trusty is extremely memory-constrained so minimally round up map calls.
Size--;
}
+ // Insert X immediately after Prev in the list.
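+ // E.g., calling insert(A, X) on the list A -> B yields A -> X -> B.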
+ void insert(T *Prev, T *X) {
+ DCHECK(!empty());
+ DCHECK_NE(Prev, nullptr);
+ DCHECK_NE(X, nullptr);
+ X->Next = Prev->Next;
+ Prev->Next = X;
+ if (Last == Prev)
+ Last = X;
+ ++Size;
+ }
+
void extract(T *Prev, T *X) {
DCHECK(!empty());
DCHECK_NE(Prev, nullptr);
#define SCUDO_LOCAL_CACHE_H_
#include "internal_defs.h"
+#include "list.h"
#include "platform.h"
#include "report.h"
#include "stats.h"
Count = N;
memcpy(Batch, Array, sizeof(Batch[0]) * Count);
}
+ void appendFromArray(CompactPtrT *Array, u16 N) {
+ DCHECK_LE(N, MaxNumCached - Count);
+ memcpy(Batch + Count, Array, sizeof(Batch[0]) * N);
+ // u16 will be promoted to int by arithmetic type conversion.
+ Count = static_cast<u16>(Count + N);
+ }
void clear() { Count = 0; }
void add(CompactPtrT P) {
DCHECK_LT(Count, MaxNumCached);
u16 Count;
};
+ // A BatchGroup is used to collect blocks. Each group has a group id that
+ // identifies the range of addresses the contained blocks belong to.
+ struct BatchGroup {
+ // `Next` is used by IntrusiveList.
+ BatchGroup *Next;
+ // The identifier of this group.
+ uptr GroupId;
+ // Cached value of TransferBatch::getMaxCached()
+ u16 MaxCachedPerBatch;
+ // Blocks are managed by TransferBatch in a list.
+ SinglyLinkedList<TransferBatch> Batches;
+ };
+
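+ // BatchGroups are backed by blocks of the BatchClassId size class (see
+ // createGroup() below), so a BatchGroup must fit within the space reserved
+ // for a TransferBatch.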
+ static_assert(sizeof(BatchGroup) <= sizeof(TransferBatch),
+ "BatchGroup uses the same class size as TransferBatch");
+
void init(GlobalStats *S, SizeClassAllocator *A) {
DCHECK(isEmpty());
Stats.init();
TransferBatch *createBatch(uptr ClassId, void *B) {
if (ClassId != BatchClassId)
B = allocate(BatchClassId);
+ if (UNLIKELY(!B))
+ reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId));
return reinterpret_cast<TransferBatch *>(B);
}
+ BatchGroup *createGroup() {
+ void *Ptr = allocate(BatchClassId);
+ if (UNLIKELY(!Ptr))
+ reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId));
+ return reinterpret_cast<BatchGroup *>(Ptr);
+ }
+
LocalStats &getStats() { return Stats; }
private:
NOINLINE void drain(PerClass *C, uptr ClassId) {
const u16 Count = Min(static_cast<u16>(C->MaxCount / 2), C->Count);
- TransferBatch *B =
- createBatch(ClassId, Allocator->decompactPtr(ClassId, C->Chunks[0]));
- if (UNLIKELY(!B))
- reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId));
- B->setFromArray(&C->Chunks[0], Count);
+ Allocator->pushBlocks(this, ClassId, &C->Chunks[0], Count);
// u16 will be promoted to int by arithmetic type conversion.
C->Count = static_cast<u16>(C->Count - Count);
for (u16 I = 0; I < C->Count; I++)
C->Chunks[I] = C->Chunks[I + Count];
- Allocator->pushBatch(ClassId, B);
}
};
public:
typedef typename Config::PrimaryCompactPtrT CompactPtrT;
typedef typename Config::SizeClassMap SizeClassMap;
+ static const uptr GroupSizeLog = Config::PrimaryGroupSizeLog;
// The bytemap can only track UINT8_MAX - 1 classes.
static_assert(SizeClassMap::LargestClassId <= (UINT8_MAX - 1), "");
// Regions should be large enough to hold the largest Block.
typedef SizeClassAllocator32<Config> ThisT;
typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
typedef typename CacheT::TransferBatch TransferBatch;
+ typedef typename CacheT::BatchGroup BatchGroup;
static uptr getSizeByClassId(uptr ClassId) {
return (ClassId == SizeClassMap::BatchClassId)
return reinterpret_cast<void *>(static_cast<uptr>(CompactPtr));
}
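+ // Returns the group id of a block: blocks within the same 2^GroupSizeLog-byte
+ // address range share a group id.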
+ uptr compactPtrGroup(CompactPtrT CompactPtr) {
+ return CompactPtr >> GroupSizeLog;
+ }
+
TransferBatch *popBatch(CacheT *C, uptr ClassId) {
DCHECK_LT(ClassId, NumClasses);
SizeClassInfo *Sci = getSizeClassInfo(ClassId);
ScopedLock L(Sci->Mutex);
- TransferBatch *B = Sci->FreeList.front();
- if (B) {
- Sci->FreeList.pop_front();
- } else {
- B = populateFreeList(C, ClassId, Sci);
- if (UNLIKELY(!B))
+ TransferBatch *B = popBatchImpl(C, ClassId);
+ if (UNLIKELY(!B)) {
+ if (UNLIKELY(!populateFreeList(C, ClassId, Sci)))
return nullptr;
+ B = popBatchImpl(C, ClassId);
+ // If `populateFreeList` succeeded, we are supposed to get free blocks.
+ DCHECK_NE(B, nullptr);
}
- DCHECK_GT(B->getCount(), 0);
Sci->Stats.PoppedBlocks += B->getCount();
return B;
}
- void pushBatch(uptr ClassId, TransferBatch *B) {
+ // Push the array of free blocks to the designated batch group.
+ void pushBlocks(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size) {
DCHECK_LT(ClassId, NumClasses);
- DCHECK_GT(B->getCount(), 0);
+ DCHECK_GT(Size, 0);
+
SizeClassInfo *Sci = getSizeClassInfo(ClassId);
+ if (ClassId == SizeClassMap::BatchClassId) {
+ ScopedLock L(Sci->Mutex);
+ // Constructing a batch group in the free list will use two blocks in
+ // BatchClassId. If we are pushing BatchClassId blocks, we use the blocks
+ // in the array directly (we can't delegate to the local cache, which
+ // would cause a recursive allocation). However, the number of free
+ // blocks may be less than two. Therefore, populate the free list before
+ // inserting the blocks.
+ if (Size == 1 && !populateFreeList(C, ClassId, Sci))
+ return;
+ pushBlocksImpl(C, ClassId, Array, Size);
+ Sci->Stats.PushedBlocks += Size;
+ return;
+ }
+
+ // TODO(chiahungduan): Consider not doing grouping if the group size is not
+ // greater than the block size with a certain scale.
+
+ // Sort the blocks so that blocks belonging to the same group can be pushed
+ // together.
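+ // The loop below is an insertion sort keyed on the group id; `SameGroup`
+ // records whether every block falls into a single group, so that the
+ // per-block group check can be skipped when pushing.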
+ bool SameGroup = true;
+ for (u32 I = 1; I < Size; ++I) {
+ if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I]))
+ SameGroup = false;
+ CompactPtrT Cur = Array[I];
+ u32 J = I;
+ while (J > 0 && compactPtrGroup(Cur) < compactPtrGroup(Array[J - 1])) {
+ Array[J] = Array[J - 1];
+ --J;
+ }
+ Array[J] = Cur;
+ }
+
ScopedLock L(Sci->Mutex);
- Sci->FreeList.push_front(B);
- Sci->Stats.PushedBlocks += B->getCount();
+ pushBlocksImpl(C, ClassId, Array, Size, SameGroup);
+
+ Sci->Stats.PushedBlocks += Size;
if (ClassId != SizeClassMap::BatchClassId)
releaseToOSMaybe(Sci, ClassId);
}
struct alignas(SCUDO_CACHE_LINE_SIZE) SizeClassInfo {
HybridMutex Mutex;
- SinglyLinkedList<TransferBatch> FreeList;
+ SinglyLinkedList<BatchGroup> FreeList;
uptr CurrentRegion;
uptr CurrentRegionAllocated;
SizeClassStats Stats;
return &SizeClassInfoArray[ClassId];
}
- NOINLINE TransferBatch *populateFreeList(CacheT *C, uptr ClassId,
- SizeClassInfo *Sci) {
+ // Push the blocks to their batch group. The layout will be like,
+ //
+ //   FreeList -> BG -> BG -> BG
+ //               |     |     |
+ //               v     v     v
+ //               TB    TB    TB
+ //               |
+ //               v
+ //               TB
+ //
+ // Each BatchGroup (BG) is associated with a unique group id and its free
+ // blocks are managed by a list of TransferBatches (TB). To reduce the time
+ // spent inserting blocks, the BGs are kept sorted and the input `Array` is
+ // expected to be sorted as well, so that the sorted order is cheap to
+ // maintain.
+ // Use `SameGroup=true` to indicate that all blocks in the array are from the
+ // same group; the group id check for each block is then skipped.
+ //
+ // Note that this aims at better management of dirty pages, i.e., the RSS
+ // usage won't grow indefinitely. There is one exception where a block may
+ // not be put into its associated group: while populating new blocks, the
+ // blocks may cross different groups. However, in most cases they fall into
+ // the same group and are expected to be popped soon, so it's not worth
+ // sorting the almost-sorted array. Therefore, we pass `SameGroup=true`
+ // instead.
+ //
+ // The region mutex needs to be held while calling this method.
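+ // For example, pushing blocks whose group ids are {1, 1, 2} appends two
+ // blocks to the BG with group id 1 and one block to the BG with group id 2,
+ // creating either BG (with a fresh TB) if it is not in the FreeList yet.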
+ void pushBlocksImpl(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size,
+ bool SameGroup = false) {
+ DCHECK_GT(Size, 0U);
+ SizeClassInfo *Sci = getSizeClassInfo(ClassId);
+
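+ // Creates a BatchGroup for `GroupId` holding one empty TransferBatch. For
+ // BatchClassId, the group and the batch are carved out of the last two
+ // blocks of `Array` (allocating through the cache here would recurse); for
+ // other classes they are allocated from the local cache.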
+ auto CreateGroup = [&](uptr GroupId) {
+ BatchGroup *BG = nullptr;
+ TransferBatch *TB = nullptr;
+ if (ClassId == SizeClassMap::BatchClassId) {
+ DCHECK_GE(Size, 2U);
+ BG = reinterpret_cast<BatchGroup *>(
+ decompactPtr(ClassId, Array[Size - 1]));
+ BG->Batches.clear();
+
+ TB = reinterpret_cast<TransferBatch *>(
+ decompactPtr(ClassId, Array[Size - 2]));
+ TB->clear();
+ } else {
+ BG = C->createGroup();
+ BG->Batches.clear();
+
+ TB = C->createBatch(ClassId, nullptr);
+ TB->clear();
+ }
+
+ BG->GroupId = GroupId;
+ BG->Batches.push_front(TB);
+ BG->MaxCachedPerBatch =
+ TransferBatch::getMaxCached(getSizeByClassId(ClassId));
+
+ return BG;
+ };
+
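+ // Appends `Size` blocks from `Array` to the front TransferBatch of `BG`,
+ // pushing a new TransferBatch to the front whenever the current one runs
+ // out of unused slots.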
+ auto InsertBlocks = [&](BatchGroup *BG, CompactPtrT *Array, u32 Size) {
+ SinglyLinkedList<TransferBatch> &Batches = BG->Batches;
+ TransferBatch *CurBatch = Batches.front();
+ DCHECK_NE(CurBatch, nullptr);
+
+ for (u32 I = 0; I < Size;) {
+ DCHECK_GE(BG->MaxCachedPerBatch, CurBatch->getCount());
+ u16 UnusedSlots =
+ static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
+ if (UnusedSlots == 0) {
+ CurBatch = C->createBatch(
+ ClassId,
+ reinterpret_cast<void *>(decompactPtr(ClassId, Array[I])));
+ CurBatch->clear();
+ Batches.push_front(CurBatch);
+ UnusedSlots = BG->MaxCachedPerBatch;
+ }
+ // `UnusedSlots` is u16 so the result will also fit in u16.
+ u16 AppendSize = static_cast<u16>(Min<u32>(UnusedSlots, Size - I));
+ CurBatch->appendFromArray(&Array[I], AppendSize);
+ I += AppendSize;
+ }
+ };
+
+ BatchGroup *Cur = Sci->FreeList.front();
+
+ if (ClassId == SizeClassMap::BatchClassId) {
+ if (Cur == nullptr) {
+ // Don't need to classify BatchClassId.
+ Cur = CreateGroup(/*GroupId=*/0);
+ Sci->FreeList.push_front(Cur);
+ }
+ InsertBlocks(Cur, Array, Size);
+ return;
+ }
+
+ // In the following, `Cur` always points to the BatchGroup for blocks that
+ // will be pushed next. `Prev` is the element right before `Cur`.
+ BatchGroup *Prev = nullptr;
+
+ while (Cur != nullptr && compactPtrGroup(Array[0]) > Cur->GroupId) {
+ Prev = Cur;
+ Cur = Cur->Next;
+ }
+
+ if (Cur == nullptr || compactPtrGroup(Array[0]) != Cur->GroupId) {
+ Cur = CreateGroup(compactPtrGroup(Array[0]));
+ if (Prev == nullptr)
+ Sci->FreeList.push_front(Cur);
+ else
+ Sci->FreeList.insert(Prev, Cur);
+ }
+
+ // All the blocks are from the same group, just push without checking group
+ // id.
+ if (SameGroup) {
+ InsertBlocks(Cur, Array, Size);
+ return;
+ }
+
+ // The blocks are sorted by group id. Determine the segments of blocks that
+ // share a group id and push each segment to its group together.
+ u32 Count = 1;
+ for (u32 I = 1; I < Size; ++I) {
+ if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) {
+ DCHECK_EQ(compactPtrGroup(Array[I - 1]), Cur->GroupId);
+ InsertBlocks(Cur, Array + I - Count, Count);
+
+ while (Cur != nullptr && compactPtrGroup(Array[I]) > Cur->GroupId) {
+ Prev = Cur;
+ Cur = Cur->Next;
+ }
+
+ if (Cur == nullptr || compactPtrGroup(Array[I]) != Cur->GroupId) {
+ Cur = CreateGroup(compactPtrGroup(Array[I]));
+ DCHECK_NE(Prev, nullptr);
+ Sci->FreeList.insert(Prev, Cur);
+ }
+
+ Count = 1;
+ } else {
+ ++Count;
+ }
+ }
+
+ InsertBlocks(Cur, Array + Size - Count, Count);
+ }
+
+ // Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest
+ // group id will be considered first.
+ //
+ // The region mutex needs to be held while calling this method.
+ TransferBatch *popBatchImpl(CacheT *C, uptr ClassId) {
+ SizeClassInfo *Sci = getSizeClassInfo(ClassId);
+ if (Sci->FreeList.empty())
+ return nullptr;
+
+ SinglyLinkedList<TransferBatch> &Batches = Sci->FreeList.front()->Batches;
+ DCHECK(!Batches.empty());
+
+ TransferBatch *B = Batches.front();
+ Batches.pop_front();
+ DCHECK_NE(B, nullptr);
+ DCHECK_GT(B->getCount(), 0U);
+
+ if (Batches.empty()) {
+ BatchGroup *BG = Sci->FreeList.front();
+ Sci->FreeList.pop_front();
+
+ // We don't keep a BatchGroup with zero blocks to avoid empty-checking
+ // while allocating. Note that the block used to construct the BatchGroup
+ // is recorded as a free block in the last element of BatchGroup::Batches,
+ // which means that once we pop the last TransferBatch, that block is
+ // implicitly deallocated.
+ if (ClassId != SizeClassMap::BatchClassId)
+ C->deallocate(SizeClassMap::BatchClassId, BG);
+ }
+
+ return B;
+ }
+
+ NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, SizeClassInfo *Sci) {
uptr Region;
uptr Offset;
// If the size-class currently has a region associated to it, use it. The
DCHECK_EQ(Sci->CurrentRegionAllocated, 0U);
Region = allocateRegion(Sci, ClassId);
if (UNLIKELY(!Region))
- return nullptr;
+ return false;
C->getStats().add(StatMapped, RegionSize);
Sci->CurrentRegion = Region;
Offset = 0;
if (ClassId != SizeClassMap::BatchClassId)
shuffle(ShuffleArray, NumberOfBlocks, &Sci->RandState);
for (u32 I = 0; I < NumberOfBlocks;) {
- TransferBatch *B =
- C->createBatch(ClassId, reinterpret_cast<void *>(ShuffleArray[I]));
- if (UNLIKELY(!B))
- return nullptr;
// `MaxCount` is u16 so the result will also fit in u16.
const u16 N = static_cast<u16>(Min<u32>(MaxCount, NumberOfBlocks - I));
- B->setFromArray(&ShuffleArray[I], N);
- Sci->FreeList.push_back(B);
+ // Note that the N blocks here may have different group ids; this only
+ // happens when the range crosses a group-size boundary. Instead of
+ // sorting them, treat them as belonging to the same group to avoid
+ // sorting the almost-sorted blocks.
+ pushBlocksImpl(C, ClassId, &ShuffleArray[I], N, /*SameGroup=*/true);
I += N;
}
- TransferBatch *B = Sci->FreeList.front();
- Sci->FreeList.pop_front();
- DCHECK(B);
- DCHECK_GT(B->getCount(), 0);
const uptr AllocatedUser = Size * NumberOfBlocks;
C->getStats().add(StatFree, AllocatedUser);
}
Sci->AllocatedUser += AllocatedUser;
- return B;
+ return true;
}
void getStats(ScopedString *Str, uptr ClassId, uptr Rss) {
auto DecompactPtr = [](CompactPtrT CompactPtr) {
return reinterpret_cast<uptr>(CompactPtr);
};
- releaseFreeMemoryToOS(Sci->FreeList, RegionSize, NumberOfRegions, BlockSize,
- Recorder, DecompactPtr, SkipRegion);
+ PageReleaseContext Context(BlockSize, RegionSize, NumberOfRegions);
+ for (BatchGroup &BG : Sci->FreeList)
+ Context.markFreeBlocks(BG.Batches, DecompactPtr, Base);
+
+ releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
+
if (Recorder.getReleasedRangesCount() > 0) {
Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks;
Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
public:
typedef typename Config::PrimaryCompactPtrT CompactPtrT;
static const uptr CompactPtrScale = Config::PrimaryCompactPtrScale;
+ static const uptr GroupSizeLog = Config::PrimaryGroupSizeLog;
typedef typename Config::SizeClassMap SizeClassMap;
typedef SizeClassAllocator64<Config> ThisT;
typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
typedef typename CacheT::TransferBatch TransferBatch;
+ typedef typename CacheT::BatchGroup BatchGroup;
static uptr getSizeByClassId(uptr ClassId) {
return (ClassId == SizeClassMap::BatchClassId)
DCHECK_LT(ClassId, NumClasses);
RegionInfo *Region = getRegionInfo(ClassId);
ScopedLock L(Region->Mutex);
- TransferBatch *B = Region->FreeList.front();
- if (B) {
- Region->FreeList.pop_front();
- } else {
- B = populateFreeList(C, ClassId, Region);
- if (UNLIKELY(!B))
+ TransferBatch *B = popBatchImpl(C, ClassId);
+ if (UNLIKELY(!B)) {
+ if (UNLIKELY(!populateFreeList(C, ClassId, Region)))
return nullptr;
+ B = popBatchImpl(C, ClassId);
+ // If `populateFreeList` succeeded, we are supposed to get free blocks.
+ DCHECK_NE(B, nullptr);
}
- DCHECK_GT(B->getCount(), 0);
Region->Stats.PoppedBlocks += B->getCount();
return B;
}
- void pushBatch(uptr ClassId, TransferBatch *B) {
- DCHECK_GT(B->getCount(), 0);
+ // Push the array of free blocks to the designated batch group.
+ void pushBlocks(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size) {
+ DCHECK_LT(ClassId, NumClasses);
+ DCHECK_GT(Size, 0);
+
RegionInfo *Region = getRegionInfo(ClassId);
+ if (ClassId == SizeClassMap::BatchClassId) {
+ ScopedLock L(Region->Mutex);
+ // Constructing a batch group in the free list will use two blocks in
+ // BatchClassId. If we are pushing BatchClassId blocks, we use the blocks
+ // in the array directly (we can't delegate to the local cache, which
+ // would cause a recursive allocation). However, the number of free
+ // blocks may be less than two. Therefore, populate the free list before
+ // inserting the blocks.
+ if (Size == 1 && UNLIKELY(!populateFreeList(C, ClassId, Region)))
+ return;
+ pushBlocksImpl(C, ClassId, Array, Size);
+ Region->Stats.PushedBlocks += Size;
+ return;
+ }
+
+ // TODO(chiahungduan): Consider not doing grouping if the group size is not
+ // greater than the block size with a certain scale.
+
+ // Sort the blocks so that blocks belonging to the same group can be pushed
+ // together.
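+ // The loop below is an insertion sort keyed on the group id; `SameGroup`
+ // records whether every block falls into a single group, so that the
+ // per-block group check can be skipped when pushing.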
+ bool SameGroup = true;
+ for (u32 I = 1; I < Size; ++I) {
+ if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I]))
+ SameGroup = false;
+ CompactPtrT Cur = Array[I];
+ u32 J = I;
+ while (J > 0 && compactPtrGroup(Cur) < compactPtrGroup(Array[J - 1])) {
+ Array[J] = Array[J - 1];
+ --J;
+ }
+ Array[J] = Cur;
+ }
+
ScopedLock L(Region->Mutex);
- Region->FreeList.push_front(B);
- Region->Stats.PushedBlocks += B->getCount();
+ pushBlocksImpl(C, ClassId, Array, Size, SameGroup);
+
+ Region->Stats.PushedBlocks += Size;
if (ClassId != SizeClassMap::BatchClassId)
releaseToOSMaybe(Region, ClassId);
}
struct UnpaddedRegionInfo {
HybridMutex Mutex;
- SinglyLinkedList<TransferBatch> FreeList;
+ SinglyLinkedList<BatchGroup> FreeList;
uptr RegionBeg = 0;
RegionStats Stats = {};
u32 RandState = 0;
return Base + (static_cast<uptr>(CompactPtr) << CompactPtrScale);
}
- NOINLINE TransferBatch *populateFreeList(CacheT *C, uptr ClassId,
- RegionInfo *Region) {
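+ // Returns the group id of a block: blocks within the same 2^GroupSizeLog-byte
+ // range (relative to the region base) share a group id. The shift is reduced
+ // by CompactPtrScale because compact pointers are already scaled.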
+ static uptr compactPtrGroup(CompactPtrT CompactPtr) {
+ return CompactPtr >> (GroupSizeLog - CompactPtrScale);
+ }
+
+ // Push the blocks to their batch group. The layout will be like,
+ //
+ //   FreeList -> BG -> BG -> BG
+ //               |     |     |
+ //               v     v     v
+ //               TB    TB    TB
+ //               |
+ //               v
+ //               TB
+ //
+ // Each BatchGroup (BG) is associated with a unique group id and its free
+ // blocks are managed by a list of TransferBatches (TB). To reduce the time
+ // spent inserting blocks, the BGs are kept sorted and the input `Array` is
+ // expected to be sorted as well, so that the sorted order is cheap to
+ // maintain.
+ // Use `SameGroup=true` to indicate that all blocks in the array are from the
+ // same group; the group id check for each block is then skipped.
+ //
+ // Note that this aims at better management of dirty pages, i.e., the RSS
+ // usage won't grow indefinitely. There is one exception where a block may
+ // not be put into its associated group: while populating new blocks, the
+ // blocks may cross different groups. However, in most cases they fall into
+ // the same group and are expected to be popped soon, so it's not worth
+ // sorting the almost-sorted array. Therefore, we pass `SameGroup=true`
+ // instead.
+ //
+ // The region mutex needs to be held while calling this method.
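+ // For example, pushing blocks whose group ids are {1, 1, 2} appends two
+ // blocks to the BG with group id 1 and one block to the BG with group id 2,
+ // creating either BG (with a fresh TB) if it is not in the FreeList yet.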
+ void pushBlocksImpl(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size,
+ bool SameGroup = false) {
+ DCHECK_GT(Size, 0U);
+ RegionInfo *Region = getRegionInfo(ClassId);
+
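+ // Creates a BatchGroup for `GroupId` holding one empty TransferBatch. For
+ // BatchClassId, the group and the batch are carved out of the last two
+ // blocks of `Array` (allocating through the cache here would recurse); for
+ // other classes they are allocated from the local cache.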
+ auto CreateGroup = [&](uptr GroupId) {
+ BatchGroup *BG = nullptr;
+ TransferBatch *TB = nullptr;
+ if (ClassId == SizeClassMap::BatchClassId) {
+ DCHECK_GE(Size, 2U);
+ BG = reinterpret_cast<BatchGroup *>(
+ decompactPtr(ClassId, Array[Size - 1]));
+ BG->Batches.clear();
+
+ TB = reinterpret_cast<TransferBatch *>(
+ decompactPtr(ClassId, Array[Size - 2]));
+ TB->clear();
+ } else {
+ BG = C->createGroup();
+ BG->Batches.clear();
+
+ TB = C->createBatch(ClassId, nullptr);
+ TB->clear();
+ }
+
+ BG->GroupId = GroupId;
+ BG->Batches.push_front(TB);
+ BG->MaxCachedPerBatch =
+ TransferBatch::getMaxCached(getSizeByClassId(ClassId));
+
+ return BG;
+ };
+
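+ // Appends `Size` blocks from `Array` to the front TransferBatch of `BG`,
+ // pushing a new TransferBatch to the front whenever the current one runs
+ // out of unused slots.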
+ auto InsertBlocks = [&](BatchGroup *BG, CompactPtrT *Array, u32 Size) {
+ SinglyLinkedList<TransferBatch> &Batches = BG->Batches;
+ TransferBatch *CurBatch = Batches.front();
+ DCHECK_NE(CurBatch, nullptr);
+
+ for (u32 I = 0; I < Size;) {
+ DCHECK_GE(BG->MaxCachedPerBatch, CurBatch->getCount());
+ u16 UnusedSlots =
+ static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
+ if (UnusedSlots == 0) {
+ CurBatch = C->createBatch(
+ ClassId,
+ reinterpret_cast<void *>(decompactPtr(ClassId, Array[I])));
+ CurBatch->clear();
+ Batches.push_front(CurBatch);
+ UnusedSlots = BG->MaxCachedPerBatch;
+ }
+ // `UnusedSlots` is u16 so the result will also fit in u16.
+ u16 AppendSize = static_cast<u16>(Min<u32>(UnusedSlots, Size - I));
+ CurBatch->appendFromArray(&Array[I], AppendSize);
+ I += AppendSize;
+ }
+ };
+
+ BatchGroup *Cur = Region->FreeList.front();
+
+ if (ClassId == SizeClassMap::BatchClassId) {
+ if (Cur == nullptr) {
+ // Don't need to classify BatchClassId.
+ Cur = CreateGroup(/*GroupId=*/0);
+ Region->FreeList.push_front(Cur);
+ }
+ InsertBlocks(Cur, Array, Size);
+ return;
+ }
+
+ // In the following, `Cur` always points to the BatchGroup for blocks that
+ // will be pushed next. `Prev` is the element right before `Cur`.
+ BatchGroup *Prev = nullptr;
+
+ while (Cur != nullptr && compactPtrGroup(Array[0]) > Cur->GroupId) {
+ Prev = Cur;
+ Cur = Cur->Next;
+ }
+
+ if (Cur == nullptr || compactPtrGroup(Array[0]) != Cur->GroupId) {
+ Cur = CreateGroup(compactPtrGroup(Array[0]));
+ if (Prev == nullptr)
+ Region->FreeList.push_front(Cur);
+ else
+ Region->FreeList.insert(Prev, Cur);
+ }
+
+ // All the blocks are from the same group, just push without checking group
+ // id.
+ if (SameGroup) {
+ InsertBlocks(Cur, Array, Size);
+ return;
+ }
+
+ // The blocks are sorted by group id. Determine the segments of blocks that
+ // share a group id and push each segment to its group together.
+ u32 Count = 1;
+ for (u32 I = 1; I < Size; ++I) {
+ if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) {
+ DCHECK_EQ(compactPtrGroup(Array[I - 1]), Cur->GroupId);
+ InsertBlocks(Cur, Array + I - Count, Count);
+
+ while (Cur != nullptr && compactPtrGroup(Array[I]) > Cur->GroupId) {
+ Prev = Cur;
+ Cur = Cur->Next;
+ }
+
+ if (Cur == nullptr || compactPtrGroup(Array[I]) != Cur->GroupId) {
+ Cur = CreateGroup(compactPtrGroup(Array[I]));
+ DCHECK_NE(Prev, nullptr);
+ Region->FreeList.insert(Prev, Cur);
+ }
+
+ Count = 1;
+ } else {
+ ++Count;
+ }
+ }
+
+ InsertBlocks(Cur, Array + Size - Count, Count);
+ }
+
+ // Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest
+ // group id will be considered first.
+ //
+ // The region mutex needs to be held while calling this method.
+ TransferBatch *popBatchImpl(CacheT *C, uptr ClassId) {
+ RegionInfo *Region = getRegionInfo(ClassId);
+ if (Region->FreeList.empty())
+ return nullptr;
+
+ SinglyLinkedList<TransferBatch> &Batches =
+ Region->FreeList.front()->Batches;
+ DCHECK(!Batches.empty());
+
+ TransferBatch *B = Batches.front();
+ Batches.pop_front();
+ DCHECK_NE(B, nullptr);
+ DCHECK_GT(B->getCount(), 0U);
+
+ if (Batches.empty()) {
+ BatchGroup *BG = Region->FreeList.front();
+ Region->FreeList.pop_front();
+
+ // We don't keep a BatchGroup with zero blocks to avoid empty-checking
+ // while allocating. Note that the block used to construct the BatchGroup
+ // is recorded as a free block in the last element of BatchGroup::Batches,
+ // which means that once we pop the last TransferBatch, that block is
+ // implicitly deallocated.
+ if (ClassId != SizeClassMap::BatchClassId)
+ C->deallocate(SizeClassMap::BatchClassId, BG);
+ }
+
+ return B;
+ }
+
+ NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, RegionInfo *Region) {
const uptr Size = getSizeByClassId(ClassId);
const u16 MaxCount = TransferBatch::getMaxCached(Size);
RegionSize >> 20, Size);
Str.output();
}
- return nullptr;
+ return false;
}
if (MappedUser == 0)
Region->Data = Data;
"scudo:primary",
MAP_ALLOWNOMEM | MAP_RESIZABLE |
(useMemoryTagging<Config>(Options.load()) ? MAP_MEMTAG : 0),
- &Region->Data)))
- return nullptr;
+ &Region->Data))) {
+ return false;
+ }
Region->MappedUser += MapSize;
C->getStats().add(StatMapped, MapSize);
}
if (ClassId != SizeClassMap::BatchClassId)
shuffle(ShuffleArray, NumberOfBlocks, &Region->RandState);
for (u32 I = 0; I < NumberOfBlocks;) {
- TransferBatch *B =
- C->createBatch(ClassId, reinterpret_cast<void *>(decompactPtrInternal(
- CompactPtrBase, ShuffleArray[I])));
- if (UNLIKELY(!B))
- return nullptr;
// `MaxCount` is u16 so the result will also fit in u16.
const u16 N = static_cast<u16>(Min<u32>(MaxCount, NumberOfBlocks - I));
- B->setFromArray(&ShuffleArray[I], N);
- Region->FreeList.push_back(B);
+ // Note that the N blocks here may have different group ids; this only
+ // happens when the range crosses a group-size boundary. Instead of
+ // sorting them, treat them as belonging to the same group to avoid
+ // sorting the almost-sorted blocks.
+ pushBlocksImpl(C, ClassId, &ShuffleArray[I], N, /*SameGroup=*/true);
I += N;
}
- TransferBatch *B = Region->FreeList.front();
- Region->FreeList.pop_front();
- DCHECK(B);
- DCHECK_GT(B->getCount(), 0);
const uptr AllocatedUser = Size * NumberOfBlocks;
C->getStats().add(StatFree, AllocatedUser);
Region->AllocatedUser += AllocatedUser;
- return B;
+ return true;
}
void getStats(ScopedString *Str, uptr ClassId, uptr Rss) {
return decompactPtrInternal(CompactPtrBase, CompactPtr);
};
auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
- releaseFreeMemoryToOS(Region->FreeList, Region->AllocatedUser, 1U,
- BlockSize, Recorder, DecompactPtr, SkipRegion);
+ PageReleaseContext Context(BlockSize, RegionSize, /*NumberOfRegions=*/1U);
+ for (BatchGroup &BG : Region->FreeList)
+ Context.markFreeBlocks(BG.Batches, DecompactPtr, Region->RegionBeg);
+
+ releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
if (Recorder.getReleasedRangesCount() > 0) {
Region->ReleaseInfo.PushedBlocksAtLastRelease =
static const scudo::uptr PrimaryCompactPtrScale = 0;
static const bool PrimaryEnableRandomOffset = true;
static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18;
+ static const scudo::uptr PrimaryGroupSizeLog = 18;
typedef scudo::MapAllocatorNoCache SecondaryCache;
template <class A> using TSDRegistryT = scudo::TSDRegistrySharedT<A, 1U, 1U>;
setList(&L1, X);
checkList(&L1, X);
+ setList(&L1, X, Y);
+ L1.insert(X, Z);
+ checkList(&L1, X, Z, Y);
+
setList(&L1, X, Y, Z);
setList(&L2, A, B, C);
L1.append_back(&L2);
#include "primary64.h"
#include "size_class_map.h"
+#include <algorithm>
+#include <chrono>
#include <condition_variable>
#include <mutex>
+#include <random>
#include <stdlib.h>
#include <thread>
#include <vector>
struct TestConfig1 {
static const scudo::uptr PrimaryRegionSizeLog = 18U;
+ static const scudo::uptr PrimaryGroupSizeLog = 18U;
static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
static const bool MaySupportMemoryTagging = false;
#else
static const scudo::uptr PrimaryRegionSizeLog = 24U;
#endif
+ static const scudo::uptr PrimaryGroupSizeLog = 20U;
static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
static const bool MaySupportMemoryTagging = false;
#else
static const scudo::uptr PrimaryRegionSizeLog = 24U;
#endif
+ static const scudo::uptr PrimaryGroupSizeLog = 20U;
static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
static const bool MaySupportMemoryTagging = true;
static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18;
};
+struct TestConfig4 {
+#if defined(__mips__)
+ // Unable to allocate greater size on QEMU-user.
+ static const scudo::uptr PrimaryRegionSizeLog = 23U;
+#else
+ static const scudo::uptr PrimaryRegionSizeLog = 24U;
+#endif
+ static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
+ static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
+ static const bool MaySupportMemoryTagging = true;
+ static const scudo::uptr PrimaryCompactPtrScale = 3U;
+ static const scudo::uptr PrimaryGroupSizeLog = 20U;
+ typedef scudo::u32 PrimaryCompactPtrT;
+ static const bool PrimaryEnableRandomOffset = true;
+ static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18;
+};
+
template <typename BaseConfig, typename SizeClassMapT>
struct Config : public BaseConfig {
using SizeClassMap = SizeClassMapT;
#define SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME) \
SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig1) \
SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig2) \
- SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig3)
+ SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig3) \
+ SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig4)
#endif
#define SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TYPE) \
static const scudo::uptr PrimaryCompactPtrScale = 0;
static const bool PrimaryEnableRandomOffset = true;
static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18;
+ static const scudo::uptr PrimaryGroupSizeLog = 20U;
};
// The 64-bit SizeClassAllocator can be easily OOM'd with small region sizes.
std::vector<TransferBatch *> Batches;
const scudo::uptr ClassId = Primary::SizeClassMap::LargestClassId;
const scudo::uptr Size = Primary::getSizeByClassId(ClassId);
+ typename Primary::CacheT::CompactPtrT Blocks[TransferBatch::MaxNumCached];
+
for (scudo::uptr I = 0; I < 10000U; I++) {
TransferBatch *B = Allocator.popBatch(&Cache, ClassId);
if (!B) {
Batches.push_back(B);
}
while (!Batches.empty()) {
- Allocator.pushBatch(ClassId, Batches.back());
+ TransferBatch *B = Batches.back();
Batches.pop_back();
+ B->copyToArray(Blocks);
+ Allocator.pushBlocks(&Cache, ClassId, Blocks, B->getCount());
+ Cache.deallocate(Primary::SizeClassMap::BatchClassId, B);
}
Cache.destroy(nullptr);
Allocator.releaseToOS();
Cache.destroy(nullptr);
EXPECT_GT(Allocator->releaseToOS(), 0U);
}
+
+SCUDO_TYPED_TEST(ScudoPrimaryTest, MemoryGroup) {
+ using Primary = TestAllocator<TypeParam, scudo::DefaultSizeClassMap>;
+ std::unique_ptr<Primary> Allocator(new Primary);
+ Allocator->init(/*ReleaseToOsInterval=*/-1);
+ typename Primary::CacheT Cache;
+ Cache.init(nullptr, Allocator.get());
+ const scudo::uptr Size = 32U;
+ const scudo::uptr ClassId = Primary::SizeClassMap::getClassIdBySize(Size);
+
+ // We will allocate 4 times the group size worth of memory and release all
+ // of it. We expect the free blocks to be classified into groups. Then we
+ // will allocate the same amount of memory as the group size and expect the
+ // maximum address difference between the blocks to be smaller than or equal
+ // to 2 times the group size. Note that the blocks don't need to fit within
+ // a single group size, because the group id is derived by shifting the
+ // compact pointer and, depending on the configuration, the compact pointer
+ // may not be aligned to the group size. As a result, the blocks can span at
+ // most two groups.
+ const scudo::uptr GroupSizeMem = (1ULL << Primary::GroupSizeLog);
+ const scudo::uptr PeakAllocationMem = 4 * GroupSizeMem;
+ const scudo::uptr PeakNumberOfAllocations = PeakAllocationMem / Size;
+ const scudo::uptr FinalNumberOfAllocations = GroupSizeMem / Size;
+ std::vector<scudo::uptr> Blocks;
+ std::mt19937 R;
+
+ for (scudo::uptr I = 0; I < PeakNumberOfAllocations; ++I)
+ Blocks.push_back(reinterpret_cast<scudo::uptr>(Cache.allocate(ClassId)));
+
+ std::shuffle(Blocks.begin(), Blocks.end(), R);
+
+ // Release all the allocated blocks, including those held by local cache.
+ while (!Blocks.empty()) {
+ Cache.deallocate(ClassId, reinterpret_cast<void *>(Blocks.back()));
+ Blocks.pop_back();
+ }
+ Cache.drain();
+
+ for (scudo::uptr I = 0; I < FinalNumberOfAllocations; ++I)
+ Blocks.push_back(reinterpret_cast<scudo::uptr>(Cache.allocate(ClassId)));
+
+ EXPECT_LE(*std::max_element(Blocks.begin(), Blocks.end()) -
+ *std::min_element(Blocks.begin(), Blocks.end()),
+ GroupSizeMem * 2);
+}