From 9c26f51f5e178ac0fda98419e3a61d205d3b58b1 Mon Sep 17 00:00:00 2001
From: Chia-hung Duan
Date: Thu, 13 Oct 2022 22:52:04 +0000
Subject: [PATCH] [scudo] Support partial page releasing

Block grouping enables us to do partial page releasing, i.e., to release
pages at a finer granularity, which means we don't need to visit all
blocks to determine which pages are unused. Besides, it means we can do
incremental page releasing depending on the number of free blocks.

Reviewed By: cryptoad, cferris

Differential Revision: https://reviews.llvm.org/D134226
---
 compiler-rt/lib/scudo/standalone/local_cache.h   |  7 +++
 compiler-rt/lib/scudo/standalone/primary32.h     | 51 +++++++++++++++++-----
 compiler-rt/lib/scudo/standalone/primary64.h     | 50 +++++++++++++++++----
 .../lib/scudo/standalone/tests/combined_test.cpp |  2 +-
 4 files changed, 91 insertions(+), 19 deletions(-)

diff --git a/compiler-rt/lib/scudo/standalone/local_cache.h b/compiler-rt/lib/scudo/standalone/local_cache.h
index 7f27047..6e5b09f 100644
--- a/compiler-rt/lib/scudo/standalone/local_cache.h
+++ b/compiler-rt/lib/scudo/standalone/local_cache.h
@@ -65,6 +65,13 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
     uptr GroupId;
     // Cache value of TransferBatch::getMaxCached()
     u16 MaxCachedPerBatch;
+    // Number of blocks pushed into this group. This is an increment-only
+    // counter.
+    uptr PushedBlocks;
+    // This is used to track how many blocks have been pushed since the last
+    // time we checked `PushedBlocks`. It's useful for page releasing to
+    // determine the usage of a BatchGroup.
+    uptr PushedBlocksAtLastCheckpoint;
     // Blocks are managed by TransferBatch in a list.
     SinglyLinkedList<TransferBatch> Batches;
   };
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index b411a3b..fb6cb53 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -421,6 +421,8 @@ private:
       BG->GroupId = GroupId;
       BG->Batches.push_front(TB);
+      BG->PushedBlocks = 0;
+      BG->PushedBlocksAtLastCheckpoint = 0;
       BG->MaxCachedPerBatch =
           TransferBatch::getMaxCached(getSizeByClassId(ClassId));
 
@@ -446,6 +448,8 @@ private:
         CurBatch->appendFromArray(&Array[I], AppendSize);
         I += AppendSize;
       }
+
+      BG->PushedBlocks += Size;
     };
 
     BatchGroup *Cur = Sci->FreeList.front();
@@ -652,16 +656,13 @@ private:
     if (BytesPushed < PageSize)
       return 0; // Nothing new to release.
 
+    const bool CheckDensity = BlockSize < PageSize / 16U;
     // Releasing smaller blocks is expensive, so we want to make sure that a
     // significant amount of bytes are free, and that there has been a good
     // amount of batches pushed to the freelist before attempting to release.
-    if (BlockSize < PageSize / 16U) {
+    if (CheckDensity) {
       if (!Force && BytesPushed < Sci->AllocatedUser / 16U)
         return 0;
-      // We want 8x% to 9x% free bytes (the larger the block, the lower the %).
-      if ((BytesInFreeList * 100U) / Sci->AllocatedUser <
-          (100U - 1U - BlockSize / 16U))
-        return 0;
     }
 
     if (!Force) {
@@ -682,17 +683,47 @@ private:
     uptr TotalReleasedBytes = 0;
     const uptr Base = First * RegionSize;
     const uptr NumberOfRegions = Last - First + 1U;
+    const uptr GroupSize = (1U << GroupSizeLog);
+    const uptr CurRegionGroupId =
+        compactPtrGroup(compactPtr(ClassId, Sci->CurrentRegion));
+
     ReleaseRecorder Recorder(Base);
-    auto SkipRegion = [this, First, ClassId](uptr RegionIndex) {
-      return (PossibleRegions[First + RegionIndex] - 1U) != ClassId;
-    };
+    PageReleaseContext Context(BlockSize, RegionSize, NumberOfRegions);
+
     auto DecompactPtr = [](CompactPtrT CompactPtr) {
       return reinterpret_cast<uptr>(CompactPtr);
     };
-    PageReleaseContext Context(BlockSize, RegionSize, NumberOfRegions);
-    for (BatchGroup &BG : Sci->FreeList)
+    for (BatchGroup &BG : Sci->FreeList) {
+      const uptr PushedBytesDelta =
+          BG.PushedBlocks - BG.PushedBlocksAtLastCheckpoint;
+      if (PushedBytesDelta * BlockSize < PageSize)
+        continue;
+
+      uptr AllocatedGroupSize = BG.GroupId == CurRegionGroupId
+                                    ? Sci->CurrentRegionAllocated
+                                    : GroupSize;
+      if (AllocatedGroupSize == 0)
+        continue;
+
+      const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch +
+                             BG.Batches.back()->getCount();
+      const uptr BytesInBG = NumBlocks * BlockSize;
+      // Given the randomness property, we try to release the pages only if
+      // the bytes used by free blocks exceed a certain proportion of the
+      // allocated space.
+      if (CheckDensity && (BytesInBG * 100U) / AllocatedGroupSize <
+                              (100U - 1U - BlockSize / 16U)) {
+        continue;
+      }
+
+      BG.PushedBlocksAtLastCheckpoint = BG.PushedBlocks;
+      // Note that we don't always visit blocks in each BatchGroup, so we may
+      // miss the chance to release the pages that cross BatchGroups.
       Context.markFreeBlocks(BG.Batches, DecompactPtr, Base);
+    }
+
+    auto SkipRegion = [this, First, ClassId](uptr RegionIndex) {
+      return (PossibleRegions[First + RegionIndex] - 1U) != ClassId;
+    };
     releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
 
     if (Recorder.getReleasedRangesCount() > 0) {
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index 84b811a..2b6e71c 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -370,6 +370,9 @@ private:
   static uptr compactPtrGroup(CompactPtrT CompactPtr) {
     return CompactPtr >> (GroupSizeLog - CompactPtrScale);
   }
+  static uptr batchGroupBase(uptr Base, uptr GroupId) {
+    return (GroupId << GroupSizeLog) + Base;
+  }
 
   // Push the blocks to their batch group. The layout will be like,
   //
@@ -424,6 +427,8 @@ private:
       BG->GroupId = GroupId;
       BG->Batches.push_front(TB);
+      BG->PushedBlocks = 0;
+      BG->PushedBlocksAtLastCheckpoint = 0;
       BG->MaxCachedPerBatch =
           TransferBatch::getMaxCached(getSizeByClassId(ClassId));
 
@@ -450,6 +455,8 @@ private:
         CurBatch->appendFromArray(&Array[I], AppendSize);
         I += AppendSize;
       }
+
+      BG->PushedBlocks += Size;
     };
 
     BatchGroup *Cur = Region->FreeList.front();
@@ -661,16 +668,13 @@ private:
     if (BytesPushed < PageSize)
      return 0; // Nothing new to release.
 
+    const bool CheckDensity = BlockSize < PageSize / 16U;
     // Releasing smaller blocks is expensive, so we want to make sure that a
     // significant amount of bytes are free, and that there has been a good
     // amount of batches pushed to the freelist before attempting to release.
-    if (BlockSize < PageSize / 16U) {
+    if (CheckDensity) {
       if (!Force && BytesPushed < Region->AllocatedUser / 16U)
         return 0;
-      // We want 8x% to 9x% free bytes (the larger the block, the lower the %).
-      if ((BytesInFreeList * 100U) / Region->AllocatedUser <
-          (100U - 1U - BlockSize / 16U))
-        return 0;
     }
 
     if (!Force) {
@@ -684,16 +688,46 @@ private:
       }
     }
 
+    const uptr GroupSize = (1U << GroupSizeLog);
+    const uptr AllocatedUserEnd = Region->AllocatedUser + Region->RegionBeg;
     ReleaseRecorder Recorder(Region->RegionBeg, &Region->Data);
+    PageReleaseContext Context(BlockSize, RegionSize, /*NumberOfRegions=*/1U);
+
     const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId);
     auto DecompactPtr = [CompactPtrBase](CompactPtrT CompactPtr) {
       return decompactPtrInternal(CompactPtrBase, CompactPtr);
     };
-    auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
-    PageReleaseContext Context(BlockSize, RegionSize, /*NumberOfRegions=*/1U);
-    for (BatchGroup &BG : Region->FreeList)
+    for (BatchGroup &BG : Region->FreeList) {
+      const uptr PushedBytesDelta =
+          BG.PushedBlocks - BG.PushedBlocksAtLastCheckpoint;
+      if (PushedBytesDelta * BlockSize < PageSize)
+        continue;
+
+      const uptr BatchGroupBase = batchGroupBase(CompactPtrBase, BG.GroupId);
+      const uptr BatchGroupEnd = BatchGroupBase + GroupSize;
+      const uptr AllocatedGroupSize = AllocatedUserEnd >= BatchGroupEnd
+                                          ? GroupSize
+                                          : AllocatedUserEnd - BatchGroupBase;
+      if (AllocatedGroupSize == 0)
+        continue;
+
+      const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch +
+                             BG.Batches.back()->getCount();
+      const uptr BytesInBG = NumBlocks * BlockSize;
+      // Given the randomness property, we try to release the pages only if
+      // the bytes used by free blocks exceed a certain proportion of the
+      // group size. Note that this heuristic only applies when all the space
+      // in a BatchGroup is allocated.
+      if (CheckDensity && (BytesInBG * 100U) / AllocatedGroupSize <
+                              (100U - 1U - BlockSize / 16U)) {
+        continue;
+      }
+
+      BG.PushedBlocksAtLastCheckpoint = BG.PushedBlocks;
+      // Note that we don't always visit blocks in each BatchGroup, so we may
+      // miss the chance to release the pages that cross BatchGroups.
       Context.markFreeBlocks(BG.Batches, DecompactPtr, Region->RegionBeg);
+    }
+
+    auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
     releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
 
     if (Recorder.getReleasedRangesCount() > 0) {
diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
index 5b9d471..02a6e04 100644
--- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
@@ -506,7 +506,7 @@ struct DeathSizeClassConfig {
   static const scudo::uptr MinSizeLog = 10;
   static const scudo::uptr MidSizeLog = 10;
   static const scudo::uptr MaxSizeLog = 13;
-  static const scudo::u16 MaxNumCachedHint = 4;
+  static const scudo::u16 MaxNumCachedHint = 8;
   static const scudo::uptr MaxBytesCachedLog = 12;
   static const scudo::uptr SizeDelta = 0;
 };
-- 
2.7.4
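
Editor's note: the per-BatchGroup gating logic that both primaries apply in
releaseToOSMaybe() can be summarized in a small standalone sketch. This is
illustrative only, not scudo code: GroupStats, shouldReleaseGroup, and the
numbers in main are invented for the example; the thresholds mirror the
checks added by the patch above.

// Sketch of the per-group release gating heuristic (hypothetical names).
#include <cstdint>
#include <cstdio>

using uptr = std::uintptr_t;

struct GroupStats {
  uptr PushedBlocks;                 // Increment-only push counter.
  uptr PushedBlocksAtLastCheckpoint; // Snapshot from the last release pass.
  uptr FreeBlocks;                   // Blocks currently in the freelist.
  uptr AllocatedGroupSize;           // Bytes of the group actually allocated.
};

// Returns true when the group is worth scanning for releasable pages.
bool shouldReleaseGroup(const GroupStats &G, uptr BlockSize, uptr PageSize) {
  // Gate 1: skip groups with less than a page's worth of blocks pushed since
  // the last checkpoint; they cannot have newly freed a whole page.
  const uptr PushedDelta = G.PushedBlocks - G.PushedBlocksAtLastCheckpoint;
  if (PushedDelta * BlockSize < PageSize)
    return false;
  if (G.AllocatedGroupSize == 0)
    return false;
  // Gate 2 (small blocks only): because free blocks land at random offsets,
  // a page is unlikely to be entirely free unless most of the group's bytes
  // are free. The threshold is roughly 8x%-9x%, higher for smaller blocks.
  if (BlockSize < PageSize / 16U) {
    const uptr BytesInGroup = G.FreeBlocks * BlockSize;
    if ((BytesInGroup * 100U) / G.AllocatedGroupSize <
        (100U - 1U - BlockSize / 16U))
      return false;
  }
  return true;
}

int main() {
  // Example: 64-byte blocks, 4 KiB pages, a fully allocated 256 KiB group
  // whose freelist holds 3900 of its 4096 blocks (about 95% free).
  GroupStats G = {/*PushedBlocks=*/4096, /*PushedBlocksAtLastCheckpoint=*/0,
                  /*FreeBlocks=*/3900, /*AllocatedGroupSize=*/256 * 1024};
  std::printf("release? %d\n", shouldReleaseGroup(G, 64, 4096)); // prints 1
  return 0;
}

The density gate is what makes partial releasing cheap: groups whose free
bytes are too sparse to plausibly contain a fully free page are skipped
without visiting any of their blocks.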