src/third_party/WebKit/Source/wtf/PartitionAlloc.cpp

   1 /*
   2  * Copyright (C) 2013 Google Inc. All rights reserved.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions are
   6  * met:
   7  *
   8  *     * Redistributions of source code must retain the above copyright
   9  * notice, this list of conditions and the following disclaimer.
  10  *     * Redistributions in binary form must reproduce the above
  11  * copyright notice, this list of conditions and the following disclaimer
  12  * in the documentation and/or other materials provided with the
  13  * distribution.
  14  *     * Neither the name of Google Inc. nor the names of its
  15  * contributors may be used to endorse or promote products derived from
  16  * this software without specific prior written permission.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29  */
  30
  31 #include "config.h"
  32 #include "wtf/PartitionAlloc.h"
  33
  34 #include <string.h>
  35
  36 #ifndef NDEBUG
  37 #include <stdio.h>
  38 #endif
  39
  40 // Two partition pages are used as guard / metadata page so make sure the super
  41 // page size is bigger.
  42 COMPILE_ASSERT(WTF::kPartitionPageSize * 4 <= WTF::kSuperPageSize, ok_super_page_size);
  43 COMPILE_ASSERT(!(WTF::kSuperPageSize % WTF::kPartitionPageSize), ok_super_page_multiple);
  44 // Four system pages gives us room to hack out a still-guard-paged piece
  45 // of metadata in the middle of a guard partition page.
  46 COMPILE_ASSERT(WTF::kSystemPageSize * 4 <= WTF::kPartitionPageSize, ok_partition_page_size);
  47 COMPILE_ASSERT(!(WTF::kPartitionPageSize % WTF::kSystemPageSize), ok_partition_page_multiple);
  48 COMPILE_ASSERT(sizeof(WTF::PartitionPage) <= WTF::kPageMetadataSize, PartitionPage_not_too_big);
  49 COMPILE_ASSERT(sizeof(WTF::PartitionSuperPageExtentEntry) <= WTF::kPageMetadataSize, PartitionSuperPageExtentEntry_not_too_big);
  50 COMPILE_ASSERT(WTF::kPageMetadataSize * WTF::kNumPartitionPagesPerSuperPage <= WTF::kSystemPageSize, page_metadata_fits_in_hole);
  51 // Check that some of our zanier calculations worked out as expected.
  52 COMPILE_ASSERT(WTF::kGenericSmallestBucket == 8, generic_smallest_bucket);
  53 COMPILE_ASSERT(WTF::kGenericMaxBucketed == 983040, generic_max_bucketed);
  54
  55 namespace WTF {
  56
  57 int PartitionRootBase::gInitializedLock = 0;
  58 bool PartitionRootBase::gInitialized = false;
  59 PartitionPage PartitionRootBase::gSeedPage;
  60 PartitionBucket PartitionRootBase::gPagedBucket;
  61
  62 static size_t partitionBucketNumSystemPages(size_t size)
  63 {
  64     // This works out reasonably for the current bucket sizes of the generic
  65     // allocator, and the current values of partition page size and constants.
  66     // Specifically, we have enough room to always pack the slots perfectly into
  67     // some number of system pages. The only waste is the waste associated with
  68     // unfaulted pages (i.e. wasted address space).
  69     // TODO: we end up using a lot of system pages for very small sizes. For
  70     // example, we'll use 12 system pages for slot size 24. The slot size is
  71     // so small that the waste would be tiny with just 4, or 1, system pages.
  72     // Later, we can investigate whether there are anti-fragmentation benefits
  73     // to using fewer system pages.
  74     double bestWasteRatio = 1.0f;
  75     size_t bestPages = 0;
  76     if (size > kMaxSystemPagesPerSlotSpan * kSystemPageSize) {
  77         ASSERT(!(size % kSystemPageSize));
  78         return size / kSystemPageSize;
  79     }
  80     ASSERT(size <= kMaxSystemPagesPerSlotSpan * kSystemPageSize);
  81     for (size_t i = kNumSystemPagesPerPartitionPage - 1; i <= kMaxSystemPagesPerSlotSpan; ++i) {
  82         size_t pageSize = kSystemPageSize * i;
  83         size_t numSlots = pageSize / size;
  84         size_t waste = pageSize - (numSlots * size);
  85         // Leaving a page unfaulted is not free; the page will occupy an empty page table entry.
  86         // Make a simple attempt to account for that.
  87         size_t numRemainderPages = i & (kNumSystemPagesPerPartitionPage - 1);
  88         size_t numUnfaultedPages = numRemainderPages ? (kNumSystemPagesPerPartitionPage - numRemainderPages) : 0;
  89         waste += sizeof(void*) * numUnfaultedPages;
  90         double wasteRatio = (double) waste / (double) pageSize;
  91         if (wasteRatio < bestWasteRatio) {
  92             bestWasteRatio = wasteRatio;
  93             bestPages = i;
  94         }
  95     }
  96     ASSERT(bestPages > 0);
  97     return bestPages;
  98 }
  99
 100 static void parititonAllocBaseInit(PartitionRootBase* root)
 101 {
 102     ASSERT(!root->initialized);
 103
 104     spinLockLock(&PartitionRootBase::gInitializedLock);
 105     if (!PartitionRootBase::gInitialized) {
 106         PartitionRootBase::gInitialized = true;
 107         // We mark the seed page as free to make sure it is skipped by our
 108         // logic to find a new active page.
 109         PartitionRootBase::gPagedBucket.activePagesHead = &PartitionRootGeneric::gSeedPage;
 110     }
 111     spinLockUnlock(&PartitionRootBase::gInitializedLock);
 112
 113     root->initialized = true;
 114     root->totalSizeOfSuperPages = 0;
 115     root->nextSuperPage = 0;
 116     root->nextPartitionPage = 0;
 117     root->nextPartitionPageEnd = 0;
 118     root->firstExtent = 0;
 119     root->currentExtent = 0;
 120
 121     memset(&root->globalEmptyPageRing, '\0', sizeof(root->globalEmptyPageRing));
 122     root->globalEmptyPageRingIndex = 0;
 123
 124     // This is a "magic" value so we can test if a root pointer is valid.
 125     root->invertedSelf = ~reinterpret_cast<uintptr_t>(root);
 126 }
 127
 128 static void partitionBucketInitBase(PartitionBucket* bucket, PartitionRootBase* root)
 129 {
 130     bucket->activePagesHead = &PartitionRootGeneric::gSeedPage;
 131     bucket->freePagesHead = 0;
 132     bucket->numFullPages = 0;
 133     bucket->numSystemPagesPerSlotSpan = partitionBucketNumSystemPages(bucket->slotSize);
 134 }
 135
 136 void partitionAllocInit(PartitionRoot* root, size_t numBuckets, size_t maxAllocation)
 137 {
 138     parititonAllocBaseInit(root);
 139
 140     root->numBuckets = numBuckets;
 141     root->maxAllocation = maxAllocation;
 142     size_t i;
 143     for (i = 0; i < root->numBuckets; ++i) {
 144         PartitionBucket* bucket = &root->buckets()[i];
 145         if (!i)
 146             bucket->slotSize = kAllocationGranularity;
 147         else
 148             bucket->slotSize = i << kBucketShift;
 149         partitionBucketInitBase(bucket, root);
 150     }
 151 }
 152
 153 void partitionAllocGenericInit(PartitionRootGeneric* root)
 154 {
 155     parititonAllocBaseInit(root);
 156
 157     root->lock = 0;
 158
 159     // Precalculate some shift and mask constants used in the hot path.
 160     // Example: malloc(41) == 101001 binary.
 161     // Order is 6 (1 << 6-1)==32 is highest bit set.
 162     // orderIndex is the next three MSB == 010 == 2.
 163     // subOrderIndexMask is a mask for the remaining bits == 11 (masking to 01 for the subOrderIndex).
 164     size_t order;
 165     for (order = 0; order <= kBitsPerSizet; ++order) {
 166         size_t orderIndexShift;
 167         if (order < kGenericNumBucketsPerOrderBits + 1)
 168             orderIndexShift = 0;
 169         else
 170             orderIndexShift = order - (kGenericNumBucketsPerOrderBits + 1);
 171         root->orderIndexShifts[order] = orderIndexShift;
 172         size_t subOrderIndexMask;
 173         if (order == kBitsPerSizet) {
 174             // This avoids invoking undefined behavior for an excessive shift.
 175             subOrderIndexMask = static_cast<size_t>(-1) >> (kGenericNumBucketsPerOrderBits + 1);
 176         } else {
 177             subOrderIndexMask = ((1 << order) - 1) >> (kGenericNumBucketsPerOrderBits + 1);
 178         }
 179         root->orderSubIndexMasks[order] = subOrderIndexMask;
 180     }
 181
 182     // Set up the actual usable buckets first.
 183     // Note that typical values (i.e. min allocation size of 8) will result in
 184     // invalid buckets (size==9 etc. or more generally, size is not a multiple
 185     // of the smallest allocation granularity).
 186     // We avoid them in the bucket lookup map, but we tolerate them to keep the
 187     // code simpler and the structures more generic.
 188     size_t i, j;
 189     size_t currentSize = kGenericSmallestBucket;
 190     size_t currentIncrement = kGenericSmallestBucket >> kGenericNumBucketsPerOrderBits;
 191     PartitionBucket* bucket = &root->buckets[0];
 192     for (i = 0; i < kGenericNumBucketedOrders; ++i) {
 193         for (j = 0; j < kGenericNumBucketsPerOrder; ++j) {
 194             bucket->slotSize = currentSize;
 195             partitionBucketInitBase(bucket, root);
 196             // Disable invalid buckets so that touching them faults.
 197             if (currentSize % kGenericSmallestBucket)
 198                 bucket->activePagesHead = 0;
 199             currentSize += currentIncrement;
 200             ++bucket;
 201         }
 202         currentIncrement <<= 1;
 203     }
 204     ASSERT(currentSize == 1 << kGenericMaxBucketedOrder);
 205     ASSERT(bucket == &root->buckets[0] + (kGenericNumBucketedOrders * kGenericNumBucketsPerOrder));
 206
 207     // Then set up the fast size -> bucket lookup table.
 208     bucket = &root->buckets[0];
 209     PartitionBucket** bucketPtr = &root->bucketLookups[0];
 210     for (order = 0; order <= kBitsPerSizet; ++order) {
 211         for (j = 0; j < kGenericNumBucketsPerOrder; ++j) {
 212             if (order < kGenericMinBucketedOrder) {
 213                 // Use the bucket of finest granularity for malloc(0) etc.
 214                 *bucketPtr++ = &root->buckets[0];
 215             } else if (order > kGenericMaxBucketedOrder) {
 216                 *bucketPtr++ = &PartitionRootGeneric::gPagedBucket;
 217             } else {
 218                 PartitionBucket* validBucket = bucket;
 219                 // Skip over invalid buckets.
 220                 while (validBucket->slotSize % kGenericSmallestBucket)
 221                     validBucket++;
 222                 *bucketPtr++ = validBucket;
 223                 bucket++;
 224             }
 225         }
 226     }
 227     ASSERT(bucket == &root->buckets[0] + (kGenericNumBucketedOrders * kGenericNumBucketsPerOrder));
 228     ASSERT(bucketPtr == &root->bucketLookups[0] + ((kBitsPerSizet + 1) * kGenericNumBucketsPerOrder));
 229     // And there's one last bucket lookup that will be hit for e.g. malloc(-1),
 230     // which tries to overflow to a non-existant order.
 231     *bucketPtr = &PartitionRootGeneric::gPagedBucket;
 232 }
 233
 234 static bool partitionAllocShutdownBucket(PartitionBucket* bucket)
 235 {
 236     // Failure here indicates a memory leak.
 237     bool noLeaks = !bucket->numFullPages;
 238     PartitionPage* page = bucket->activePagesHead;
 239     while (page) {
 240         if (page->numAllocatedSlots)
 241             noLeaks = false;
 242         page = page->nextPage;
 243     }
 244
 245     return noLeaks;
 246 }
 247
 248 static void partitionAllocBaseShutdown(PartitionRootBase* root)
 249 {
 250     ASSERT(root->initialized);
 251     root->initialized = false;
 252
 253     // Now that we've examined all partition pages in all buckets, it's safe
 254     // to free all our super pages. We first collect the super page pointers
 255     // on the stack because some of them are themselves store in super pages.
 256     char* superPages[kMaxPartitionSize / kSuperPageSize];
 257     size_t numSuperPages = 0;
 258     PartitionSuperPageExtentEntry* entry = root->firstExtent;
 259     while (entry) {
 260         char* superPage = entry->superPageBase;
 261         while (superPage != entry->superPagesEnd) {
 262             superPages[numSuperPages] = superPage;
 263             numSuperPages++;
 264             superPage += kSuperPageSize;
 265         }
 266         entry = entry->next;
 267     }
 268     ASSERT(numSuperPages == root->totalSizeOfSuperPages / kSuperPageSize);
 269     for (size_t i = 0; i < numSuperPages; ++i)
 270         freePages(superPages[i], kSuperPageSize);
 271 }
 272
 273 bool partitionAllocShutdown(PartitionRoot* root)
 274 {
 275     bool noLeaks = true;
 276     size_t i;
 277     for (i = 0; i < root->numBuckets; ++i) {
 278         PartitionBucket* bucket = &root->buckets()[i];
 279         if (!partitionAllocShutdownBucket(bucket))
 280             noLeaks = false;
 281     }
 282
 283     partitionAllocBaseShutdown(root);
 284     return noLeaks;
 285 }
 286
 287 bool partitionAllocGenericShutdown(PartitionRootGeneric* root)
 288 {
 289     bool noLeaks = true;
 290     size_t i;
 291     for (i = 0; i < kGenericNumBucketedOrders * kGenericNumBucketsPerOrder; ++i) {
 292         PartitionBucket* bucket = &root->buckets[i];
 293         if (!partitionAllocShutdownBucket(bucket))
 294             noLeaks = false;
 295     }
 296     partitionAllocBaseShutdown(root);
 297     return noLeaks;
 298 }
 299
 300 static ALWAYS_INLINE void* partitionAllocPartitionPages(PartitionRootBase* root, size_t numPartitionPages)
 301 {
 302     ASSERT(!(reinterpret_cast<uintptr_t>(root->nextPartitionPage) % kPartitionPageSize));
 303     ASSERT(!(reinterpret_cast<uintptr_t>(root->nextPartitionPageEnd) % kPartitionPageSize));
 304     RELEASE_ASSERT(numPartitionPages <= kNumPartitionPagesPerSuperPage);
 305     size_t totalSize = kPartitionPageSize * numPartitionPages;
 306     size_t numPartitionPagesLeft = (root->nextPartitionPageEnd - root->nextPartitionPage) >> kPartitionPageShift;
 307     if (LIKELY(numPartitionPagesLeft >= numPartitionPages)) {
 308         // In this case, we can still hand out pages from the current super page
 309         // allocation.
 310         char* ret = root->nextPartitionPage;
 311         root->nextPartitionPage += totalSize;
 312         return ret;
 313     }
 314
 315     // Need a new super page.
 316     root->totalSizeOfSuperPages += kSuperPageSize;
 317     RELEASE_ASSERT(root->totalSizeOfSuperPages <= kMaxPartitionSize);
 318     char* requestedAddress = root->nextSuperPage;
 319     char* superPage = reinterpret_cast<char*>(allocPages(requestedAddress, kSuperPageSize, kSuperPageSize));
 320     // TODO: handle allocation failure here with PartitionAllocReturnNull.
 321     RELEASE_ASSERT(superPage);
 322     root->nextSuperPage = superPage + kSuperPageSize;
 323     char* ret = superPage + kPartitionPageSize;
 324     root->nextPartitionPage = ret + totalSize;
 325     root->nextPartitionPageEnd = root->nextSuperPage - kPartitionPageSize;
 326     // Make the first partition page in the super page a guard page, but leave a
 327     // hole in the middle.
 328     // This is where we put page metadata and also a tiny amount of extent
 329     // metadata.
 330     setSystemPagesInaccessible(superPage, kSystemPageSize);
 331     setSystemPagesInaccessible(superPage + (kSystemPageSize * 2), kPartitionPageSize - (kSystemPageSize * 2));
 332     // Also make the last partition page a guard page.
 333     setSystemPagesInaccessible(superPage + (kSuperPageSize - kPartitionPageSize), kPartitionPageSize);
 334
 335     // If we were after a specific address, but didn't get it, assume that
 336     // the system chose a lousy address and re-randomize the next
 337     // allocation.
 338     if (requestedAddress && requestedAddress != superPage)
 339         root->nextSuperPage = 0;
 340
 341     // We allocated a new super page so update super page metadata.
 342     // First check if this is a new extent or not.
 343     PartitionSuperPageExtentEntry* latestExtent = reinterpret_cast<PartitionSuperPageExtentEntry*>(partitionSuperPageToMetadataArea(superPage));
 344     PartitionSuperPageExtentEntry* currentExtent = root->currentExtent;
 345     bool isNewExtent = (superPage != requestedAddress);
 346     if (UNLIKELY(isNewExtent)) {
 347         latestExtent->next = 0;
 348         if (UNLIKELY(!currentExtent)) {
 349             root->firstExtent = latestExtent;
 350         } else {
 351             ASSERT(currentExtent->superPageBase);
 352             currentExtent->next = latestExtent;
 353         }
 354         root->currentExtent = latestExtent;
 355         currentExtent = latestExtent;
 356         currentExtent->superPageBase = superPage;
 357         currentExtent->superPagesEnd = superPage + kSuperPageSize;
 358     } else {
 359         // We allocated next to an existing extent so just nudge the size up a little.
 360         currentExtent->superPagesEnd += kSuperPageSize;
 361         ASSERT(ret >= currentExtent->superPageBase && ret < currentExtent->superPagesEnd);
 362     }
 363     // By storing the root in every extent metadata object, we have a fast way
 364     // to go from a pointer within the partition to the root object.
 365     latestExtent->root = root;
 366
 367     return ret;
 368 }
 369
 370 static ALWAYS_INLINE void partitionUnusePage(PartitionPage* page)
 371 {
 372     ASSERT(page->bucket->numSystemPagesPerSlotSpan);
 373     void* addr = partitionPageToPointer(page);
 374     decommitSystemPages(addr, page->bucket->numSystemPagesPerSlotSpan * kSystemPageSize);
 375 }
 376
 377 static ALWAYS_INLINE size_t partitionBucketSlots(const PartitionBucket* bucket)
 378 {
 379     return (bucket->numSystemPagesPerSlotSpan * kSystemPageSize) / bucket->slotSize;
 380 }
 381
 382 static ALWAYS_INLINE size_t partitionBucketPartitionPages(const PartitionBucket* bucket)
 383 {
 384     return (bucket->numSystemPagesPerSlotSpan + (kNumSystemPagesPerPartitionPage - 1)) / kNumSystemPagesPerPartitionPage;
 385 }
 386
 387 static ALWAYS_INLINE void partitionPageReset(PartitionPage* page, PartitionBucket* bucket)
 388 {
 389     ASSERT(page != &PartitionRootGeneric::gSeedPage);
 390     page->numAllocatedSlots = 0;
 391     page->numUnprovisionedSlots = partitionBucketSlots(bucket);
 392     ASSERT(page->numUnprovisionedSlots);
 393     page->bucket = bucket;
 394     page->nextPage = 0;
 395     // NULLing the freelist is not strictly necessary but it makes an ASSERT in partitionPageFillFreelist simpler.
 396     page->freelistHead = 0;
 397     page->pageOffset = 0;
 398     page->freeCacheIndex = -1;
 399     size_t numPartitionPages = partitionBucketPartitionPages(bucket);
 400     size_t i;
 401     char* pageCharPtr = reinterpret_cast<char*>(page);
 402     for (i = 1; i < numPartitionPages; ++i) {
 403         pageCharPtr += kPageMetadataSize;
 404         PartitionPage* secondaryPage = reinterpret_cast<PartitionPage*>(pageCharPtr);
 405         secondaryPage->pageOffset = i;
 406     }
 407 }
 408
 409 static ALWAYS_INLINE char* partitionPageAllocAndFillFreelist(PartitionPage* page)
 410 {
 411     ASSERT(page != &PartitionRootGeneric::gSeedPage);
 412     size_t numSlots = page->numUnprovisionedSlots;
 413     ASSERT(numSlots);
 414     PartitionBucket* bucket = page->bucket;
 415     // We should only get here when _every_ slot is either used or unprovisioned.
 416     // (The third state is "on the freelist". If we have a non-empty freelist, we should not get here.)
 417     ASSERT(numSlots + page->numAllocatedSlots == partitionBucketSlots(bucket));
 418     // Similarly, make explicitly sure that the freelist is empty.
 419     ASSERT(!page->freelistHead);
 420     ASSERT(page->numAllocatedSlots >= 0);
 421
 422     size_t size = bucket->slotSize;
 423     char* base = reinterpret_cast<char*>(partitionPageToPointer(page));
 424     char* returnObject = base + (size * page->numAllocatedSlots);
 425     char* firstFreelistPointer = returnObject + size;
 426     char* firstFreelistPointerExtent = firstFreelistPointer + sizeof(PartitionFreelistEntry*);
 427     // Our goal is to fault as few system pages as possible. We calculate the
 428     // page containing the "end" of the returned slot, and then allow freelist
 429     // pointers to be written up to the end of that page.
 430     char* subPageLimit = reinterpret_cast<char*>((reinterpret_cast<uintptr_t>(firstFreelistPointer) + kSystemPageOffsetMask) & kSystemPageBaseMask);
 431     char* slotsLimit = returnObject + (size * page->numUnprovisionedSlots);
 432     char* freelistLimit = subPageLimit;
 433     if (UNLIKELY(slotsLimit < freelistLimit))
 434         freelistLimit = slotsLimit;
 435
 436     size_t numNewFreelistEntries = 0;
 437     if (LIKELY(firstFreelistPointerExtent <= freelistLimit)) {
 438         // Only consider used space in the slot span. If we consider wasted
 439         // space, we may get an off-by-one when a freelist pointer fits in the
 440         // wasted space, but a slot does not.
 441         // We know we can fit at least one freelist pointer.
 442         numNewFreelistEntries = 1;
 443         // Any further entries require space for the whole slot span.
 444         numNewFreelistEntries += (freelistLimit - firstFreelistPointerExtent) / size;
 445     }
 446
 447     // We always return an object slot -- that's the +1 below.
 448     // We do not neccessarily create any new freelist entries, because we cross sub page boundaries frequently for large bucket sizes.
 449     ASSERT(numNewFreelistEntries + 1 <= numSlots);
 450     numSlots -= (numNewFreelistEntries + 1);
 451     page->numUnprovisionedSlots = numSlots;
 452     page->numAllocatedSlots++;
 453
 454     if (LIKELY(numNewFreelistEntries)) {
 455         char* freelistPointer = firstFreelistPointer;
 456         PartitionFreelistEntry* entry = reinterpret_cast<PartitionFreelistEntry*>(freelistPointer);
 457         page->freelistHead = entry;
 458         while (--numNewFreelistEntries) {
 459             freelistPointer += size;
 460             PartitionFreelistEntry* nextEntry = reinterpret_cast<PartitionFreelistEntry*>(freelistPointer);
 461             entry->next = partitionFreelistMask(nextEntry);
 462             entry = nextEntry;
 463         }
 464         entry->next = partitionFreelistMask(0);
 465     } else {
 466         page->freelistHead = 0;
 467     }
 468     return returnObject;
 469 }
 470
 471 // This helper function scans the active page list for a suitable new active
 472 // page, starting at the passed in page.
 473 // When it finds a suitable new active page (one that has free slots), it is
 474 // set as the new active page and true is returned. If there is no suitable new
 475 // active page, false is returned and the current active page is set to null.
 476 // As potential pages are scanned, they are tidied up according to their state.
 477 // Freed pages are swept on to the free page list and full pages are unlinked
 478 // from any list.
 479 static ALWAYS_INLINE bool partitionSetNewActivePage(PartitionPage* page)
 480 {
 481     if (page == &PartitionRootBase::gSeedPage) {
 482         ASSERT(!page->nextPage);
 483         return false;
 484     }
 485
 486     PartitionPage* nextPage = 0;
 487     PartitionBucket* bucket = page->bucket;
 488
 489     for (; page; page = nextPage) {
 490         nextPage = page->nextPage;
 491         ASSERT(page->bucket == bucket);
 492         ASSERT(page != bucket->freePagesHead);
 493         ASSERT(!bucket->freePagesHead || page != bucket->freePagesHead->nextPage);
 494
 495         // Page is usable if it has something on the freelist, or unprovisioned
 496         // slots that can be turned into a freelist.
 497         if (LIKELY(page->freelistHead != 0) || LIKELY(page->numUnprovisionedSlots)) {
 498             bucket->activePagesHead = page;
 499             return true;
 500         }
 501
 502         ASSERT(page->numAllocatedSlots >= 0);
 503         if (LIKELY(page->numAllocatedSlots == 0)) {
 504             ASSERT(page->freeCacheIndex == -1);
 505             // We hit a free page, so shepherd it on to the free page list.
 506             page->nextPage = bucket->freePagesHead;
 507             bucket->freePagesHead = page;
 508         } else {
 509             // If we get here, we found a full page. Skip over it too, and also
 510             // tag it as full (via a negative value). We need it tagged so that
 511             // free'ing can tell, and move it back into the active page list.
 512             ASSERT(page->numAllocatedSlots == static_cast<int>(partitionBucketSlots(bucket)));
 513             page->numAllocatedSlots = -page->numAllocatedSlots;
 514             ++bucket->numFullPages;
 515             // numFullPages is a uint16_t for efficient packing so guard against
 516             // overflow to be safe.
 517             RELEASE_ASSERT(bucket->numFullPages);
 518             // Not necessary but might help stop accidents.
 519             page->nextPage = 0;
 520         }
 521     }
 522
 523     bucket->activePagesHead = 0;
 524     return false;
 525 }
 526
 527 static ALWAYS_INLINE void* partitionDirectMap(PartitionRootBase* root, int flags, size_t size)
 528 {
 529     size += kSystemPageOffsetMask;
 530     size &= kSystemPageBaseMask;
 531     // Because we need to fake looking like a super page, We need to allocate
 532     // a bunch of system pages more than "size":
 533     // - The first few system pages are the partition page in which the super
 534     // page metadata is stored. We fault just one system page out of a partition
 535     // page sized clump.
 536     // - We add a trailing guard page.
 537     size_t mapSize = size + kPartitionPageSize + kSystemPageSize;
 538     // Round up to the allocation granularity.
 539     mapSize += kPageAllocationGranularityOffsetMask;
 540     mapSize &= kPageAllocationGranularityBaseMask;
 541
 542     // TODO: we may want to let the operating system place these allocations
 543     // where it pleases. On 32-bit, this might limit address space
 544     // fragmentation and on 64-bit, this might have useful savings for TLB
 545     // and page table overhead.
 546     // TODO: if upsizing realloc()s are common on large sizes, we could
 547     // consider over-allocating address space on 64-bit, "just in case".
 548     // TODO: consider pre-populating page tables (e.g. MAP_POPULATE on Linux,
 549     // MADV_WILLNEED on POSIX).
 550     // TODO: these pages will be zero-filled. Consider internalizing an
 551     // allocZeroed() API so we can avoid a memset() entirely in this case.
 552     char* ptr = reinterpret_cast<char*>(allocPages(0, mapSize, kSuperPageSize));
 553     if (!ptr) {
 554         if (flags & PartitionAllocReturnNull)
 555             return 0;
 556         RELEASE_ASSERT(false);
 557     }
 558     char* ret = ptr + kPartitionPageSize;
 559     // TODO: due to all the guard paging, this arrangement creates 4 mappings.
 560     // We could get it down to three by using read-only for the metadata page,
 561     // or perhaps two by leaving out the trailing guard page on 64-bit.
 562     setSystemPagesInaccessible(ptr, kSystemPageSize);
 563     setSystemPagesInaccessible(ptr + (kSystemPageSize * 2), kPartitionPageSize - (kSystemPageSize * 2));
 564     setSystemPagesInaccessible(ret + size, kSystemPageSize);
 565
 566     PartitionSuperPageExtentEntry* extent = reinterpret_cast<PartitionSuperPageExtentEntry*>(partitionSuperPageToMetadataArea(ptr));
 567     extent->root = root;
 568     PartitionPage* page = partitionPointerToPageNoAlignmentCheck(ret);
 569     PartitionBucket* bucket = reinterpret_cast<PartitionBucket*>(reinterpret_cast<char*>(page) + kPageMetadataSize);
 570     page->freelistHead = 0;
 571     page->nextPage = 0;
 572     page->bucket = bucket;
 573     page->numAllocatedSlots = 1;
 574     page->numUnprovisionedSlots = 0;
 575     page->pageOffset = 0;
 576     page->freeCacheIndex = 0;
 577
 578     bucket->activePagesHead = 0;
 579     bucket->freePagesHead = 0;
 580     bucket->slotSize = size;
 581     bucket->numSystemPagesPerSlotSpan = 0;
 582     bucket->numFullPages = 0;
 583
 584     return ret;
 585 }
 586
 587 static ALWAYS_INLINE void partitionDirectUnmap(PartitionPage* page)
 588 {
 589     size_t unmapSize = page->bucket->slotSize;
 590     // Add on the size of the trailing guard page and preceeding partition
 591     // page, then round up to allocation granularity.
 592     unmapSize += kPartitionPageSize + kSystemPageSize;
 593     unmapSize += kPageAllocationGranularityOffsetMask;
 594     unmapSize &= kPageAllocationGranularityBaseMask;
 595
 596     char* ptr = reinterpret_cast<char*>(partitionPageToPointer(page));
 597     // Account for the mapping starting a partition page before the actual
 598     // allocation address.
 599     ptr -= kPartitionPageSize;
 600
 601     freePages(ptr, unmapSize);
 602 }
 603
 604 void* partitionAllocSlowPath(PartitionRootBase* root, int flags, size_t size, PartitionBucket* bucket)
 605 {
 606     // The slow path is called when the freelist is empty.
 607     ASSERT(!bucket->activePagesHead->freelistHead);
 608
 609     // For the partitionAllocGeneric API, we have a bunch of buckets marked
 610     // as special cases. We bounce them through to the slow path so that we
 611     // can still have a blazing fast hot path due to lack of corner-case
 612     // branches.
 613     bool returnNull = flags & PartitionAllocReturnNull;
 614     if (UNLIKELY(!bucket->numSystemPagesPerSlotSpan)) {
 615         ASSERT(size > kGenericMaxBucketed);
 616         ASSERT(bucket == &PartitionRootBase::gPagedBucket);
 617         if (size > INT_MAX - kSystemPageSize) {
 618             if (returnNull)
 619                 return 0;
 620             RELEASE_ASSERT(false);
 621         }
 622         return partitionDirectMap(root, flags, size);
 623     }
 624
 625     // First, look for a usable page in the existing active pages list.
 626     // Change active page, accepting the current page as a candidate.
 627     if (LIKELY(partitionSetNewActivePage(bucket->activePagesHead))) {
 628         PartitionPage* newPage = bucket->activePagesHead;
 629         if (LIKELY(newPage->freelistHead != 0)) {
 630             PartitionFreelistEntry* ret = newPage->freelistHead;
 631             newPage->freelistHead = partitionFreelistMask(ret->next);
 632             newPage->numAllocatedSlots++;
 633             return ret;
 634         }
 635         ASSERT(newPage->numUnprovisionedSlots);
 636         return partitionPageAllocAndFillFreelist(newPage);
 637     }
 638
 639     // Second, look in our list of freed but reserved pages.
 640     PartitionPage* newPage = bucket->freePagesHead;
 641     if (LIKELY(newPage != 0)) {
 642         ASSERT(newPage != &PartitionRootGeneric::gSeedPage);
 643         ASSERT(!newPage->freelistHead);
 644         ASSERT(!newPage->numAllocatedSlots);
 645         ASSERT(!newPage->numUnprovisionedSlots);
 646         ASSERT(newPage->freeCacheIndex == -1);
 647         bucket->freePagesHead = newPage->nextPage;
 648     } else {
 649         // Third. If we get here, we need a brand new page.
 650         size_t numPartitionPages = partitionBucketPartitionPages(bucket);
 651         void* rawNewPage = partitionAllocPartitionPages(root, numPartitionPages);
 652         // Skip the alignment check because it depends on page->bucket, which is not yet set.
 653         newPage = partitionPointerToPageNoAlignmentCheck(rawNewPage);
 654     }
 655
 656     partitionPageReset(newPage, bucket);
 657     bucket->activePagesHead = newPage;
 658     return partitionPageAllocAndFillFreelist(newPage);
 659 }
 660
 661 static ALWAYS_INLINE void partitionFreePage(PartitionPage* page)
 662 {
 663     ASSERT(page->freelistHead);
 664     ASSERT(!page->numAllocatedSlots);
 665     partitionUnusePage(page);
 666     // We actually leave the freed page in the active list. We'll sweep it on
 667     // to the free page list when we next walk the active page list. Pulling
 668     // this trick enables us to use a singly-linked page list for all cases,
 669     // which is critical in keeping the page metadata structure down to 32
 670     // bytes in size.
 671     page->freelistHead = 0;
 672     page->numUnprovisionedSlots = 0;
 673 }
 674
 675 static ALWAYS_INLINE void partitionRegisterEmptyPage(PartitionPage* page)
 676 {
 677     PartitionRootBase* root = partitionPageToRoot(page);
 678     // If the page is already registered as empty, give it another life.
 679     if (page->freeCacheIndex != -1) {
 680         ASSERT(page->freeCacheIndex >= 0);
 681         ASSERT(static_cast<unsigned>(page->freeCacheIndex) < kMaxFreeableSpans);
 682         ASSERT(root->globalEmptyPageRing[page->freeCacheIndex] == page);
 683         root->globalEmptyPageRing[page->freeCacheIndex] = 0;
 684     }
 685
 686     size_t currentIndex = root->globalEmptyPageRingIndex;
 687     PartitionPage* pageToFree = root->globalEmptyPageRing[currentIndex];
 688     // The page might well have been re-activated, filled up, etc. before we get
 689     // around to looking at it here.
 690     if (pageToFree) {
 691         ASSERT(pageToFree != &PartitionRootBase::gSeedPage);
 692         ASSERT(pageToFree->freeCacheIndex >= 0);
 693         ASSERT(static_cast<unsigned>(pageToFree->freeCacheIndex) < kMaxFreeableSpans);
 694         ASSERT(pageToFree == root->globalEmptyPageRing[pageToFree->freeCacheIndex]);
 695         if (!pageToFree->numAllocatedSlots && pageToFree->freelistHead) {
 696             // The page is still empty, and not freed, so _really_ free it.
 697             partitionFreePage(pageToFree);
 698         }
 699         pageToFree->freeCacheIndex = -1;
 700     }
 701
 702     // We put the empty slot span on our global list of "pages that were once
 703     // empty". thus providing it a bit of breathing room to get re-used before
 704     // we really free it. This improves performance, particularly on Mac OS X
 705     // which has subpar memory management performance.
 706     root->globalEmptyPageRing[currentIndex] = page;
 707     page->freeCacheIndex = currentIndex;
 708     ++currentIndex;
 709     if (currentIndex == kMaxFreeableSpans)
 710         currentIndex = 0;
 711     root->globalEmptyPageRingIndex = currentIndex;
 712 }
 713
 714 void partitionFreeSlowPath(PartitionPage* page)
 715 {
 716     PartitionBucket* bucket = page->bucket;
 717     ASSERT(page != &PartitionRootGeneric::gSeedPage);
 718     ASSERT(bucket->activePagesHead != &PartitionRootGeneric::gSeedPage);
 719     if (LIKELY(page->numAllocatedSlots == 0)) {
 720         // Page became fully unused.
 721         if (UNLIKELY(!bucket->numSystemPagesPerSlotSpan)) {
 722             partitionDirectUnmap(page);
 723             return;
 724         }
 725         // If it's the current page, attempt to change it. We'd prefer to leave
 726         // the page empty as a gentle force towards defragmentation.
 727         if (LIKELY(page == bucket->activePagesHead) && page->nextPage) {
 728             if (partitionSetNewActivePage(page->nextPage)) {
 729                 ASSERT(bucket->activePagesHead != page);
 730                 // Link the empty page back in after the new current page, to
 731                 // avoid losing a reference to it.
 732                 // TODO: consider walking the list to link the empty page after
 733                 // all non-empty pages?
 734                 PartitionPage* currentPage = bucket->activePagesHead;
 735                 page->nextPage = currentPage->nextPage;
 736                 currentPage->nextPage = page;
 737             } else {
 738                 bucket->activePagesHead = page;
 739                 page->nextPage = 0;
 740             }
 741         }
 742         partitionRegisterEmptyPage(page);
 743     } else {
 744         // Ensure that the page is full. That's the only valid case if we
 745         // arrive here.
 746         ASSERT(page->numAllocatedSlots < 0);
 747         // A transition of numAllocatedSlots from 0 to -1 is not legal, and
 748         // likely indicates a double-free.
 749         RELEASE_ASSERT(page->numAllocatedSlots != -1);
 750         page->numAllocatedSlots = -page->numAllocatedSlots - 2;
 751         ASSERT(page->numAllocatedSlots == static_cast<int>(partitionBucketSlots(bucket) - 1));
 752         // Fully used page became partially used. It must be put back on the
 753         // non-full page list. Also make it the current page to increase the
 754         // chances of it being filled up again. The old current page will be
 755         // the next page.
 756         page->nextPage = bucket->activePagesHead;
 757         bucket->activePagesHead = page;
 758         --bucket->numFullPages;
 759         // Special case: for a partition page with just a single slot, it may
 760         // now be empty and we want to run it through the empty logic.
 761         if (UNLIKELY(page->numAllocatedSlots == 0))
 762             partitionFreeSlowPath(page);
 763     }
 764 }
 765
 766 void* partitionReallocGeneric(PartitionRootGeneric* root, void* ptr, size_t newSize)
 767 {
 768 #if defined(MEMORY_TOOL_REPLACES_ALLOCATOR)
 769     return realloc(ptr, newSize);
 770 #else
 771     if (UNLIKELY(!ptr))
 772         return partitionAllocGeneric(root, newSize);
 773     if (UNLIKELY(!newSize)) {
 774         partitionFreeGeneric(root, ptr);
 775         return 0;
 776     }
 777
 778     // TODO: note that tcmalloc will "ignore" a downsizing realloc() unless the
 779     // new size is a significant percentage smaller. We could do the same if we
 780     // determine it is a win.
 781     void* realPtr = partitionCookieFreePointerAdjust(ptr);
 782     ASSERT(partitionPointerIsValid(realPtr));
 783     PartitionPage* oldPage = partitionPointerToPage(realPtr);
 784     PartitionBucket* oldBucket = oldPage->bucket;
 785
 786     size_t allocSize = partitionCookieSizeAdjustAdd(newSize);
 787     PartitionBucket* newBucket = partitionGenericSizeToBucket(root, allocSize);
 788
 789     // TODO: for a downsize on a direct mapped allocation, we really should
 790     // just de-commit the correct number of pages off the end.
 791     if (oldBucket == newBucket)
 792         return ptr;
 793
 794     // This realloc cannot be resized in-place. Sadness.
 795     void* ret = partitionAllocGeneric(root, newSize);
 796     size_t copySize = oldPage->bucket->slotSize;
 797     copySize = partitionCookieSizeAdjustSubtract(copySize);
 798     if (newSize < copySize)
 799         copySize = newSize;
 800
 801     memcpy(ret, ptr, copySize);
 802     partitionFreeGeneric(root, ptr);
 803     return ret;
 804 #endif
 805 }
 806
 807 #ifndef NDEBUG
 808
 809 void partitionDumpStats(const PartitionRoot& root)
 810 {
 811     size_t i;
 812     size_t totalLive = 0;
 813     size_t totalResident = 0;
 814     size_t totalFreeable = 0;
 815     for (i = 0; i < root.numBuckets; ++i) {
 816         const PartitionBucket& bucket = root.buckets()[i];
 817         if (bucket.activePagesHead == &PartitionRootGeneric::gSeedPage && !bucket.freePagesHead && !bucket.numFullPages) {
 818             // Empty bucket with no freelist or full pages. Skip reporting it.
 819             continue;
 820         }
 821         size_t numFreePages = 0;
 822         PartitionPage* freePages = bucket.freePagesHead;
 823         while (freePages) {
 824             ++numFreePages;
 825             freePages = freePages->nextPage;
 826         }
 827         size_t bucketSlotSize = bucket.slotSize;
 828         size_t bucketNumSlots = partitionBucketSlots(&bucket);
 829         size_t bucketUsefulStorage = bucketSlotSize * bucketNumSlots;
 830         size_t bucketPageSize = bucket.numSystemPagesPerSlotSpan * kSystemPageSize;
 831         size_t bucketWaste = bucketPageSize - bucketUsefulStorage;
 832         size_t numActiveBytes = bucket.numFullPages * bucketUsefulStorage;
 833         size_t numResidentBytes = bucket.numFullPages * bucketPageSize;
 834         size_t numFreeableBytes = 0;
 835         size_t numActivePages = 0;
 836         const PartitionPage* page = bucket.activePagesHead;
 837         do {
 838             if (page != &PartitionRootGeneric::gSeedPage) {
 839                 ++numActivePages;
 840                 numActiveBytes += (page->numAllocatedSlots * bucketSlotSize);
 841                 size_t pageBytesResident = (bucketNumSlots - page->numUnprovisionedSlots) * bucketSlotSize;
 842                 // Round up to system page size.
 843                 pageBytesResident = (pageBytesResident + kSystemPageOffsetMask) & kSystemPageBaseMask;
 844                 numResidentBytes += pageBytesResident;
 845                 if (!page->numAllocatedSlots)
 846                     numFreeableBytes += pageBytesResident;
 847             }
 848             page = page->nextPage;
 849         } while (page != bucket.activePagesHead);
 850         totalLive += numActiveBytes;
 851         totalResident += numResidentBytes;
 852         totalFreeable += numFreeableBytes;
 853         printf("bucket size %zu (pageSize %zu waste %zu): %zu alloc/%zu commit/%zu freeable bytes, %zu/%zu/%zu full/active/free pages\n", bucketSlotSize, bucketPageSize, bucketWaste, numActiveBytes, numResidentBytes, numFreeableBytes, static_cast<size_t>(bucket.numFullPages), numActivePages, numFreePages);
 854     }
 855     printf("total live: %zu bytes\n", totalLive);
 856     printf("total resident: %zu bytes\n", totalResident);
 857     printf("total freeable: %zu bytes\n", totalFreeable);
 858     fflush(stdout);
 859 }
 860
 861 #endif // !NDEBUG
 862
 863 } // namespace WTF
 864