/*
 * Copyright (C) 2013 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "config.h"
#include "wtf/PartitionAlloc.h"

#include <stdio.h>  // printf() in partitionDumpStats().
#include <string.h> // memset() and memcpy().
// Two partition pages are used as guard / metadata pages, so make sure the
// super page size is bigger.
COMPILE_ASSERT(WTF::kPartitionPageSize * 4 <= WTF::kSuperPageSize, ok_super_page_size);
COMPILE_ASSERT(!(WTF::kSuperPageSize % WTF::kPartitionPageSize), ok_super_page_multiple);
// Four system pages give us room to hack out a still-guard-paged piece
// of metadata in the middle of a guard partition page.
COMPILE_ASSERT(WTF::kSystemPageSize * 4 <= WTF::kPartitionPageSize, ok_partition_page_size);
COMPILE_ASSERT(!(WTF::kPartitionPageSize % WTF::kSystemPageSize), ok_partition_page_multiple);
COMPILE_ASSERT(sizeof(WTF::PartitionPage) <= WTF::kPageMetadataSize, PartitionPage_not_too_big);
COMPILE_ASSERT(sizeof(WTF::PartitionBucket) <= WTF::kPageMetadataSize, PartitionBucket_not_too_big);
COMPILE_ASSERT(sizeof(WTF::PartitionSuperPageExtentEntry) <= WTF::kPageMetadataSize, PartitionSuperPageExtentEntry_not_too_big);
COMPILE_ASSERT(WTF::kPageMetadataSize * WTF::kNumPartitionPagesPerSuperPage <= WTF::kSystemPageSize, page_metadata_fits_in_hole);
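// For reference, with the typical values of these constants (4KB system pages,
// 16KB partition pages, 2MB super pages and 32-byte metadata entries -- an
// assumption, not something this file asserts directly), a super page holds
// 2MB / 16KB = 128 partition pages, whose metadata occupies 128 * 32 = 4096
// bytes: exactly the single system page checked by the assert above.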
// Check that some of our zanier calculations worked out as expected.
COMPILE_ASSERT(WTF::kGenericSmallestBucket == 8, generic_smallest_bucket);
COMPILE_ASSERT(WTF::kGenericMaxBucketed == 983040, generic_max_bucketed);
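// (983040 == (1 << 20) - (1 << 16). Assuming eight buckets per order, the top
// bucketed order spans (512KB, 1MB] in 64KB steps, so the largest bucket sits
// exactly one step below 1MB; see the bucket setup loop in
// partitionAllocGenericInit below.)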
namespace WTF {

int PartitionRootBase::gInitializedLock = 0;
bool PartitionRootBase::gInitialized = false;
PartitionPage PartitionRootBase::gSeedPage;
PartitionBucket PartitionRootBase::gPagedBucket;
static uint16_t partitionBucketNumSystemPages(size_t size)
{
    // This works out reasonably for the current bucket sizes of the generic
    // allocator, and the current values of partition page size and constants.
    // Specifically, we have enough room to always pack the slots perfectly into
    // some number of system pages. The only waste is the waste associated with
    // unfaulted pages (i.e. wasted address space).
    // TODO: we end up using a lot of system pages for very small sizes. For
    // example, we'll use 12 system pages for slot size 24. The slot size is
    // so small that the waste would be tiny with just 4, or 1, system pages.
    // Later, we can investigate whether there are anti-fragmentation benefits
    // to using fewer system pages.
    double bestWasteRatio = 1.0f;
    uint16_t bestPages = 0;
    if (size > kMaxSystemPagesPerSlotSpan * kSystemPageSize) {
        ASSERT(!(size % kSystemPageSize));
        return static_cast<uint16_t>(size / kSystemPageSize);
    }
    ASSERT(size <= kMaxSystemPagesPerSlotSpan * kSystemPageSize);
    for (uint16_t i = kNumSystemPagesPerPartitionPage - 1; i <= kMaxSystemPagesPerSlotSpan; ++i) {
        size_t pageSize = kSystemPageSize * i;
        size_t numSlots = pageSize / size;
        size_t waste = pageSize - (numSlots * size);
        // Leaving a page unfaulted is not free; the page will occupy an empty
        // page table entry. Make a simple attempt to account for that.
        size_t numRemainderPages = i & (kNumSystemPagesPerPartitionPage - 1);
        size_t numUnfaultedPages = numRemainderPages ? (kNumSystemPagesPerPartitionPage - numRemainderPages) : 0;
        waste += sizeof(void*) * numUnfaultedPages;
        double wasteRatio = (double) waste / (double) pageSize;
        if (wasteRatio < bestWasteRatio) {
            bestWasteRatio = wasteRatio;
            bestPages = i;
        }
    }
    ASSERT(bestPages > 0);
    return bestPages;
}
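// Worked example for the TODO above (assuming 4KB system pages and 4 system
// pages per partition page): for a slot size of 24, spans of 3, 6 and 12 system
// pages all pack slots with zero slack (24 divides 12288, 24576 and 49152), but
// only 12 is a whole number of partition pages and so pays no unfaulted-page
// penalty -- hence the 12-system-page spans mentioned in the TODO.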
static void partitionAllocBaseInit(PartitionRootBase* root)
{
    ASSERT(!root->initialized);

    spinLockLock(&PartitionRootBase::gInitializedLock);
    if (!PartitionRootBase::gInitialized) {
        PartitionRootBase::gInitialized = true;
        // We mark the seed page as free to make sure it is skipped by our
        // logic to find a new active page.
        PartitionRootBase::gPagedBucket.activePagesHead = &PartitionRootGeneric::gSeedPage;
    }
    spinLockUnlock(&PartitionRootBase::gInitializedLock);

    root->initialized = true;
    root->totalSizeOfCommittedPages = 0;
    root->totalSizeOfSuperPages = 0;
    root->nextSuperPage = 0;
    root->nextPartitionPage = 0;
    root->nextPartitionPageEnd = 0;
    root->firstExtent = 0;
    root->currentExtent = 0;

    memset(&root->globalEmptyPageRing, '\0', sizeof(root->globalEmptyPageRing));
    root->globalEmptyPageRingIndex = 0;

    // This is a "magic" value so we can test if a root pointer is valid.
    root->invertedSelf = ~reinterpret_cast<uintptr_t>(root);
}
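// A root pointer can later be validated cheaply; the partitionPointerIsValid()
// style checks used elsewhere in this file presumably reduce to something like
// "root->invertedSelf == ~reinterpret_cast<uintptr_t>(root)".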
static void partitionBucketInitBase(PartitionBucket* bucket, PartitionRootBase* root)
{
    bucket->activePagesHead = &PartitionRootGeneric::gSeedPage;
    bucket->freePagesHead = 0;
    bucket->numFullPages = 0;
    bucket->numSystemPagesPerSlotSpan = partitionBucketNumSystemPages(bucket->slotSize);
}
void partitionAllocInit(PartitionRoot* root, size_t numBuckets, size_t maxAllocation)
{
    partitionAllocBaseInit(root);

    root->numBuckets = numBuckets;
    root->maxAllocation = maxAllocation;
    for (size_t i = 0; i < root->numBuckets; ++i) {
        PartitionBucket* bucket = &root->buckets()[i];
        if (!i)
            bucket->slotSize = kAllocationGranularity;
        else
            bucket->slotSize = i << kBucketShift;
        partitionBucketInitBase(bucket, root);
    }
}
void partitionAllocGenericInit(PartitionRootGeneric* root)
{
    partitionAllocBaseInit(root);

    root->lock = 0;

    // Precalculate some shift and mask constants used in the hot path.
    // Example: malloc(41) == 101001 in binary.
    // Order is 6: (1 << (6 - 1)) == 32 is the highest bit set.
    // orderIndex is the next three MSB == 010 == 2.
    // subOrderIndexMask is a mask for the remaining bits == 11 (masking to 01 for the subOrderIndex).
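    // The hot-path size -> bucket mapping in wtf/PartitionAlloc.h presumably
    // combines these tables roughly as follows (hypothetical sketch, not the
    // verbatim header code; countLeadingZerosSizet is an assumed helper):
    //
    //   size_t order = kBitsPerSizet - countLeadingZerosSizet(size);
    //   size_t orderIndex = (size >> root->orderIndexShifts[order]) & (kGenericNumBucketsPerOrder - 1);
    //   size_t subOrderIndex = size & root->orderSubIndexMasks[order];
    //   bucket = root->bucketLookups[(order << kGenericNumBucketsPerOrderBits) + orderIndex + !!subOrderIndex];
    //
    // For malloc(41) that gives order 6, orderIndex 2, subOrderIndex 1: the
    // non-zero remainder bumps the request up to the next bucket.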
    for (size_t order = 0; order <= kBitsPerSizet; ++order) {
        size_t orderIndexShift;
        if (order < kGenericNumBucketsPerOrderBits + 1)
            orderIndexShift = 0;
        else
            orderIndexShift = order - (kGenericNumBucketsPerOrderBits + 1);
        root->orderIndexShifts[order] = orderIndexShift;
        size_t subOrderIndexMask;
        if (order == kBitsPerSizet) {
            // This avoids invoking undefined behavior for an excessive shift.
            subOrderIndexMask = static_cast<size_t>(-1) >> (kGenericNumBucketsPerOrderBits + 1);
        } else {
            // Shift a size_t, not a plain int 1: "1 << order" is undefined for
            // order >= 32 on 64-bit platforms.
            subOrderIndexMask = ((static_cast<size_t>(1) << order) - 1) >> (kGenericNumBucketsPerOrderBits + 1);
        }
        root->orderSubIndexMasks[order] = subOrderIndexMask;
    }

    // Set up the actual usable buckets first.
    // Note that typical values (i.e. min allocation size of 8) will result in
    // invalid buckets (size==9 etc. or more generally, size is not a multiple
    // of the smallest allocation granularity).
    // We avoid them in the bucket lookup map, but we tolerate them to keep the
    // code simpler and the structures more generic.
    size_t i, j;
    size_t currentSize = kGenericSmallestBucket;
    size_t currentIncrement = kGenericSmallestBucket >> kGenericNumBucketsPerOrderBits;
    PartitionBucket* bucket = &root->buckets[0];
    for (i = 0; i < kGenericNumBucketedOrders; ++i) {
        for (j = 0; j < kGenericNumBucketsPerOrder; ++j) {
            bucket->slotSize = currentSize;
            partitionBucketInitBase(bucket, root);
            // Disable invalid buckets so that touching them faults.
            if (currentSize % kGenericSmallestBucket)
                bucket->activePagesHead = 0;
            currentSize += currentIncrement;
            ++bucket;
        }
        currentIncrement <<= 1;
    }
    ASSERT(currentSize == 1 << kGenericMaxBucketedOrder);
    ASSERT(bucket == &root->buckets[0] + (kGenericNumBucketedOrders * kGenericNumBucketsPerOrder));
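    // For example (given kGenericSmallestBucket == 8, as asserted at the top of
    // this file, and assuming eight buckets per order): the first bucketed
    // order yields slot sizes 8, 9, 10, ..., 15, of which only 8 survives the
    // validity test above; the next order yields 16, 18, ..., 30, keeping 16
    // and 24; and so on, with ever coarser spacing.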
    // Then set up the fast size -> bucket lookup table.
    bucket = &root->buckets[0];
    PartitionBucket** bucketPtr = &root->bucketLookups[0];
    for (size_t order = 0; order <= kBitsPerSizet; ++order) {
        for (j = 0; j < kGenericNumBucketsPerOrder; ++j) {
            if (order < kGenericMinBucketedOrder) {
                // Use the bucket of finest granularity for malloc(0) etc.
                *bucketPtr++ = &root->buckets[0];
            } else if (order > kGenericMaxBucketedOrder) {
                *bucketPtr++ = &PartitionRootGeneric::gPagedBucket;
            } else {
                PartitionBucket* validBucket = bucket;
                // Skip over invalid buckets.
                while (validBucket->slotSize % kGenericSmallestBucket)
                    validBucket++;
                *bucketPtr++ = validBucket;
                bucket++;
            }
        }
    }
    ASSERT(bucket == &root->buckets[0] + (kGenericNumBucketedOrders * kGenericNumBucketsPerOrder));
    ASSERT(bucketPtr == &root->bucketLookups[0] + ((kBitsPerSizet + 1) * kGenericNumBucketsPerOrder));
    // And there's one last bucket lookup that will be hit for e.g. malloc(-1),
    // which tries to overflow to a non-existent order.
    *bucketPtr = &PartitionRootGeneric::gPagedBucket;
}
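// Typical usage of the generic API initialized above (a sketch; the alloc and
// free entry points live in wtf/PartitionAlloc.h):
//
//   static PartitionRootGeneric gRoot;
//   partitionAllocGenericInit(&gRoot);
//   void* p = partitionAllocGeneric(&gRoot, 41); // Served from the 48-byte
//                                                // bucket: invalid 44 is
//                                                // skipped by the lookup.
//   partitionFreeGeneric(&gRoot, p);
//   partitionAllocGenericShutdown(&gRoot);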
static bool partitionAllocShutdownBucket(PartitionBucket* bucket)
{
    // Failure here indicates a memory leak.
    bool noLeaks = !bucket->numFullPages;
    PartitionPage* page = bucket->activePagesHead;
    while (page) {
        if (page->numAllocatedSlots)
            noLeaks = false;
        page = page->nextPage;
    }

    return noLeaks;
}
static void partitionAllocBaseShutdown(PartitionRootBase* root)
{
    ASSERT(root->initialized);
    root->initialized = false;

    // Now that we've examined all partition pages in all buckets, it's safe
    // to free all our super pages. Since the super page extent entries are
    // stored in the super pages, we need to be careful not to access them
    // after we've released the corresponding super page.
    PartitionSuperPageExtentEntry* entry = root->firstExtent;
    while (entry) {
        PartitionSuperPageExtentEntry* nextEntry = entry->next;
        char* superPage = entry->superPageBase;
        char* superPagesEnd = entry->superPagesEnd;
        while (superPage < superPagesEnd) {
            freePages(superPage, kSuperPageSize);
            superPage += kSuperPageSize;
        }
        entry = nextEntry;
    }
}
bool partitionAllocShutdown(PartitionRoot* root)
{
    bool noLeaks = true;
    for (size_t i = 0; i < root->numBuckets; ++i) {
        PartitionBucket* bucket = &root->buckets()[i];
        if (!partitionAllocShutdownBucket(bucket))
            noLeaks = false;
    }

    partitionAllocBaseShutdown(root);
    return noLeaks;
}
bool partitionAllocGenericShutdown(PartitionRootGeneric* root)
{
    bool noLeaks = true;
    for (size_t i = 0; i < kGenericNumBucketedOrders * kGenericNumBucketsPerOrder; ++i) {
        PartitionBucket* bucket = &root->buckets[i];
        if (!partitionAllocShutdownBucket(bucket))
            noLeaks = false;
    }

    partitionAllocBaseShutdown(root);
    return noLeaks;
}
static NEVER_INLINE void partitionOutOfMemory()
{
    // Assumed fill for the elided body: out-of-memory is fatal unless the
    // caller passed PartitionAllocReturnNull.
    IMMEDIATE_CRASH();
}
static ALWAYS_INLINE void partitionDecommitSystemPages(PartitionRootBase* root, void* addr, size_t len)
{
    decommitSystemPages(addr, len);
    ASSERT(root->totalSizeOfCommittedPages > len);
    root->totalSizeOfCommittedPages -= len;
}
static ALWAYS_INLINE void partitionRecommitSystemPages(PartitionRootBase* root, void* addr, size_t len)
{
    recommitSystemPages(addr, len);
    root->totalSizeOfCommittedPages += len;
}
static ALWAYS_INLINE void* partitionAllocPartitionPages(PartitionRootBase* root, int flags, uint16_t numPartitionPages)
{
    ASSERT(!(reinterpret_cast<uintptr_t>(root->nextPartitionPage) % kPartitionPageSize));
    ASSERT(!(reinterpret_cast<uintptr_t>(root->nextPartitionPageEnd) % kPartitionPageSize));
    RELEASE_ASSERT(numPartitionPages <= kNumPartitionPagesPerSuperPage);
    size_t totalSize = kPartitionPageSize * numPartitionPages;
    root->totalSizeOfCommittedPages += totalSize;
    size_t numPartitionPagesLeft = (root->nextPartitionPageEnd - root->nextPartitionPage) >> kPartitionPageShift;
    if (LIKELY(numPartitionPagesLeft >= numPartitionPages)) {
        // In this case, we can still hand out pages from the current super page
        // allocation.
        char* ret = root->nextPartitionPage;
        root->nextPartitionPage += totalSize;
        return ret;
    }

    // Need a new super page.
    root->totalSizeOfSuperPages += kSuperPageSize;
    char* requestedAddress = root->nextSuperPage;
    char* superPage = reinterpret_cast<char*>(allocPages(requestedAddress, kSuperPageSize, kSuperPageSize));
    if (UNLIKELY(!superPage))
        return 0;
    root->nextSuperPage = superPage + kSuperPageSize;
    char* ret = superPage + kPartitionPageSize;
    root->nextPartitionPage = ret + totalSize;
    root->nextPartitionPageEnd = root->nextSuperPage - kPartitionPageSize;
    // Make the first partition page in the super page a guard page, but leave a
    // hole in the middle.
    // This is where we put page metadata and also a tiny amount of extent
    // metadata.
    setSystemPagesInaccessible(superPage, kSystemPageSize);
    setSystemPagesInaccessible(superPage + (kSystemPageSize * 2), kPartitionPageSize - (kSystemPageSize * 2));
    // Also make the last partition page a guard page.
    setSystemPagesInaccessible(superPage + (kSuperPageSize - kPartitionPageSize), kPartitionPageSize);
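    // The resulting super page layout, assuming 4KB system pages and 16KB
    // partition pages, is:
    //
    //   | guard 4KB | metadata 4KB | guard 8KB | 126 partition pages | guard 16KB |
    //
    // i.e. the first and last partition pages are guards, with the metadata
    // system page "hacked out" of the middle of the leading guard page.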
    // If we were after a specific address, but didn't get it, assume that
    // the system chose a lousy address and re-randomize the next
    // allocation.
    if (requestedAddress && requestedAddress != superPage)
        root->nextSuperPage = 0;

    // We allocated a new super page so update super page metadata.
    // First check if this is a new extent or not.
    PartitionSuperPageExtentEntry* latestExtent = reinterpret_cast<PartitionSuperPageExtentEntry*>(partitionSuperPageToMetadataArea(superPage));
    PartitionSuperPageExtentEntry* currentExtent = root->currentExtent;
    bool isNewExtent = (superPage != requestedAddress);
    if (UNLIKELY(isNewExtent)) {
        latestExtent->next = 0;
        if (UNLIKELY(!currentExtent)) {
            root->firstExtent = latestExtent;
        } else {
            ASSERT(currentExtent->superPageBase);
            currentExtent->next = latestExtent;
        }
        root->currentExtent = latestExtent;
        currentExtent = latestExtent;
        currentExtent->superPageBase = superPage;
        currentExtent->superPagesEnd = superPage + kSuperPageSize;
    } else {
        // We allocated next to an existing extent so just nudge the size up a little.
        currentExtent->superPagesEnd += kSuperPageSize;
        ASSERT(ret >= currentExtent->superPageBase && ret < currentExtent->superPagesEnd);
    }
    // By storing the root in every extent metadata object, we have a fast way
    // to go from a pointer within the partition to the root object.
    latestExtent->root = root;

    return ret;
}
static ALWAYS_INLINE void partitionUnusePage(PartitionRootBase* root, PartitionPage* page)
{
    ASSERT(page->bucket->numSystemPagesPerSlotSpan);
    void* addr = partitionPageToPointer(page);
    partitionDecommitSystemPages(root, addr, page->bucket->numSystemPagesPerSlotSpan * kSystemPageSize);
}
static ALWAYS_INLINE uint16_t partitionBucketSlots(const PartitionBucket* bucket)
{
    return static_cast<uint16_t>((bucket->numSystemPagesPerSlotSpan * kSystemPageSize) / bucket->slotSize);
}
static ALWAYS_INLINE uint16_t partitionBucketPartitionPages(const PartitionBucket* bucket)
{
    return (bucket->numSystemPagesPerSlotSpan + (kNumSystemPagesPerPartitionPage - 1)) / kNumSystemPagesPerPartitionPage;
}
static ALWAYS_INLINE void partitionPageReset(PartitionPage* page, PartitionBucket* bucket)
{
    ASSERT(page != &PartitionRootGeneric::gSeedPage);
    page->numAllocatedSlots = 0;
    page->numUnprovisionedSlots = partitionBucketSlots(bucket);
    ASSERT(page->numUnprovisionedSlots);
    page->bucket = bucket;
    page->nextPage = 0;
    // NULLing the freelist is not strictly necessary but it makes an ASSERT in
    // partitionPageAllocAndFillFreelist simpler.
    page->freelistHead = 0;
    page->pageOffset = 0;
    page->freeCacheIndex = -1;
    uint16_t numPartitionPages = partitionBucketPartitionPages(bucket);
    char* pageCharPtr = reinterpret_cast<char*>(page);
    for (uint16_t i = 1; i < numPartitionPages; ++i) {
        pageCharPtr += kPageMetadataSize;
        PartitionPage* secondaryPage = reinterpret_cast<PartitionPage*>(pageCharPtr);
        secondaryPage->pageOffset = i;
    }
}
static ALWAYS_INLINE char* partitionPageAllocAndFillFreelist(PartitionPage* page)
{
    ASSERT(page != &PartitionRootGeneric::gSeedPage);
    uint16_t numSlots = page->numUnprovisionedSlots;
    ASSERT(numSlots);
    PartitionBucket* bucket = page->bucket;
    // We should only get here when _every_ slot is either used or unprovisioned.
    // (The third state is "on the freelist". If we have a non-empty freelist, we should not get here.)
    ASSERT(numSlots + page->numAllocatedSlots == partitionBucketSlots(bucket));
    // Similarly, make explicitly sure that the freelist is empty.
    ASSERT(!page->freelistHead);
    ASSERT(page->numAllocatedSlots >= 0);

    size_t size = bucket->slotSize;
    char* base = reinterpret_cast<char*>(partitionPageToPointer(page));
    char* returnObject = base + (size * page->numAllocatedSlots);
    char* firstFreelistPointer = returnObject + size;
    char* firstFreelistPointerExtent = firstFreelistPointer + sizeof(PartitionFreelistEntry*);
    // Our goal is to fault as few system pages as possible. We calculate the
    // page containing the "end" of the returned slot, and then allow freelist
    // pointers to be written up to the end of that page.
    char* subPageLimit = reinterpret_cast<char*>((reinterpret_cast<uintptr_t>(firstFreelistPointer) + kSystemPageOffsetMask) & kSystemPageBaseMask);
    char* slotsLimit = returnObject + (size * page->numUnprovisionedSlots);
    char* freelistLimit = subPageLimit;
    if (UNLIKELY(slotsLimit < freelistLimit))
        freelistLimit = slotsLimit;

    uint16_t numNewFreelistEntries = 0;
    if (LIKELY(firstFreelistPointerExtent <= freelistLimit)) {
        // Only consider used space in the slot span. If we consider wasted
        // space, we may get an off-by-one when a freelist pointer fits in the
        // wasted space, but a slot does not.
        // We know we can fit at least one freelist pointer.
        numNewFreelistEntries = 1;
        // Any further entries require space for the whole slot span.
        numNewFreelistEntries += static_cast<uint16_t>((freelistLimit - firstFreelistPointerExtent) / size);
    }

    // We always return an object slot -- that's the +1 below.
    // We do not necessarily create any new freelist entries, because we cross
    // sub page boundaries frequently for large bucket sizes.
    ASSERT(numNewFreelistEntries + 1 <= numSlots);
    numSlots -= (numNewFreelistEntries + 1);
    page->numUnprovisionedSlots = numSlots;
    page->numAllocatedSlots++;

    if (LIKELY(numNewFreelistEntries)) {
        char* freelistPointer = firstFreelistPointer;
        PartitionFreelistEntry* entry = reinterpret_cast<PartitionFreelistEntry*>(freelistPointer);
        page->freelistHead = entry;
        while (--numNewFreelistEntries) {
            freelistPointer += size;
            PartitionFreelistEntry* nextEntry = reinterpret_cast<PartitionFreelistEntry*>(freelistPointer);
            entry->next = partitionFreelistMask(nextEntry);
            entry = nextEntry;
        }
        entry->next = partitionFreelistMask(0);
    } else {
        page->freelistHead = 0;
    }
    return returnObject;
}
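// Worked example (assuming 4KB system pages and 8-byte pointers): on a fresh
// span with slotSize == 128, returnObject sits at base, firstFreelistPointer at
// base + 128 and subPageLimit at base + 4096, leaving room for
// 1 + (4096 - 136) / 128 == 31 freelist entries. One call thus faults in a
// single system page and consumes 32 slots: 1 returned + 31 put on the
// freelist.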
// This helper function scans the active page list for a suitable new active
// page, starting at the passed in page.
// When it finds a suitable new active page (one that has free slots), it is
// set as the new active page and true is returned. If there is no suitable new
// active page, false is returned and the current active page is set to null.
// As potential pages are scanned, they are tidied up according to their state.
// Freed pages are swept on to the free page list and full pages are unlinked
// from the active page list.
static ALWAYS_INLINE bool partitionSetNewActivePage(PartitionPage* page)
{
    if (page == &PartitionRootBase::gSeedPage) {
        ASSERT(!page->nextPage);
        return false;
    }

    PartitionPage* nextPage = 0;
    PartitionBucket* bucket = page->bucket;

    for (; page; page = nextPage) {
        nextPage = page->nextPage;
        ASSERT(page->bucket == bucket);
        ASSERT(page != bucket->freePagesHead);
        ASSERT(!bucket->freePagesHead || page != bucket->freePagesHead->nextPage);

        // Page is usable if it has something on the freelist, or unprovisioned
        // slots that can be turned into a freelist.
        if (LIKELY(page->freelistHead != 0) || LIKELY(page->numUnprovisionedSlots)) {
            bucket->activePagesHead = page;
            return true;
        }

        ASSERT(page->numAllocatedSlots >= 0);
        if (LIKELY(page->numAllocatedSlots == 0)) {
            ASSERT(page->freeCacheIndex == -1);
            // We hit a free page, so shepherd it on to the free page list.
            page->nextPage = bucket->freePagesHead;
            bucket->freePagesHead = page;
        } else {
            // If we get here, we found a full page. Skip over it too, and also
            // tag it as full (via a negative value). We need it tagged so that
            // free'ing can tell, and move it back into the active page list.
            ASSERT(page->numAllocatedSlots == partitionBucketSlots(bucket));
            page->numAllocatedSlots = -page->numAllocatedSlots;
            ++bucket->numFullPages;
            // numFullPages is a uint16_t for efficient packing so guard against
            // overflow to be safe.
            RELEASE_ASSERT(bucket->numFullPages);
            // Not necessary but might help stop accidents.
            page->nextPage = 0;
        }
    }

    bucket->activePagesHead = 0;
    return false;
}
struct PartitionDirectMapExtent {
    size_t mapSize; // Mapped size, not including guard pages and meta-data.
};
static ALWAYS_INLINE PartitionDirectMapExtent* partitionPageToDirectMapExtent(PartitionPage* page)
{
    ASSERT(partitionBucketIsDirectMapped(page->bucket));
    return reinterpret_cast<PartitionDirectMapExtent*>(reinterpret_cast<char*>(page) + 2 * kPageMetadataSize);
}
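// The direct-map metadata system page is thus laid out as: a PartitionPage at
// offset 0, a PartitionBucket at kPageMetadataSize (see partitionDirectMap
// below) and this extent record at 2 * kPageMetadataSize.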
static ALWAYS_INLINE void* partitionDirectMap(PartitionRootBase* root, int flags, size_t size)
{
    size = partitionDirectMapSize(size);

    // Because we need to fake looking like a super page, we need to allocate
    // a bunch of system pages more than "size":
    // - The first few system pages are the partition page in which the super
    // page metadata is stored. We fault just one system page out of a partition
    // page sized clump.
    // - We add a trailing guard page.
    size_t mapSize = size + kPartitionPageSize + kSystemPageSize;
    // Round up to the allocation granularity.
    mapSize += kPageAllocationGranularityOffsetMask;
    mapSize &= kPageAllocationGranularityBaseMask;

    root->totalSizeOfCommittedPages += size + kSystemPageSize;

    // TODO: we may want to let the operating system place these allocations
    // where it pleases. On 32-bit, this might limit address space
    // fragmentation and on 64-bit, this might have useful savings for TLB
    // and page table overhead.
    // TODO: if upsizing realloc()s are common on large sizes, we could
    // consider over-allocating address space on 64-bit, "just in case".
    // TODO: consider pre-populating page tables (e.g. MAP_POPULATE on Linux,
    // MADV_WILLNEED on POSIX).
    // TODO: these pages will be zero-filled. Consider internalizing an
    // allocZeroed() API so we can avoid a memset() entirely in this case.
    char* ptr = reinterpret_cast<char*>(allocPages(0, mapSize, kSuperPageSize));
    if (UNLIKELY(!ptr))
        return 0;
    char* ret = ptr + kPartitionPageSize;
    // TODO: due to all the guard paging, this arrangement creates 4 mappings.
    // We could get it down to three by using read-only for the metadata page,
    // or perhaps two by leaving out the trailing guard page on 64-bit.
    setSystemPagesInaccessible(ptr, kSystemPageSize);
    setSystemPagesInaccessible(ptr + (kSystemPageSize * 2), kPartitionPageSize - (kSystemPageSize * 2));
    setSystemPagesInaccessible(ret + size, kSystemPageSize);

    PartitionSuperPageExtentEntry* extent = reinterpret_cast<PartitionSuperPageExtentEntry*>(partitionSuperPageToMetadataArea(ptr));
    extent->root = root;
    PartitionPage* page = partitionPointerToPageNoAlignmentCheck(ret);
    PartitionBucket* bucket = reinterpret_cast<PartitionBucket*>(reinterpret_cast<char*>(page) + kPageMetadataSize);
    page->freelistHead = 0;
    page->nextPage = 0;
    page->bucket = bucket;
    page->numAllocatedSlots = 1;
    page->numUnprovisionedSlots = 0;
    page->pageOffset = 0;
    page->freeCacheIndex = 0;

    bucket->activePagesHead = 0;
    bucket->freePagesHead = 0;
    bucket->slotSize = size;
    bucket->numSystemPagesPerSlotSpan = 0;
    bucket->numFullPages = 0;

    PartitionDirectMapExtent* mapExtent = partitionPageToDirectMapExtent(page);
    mapExtent->mapSize = mapSize - kPartitionPageSize - kSystemPageSize;

    return ret;
}
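// The resulting direct-map layout mirrors the front end of a super page,
// assuming the same guard-page scheme as partitionAllocPartitionPages:
//
//   | guard/metadata partition page | "size" bytes of payload | guard system page |
//
// with the overall mapping rounded up to the page allocation granularity.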
static ALWAYS_INLINE void partitionDirectUnmap(PartitionPage* page)
{
    size_t unmapSize = partitionPageToDirectMapExtent(page)->mapSize;

    // Add on the size of the trailing guard page and preceding partition
    // page.
    unmapSize += kPartitionPageSize + kSystemPageSize;

    PartitionRootBase* root = partitionPageToRoot(page);
    root->totalSizeOfCommittedPages -= page->bucket->slotSize + kSystemPageSize;

    ASSERT(!(unmapSize & kPageAllocationGranularityOffsetMask));

    char* ptr = reinterpret_cast<char*>(partitionPageToPointer(page));
    // Account for the mapping starting a partition page before the actual
    // allocation address.
    ptr -= kPartitionPageSize;

    freePages(ptr, unmapSize);
}
void* partitionAllocSlowPath(PartitionRootBase* root, int flags, size_t size, PartitionBucket* bucket)
{
    // The slow path is called when the freelist is empty.
    ASSERT(!bucket->activePagesHead->freelistHead);

    PartitionPage* newPage = nullptr;

    // For the partitionAllocGeneric API, we have a bunch of buckets marked
    // as special cases. We bounce them through to the slow path so that we
    // can still have a blazing fast hot path due to lack of corner-case
    // branches.
    bool returnNull = flags & PartitionAllocReturnNull;
    if (UNLIKELY(partitionBucketIsDirectMapped(bucket))) {
        ASSERT(size > kGenericMaxBucketed);
        ASSERT(bucket == &PartitionRootBase::gPagedBucket);
        if (size > kGenericMaxDirectMapped) {
            if (returnNull)
                return 0;
            RELEASE_ASSERT(false);
        }
        void* ptr = partitionDirectMap(root, flags, size);
        if (UNLIKELY(!ptr))
            goto partitionAllocSlowPathFailed;
        return ptr;
    }

    // First, look for a usable page in the existing active pages list.
    // Change active page, accepting the current page as a candidate.
    if (LIKELY(partitionSetNewActivePage(bucket->activePagesHead))) {
        newPage = bucket->activePagesHead;
        if (LIKELY(newPage->freelistHead != 0)) {
            PartitionFreelistEntry* ret = newPage->freelistHead;
            newPage->freelistHead = partitionFreelistMask(ret->next);
            newPage->numAllocatedSlots++;
            return ret;
        }
        ASSERT(newPage->numUnprovisionedSlots);
        return partitionPageAllocAndFillFreelist(newPage);
    }

    // Second, look in our list of freed but reserved pages.
    newPage = bucket->freePagesHead;
    if (LIKELY(newPage != 0)) {
        ASSERT(newPage != &PartitionRootGeneric::gSeedPage);
        ASSERT(!newPage->freelistHead);
        ASSERT(!newPage->numAllocatedSlots);
        ASSERT(!newPage->numUnprovisionedSlots);
        ASSERT(newPage->freeCacheIndex == -1);
        bucket->freePagesHead = newPage->nextPage;
        void* addr = partitionPageToPointer(newPage);
        partitionRecommitSystemPages(root, addr, newPage->bucket->numSystemPagesPerSlotSpan * kSystemPageSize);
    } else {
        // Third. If we get here, we need a brand new page.
        uint16_t numPartitionPages = partitionBucketPartitionPages(bucket);
        void* rawNewPage = partitionAllocPartitionPages(root, flags, numPartitionPages);
        if (UNLIKELY(!rawNewPage))
            goto partitionAllocSlowPathFailed;
        // Skip the alignment check because it depends on page->bucket, which is not yet set.
        newPage = partitionPointerToPageNoAlignmentCheck(rawNewPage);
    }

    partitionPageReset(newPage, bucket);
    bucket->activePagesHead = newPage;
    return partitionPageAllocAndFillFreelist(newPage);

partitionAllocSlowPathFailed:
    // If we get here, we will set the active page to null, which is an
    // invalid state. To support continued use of this bucket, we need to
    // restore a valid state, by setting the active page to the seed page.
    bucket->activePagesHead = &PartitionRootGeneric::gSeedPage;
    if (returnNull)
        return nullptr;
    partitionOutOfMemory();
    return nullptr;
}
static ALWAYS_INLINE void partitionFreePage(PartitionRootBase* root, PartitionPage* page)
{
    ASSERT(page->freelistHead);
    ASSERT(!page->numAllocatedSlots);
    partitionUnusePage(root, page);
    // We actually leave the freed page in the active list. We'll sweep it on
    // to the free page list when we next walk the active page list. Pulling
    // this trick enables us to use a singly-linked page list for all cases,
    // which is critical in keeping the page metadata structure down to 32
    // bytes.
    page->freelistHead = 0;
    page->numUnprovisionedSlots = 0;
}
static ALWAYS_INLINE void partitionRegisterEmptyPage(PartitionPage* page)
{
    PartitionRootBase* root = partitionPageToRoot(page);

    // If the page is already registered as empty, give it another life.
    if (page->freeCacheIndex != -1) {
        ASSERT(page->freeCacheIndex >= 0);
        ASSERT(static_cast<unsigned>(page->freeCacheIndex) < kMaxFreeableSpans);
        ASSERT(root->globalEmptyPageRing[page->freeCacheIndex] == page);
        root->globalEmptyPageRing[page->freeCacheIndex] = 0;
    }

    int16_t currentIndex = root->globalEmptyPageRingIndex;
    PartitionPage* pageToFree = root->globalEmptyPageRing[currentIndex];
    // The page might well have been re-activated, filled up, etc. before we get
    // around to looking at it here.
    if (pageToFree) {
        ASSERT(pageToFree != &PartitionRootBase::gSeedPage);
        ASSERT(pageToFree->freeCacheIndex >= 0);
        ASSERT(static_cast<unsigned>(pageToFree->freeCacheIndex) < kMaxFreeableSpans);
        ASSERT(pageToFree == root->globalEmptyPageRing[pageToFree->freeCacheIndex]);
        if (!pageToFree->numAllocatedSlots && pageToFree->freelistHead) {
            // The page is still empty, and not freed, so _really_ free it.
            partitionFreePage(root, pageToFree);
        }
        pageToFree->freeCacheIndex = -1;
    }

    // We put the empty slot span on our global list of "pages that were once
    // empty", thus providing it a bit of breathing room to get re-used before
    // we really free it. This improves performance, particularly on Mac OS X
    // which has subpar memory management performance.
    root->globalEmptyPageRing[currentIndex] = page;
    page->freeCacheIndex = currentIndex;
    ++currentIndex;
    if (currentIndex == kMaxFreeableSpans)
        currentIndex = 0;
    root->globalEmptyPageRingIndex = currentIndex;
}
void partitionFreeSlowPath(PartitionPage* page)
{
    PartitionBucket* bucket = page->bucket;
    ASSERT(page != &PartitionRootGeneric::gSeedPage);
    if (LIKELY(page->numAllocatedSlots == 0)) {
        // Page became fully unused.
        if (UNLIKELY(partitionBucketIsDirectMapped(bucket))) {
            partitionDirectUnmap(page);
            return;
        }
        // If it's the current active page, attempt to change it. We'd prefer to leave
        // the page empty as a gentle force towards defragmentation.
        if (LIKELY(page == bucket->activePagesHead) && page->nextPage) {
            if (partitionSetNewActivePage(page->nextPage)) {
                ASSERT(bucket->activePagesHead != page);
                // Link the empty page back in after the new current page, to
                // avoid losing a reference to it.
                // TODO: consider walking the list to link the empty page after
                // all non-empty pages?
                PartitionPage* currentPage = bucket->activePagesHead;
                page->nextPage = currentPage->nextPage;
                currentPage->nextPage = page;
            } else {
                bucket->activePagesHead = page;
                page->nextPage = 0;
            }
        }
        partitionRegisterEmptyPage(page);
    } else {
        // Ensure that the page is full. That's the only valid case if we
        // get here.
        ASSERT(page->numAllocatedSlots < 0);
        // A transition of numAllocatedSlots from 0 to -1 is not legal, and
        // likely indicates a double-free.
        RELEASE_ASSERT(page->numAllocatedSlots != -1);
        page->numAllocatedSlots = -page->numAllocatedSlots - 2;
        ASSERT(page->numAllocatedSlots == partitionBucketSlots(bucket) - 1);
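        // Consistency check for the -2 above: a full span of N slots was
        // tagged with numAllocatedSlots == -N when it was unlinked, and the
        // inline free path has (presumably) already decremented that to -N - 1
        // before calling us, so -(-N - 1) - 2 == N - 1, matching the ASSERT. A
        // page freed while already empty would arrive as 0 - 1 == -1 instead,
        // which the RELEASE_ASSERT catches.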
        // Fully used page became partially used. It must be put back on the
        // non-full page list. Also make it the current page to increase the
        // chances of it being filled up again. The old current page will be
        // the next page.
        if (UNLIKELY(bucket->activePagesHead == &PartitionRootGeneric::gSeedPage))
            bucket->activePagesHead = 0;
        page->nextPage = bucket->activePagesHead;
        bucket->activePagesHead = page;
        --bucket->numFullPages;
        // Special case: for a partition page with just a single slot, it may
        // now be empty and we want to run it through the empty logic.
        if (UNLIKELY(page->numAllocatedSlots == 0))
            partitionFreeSlowPath(page);
    }
}
bool partitionReallocDirectMappedInPlace(PartitionRootGeneric* root, PartitionPage* page, size_t newSize)
{
    ASSERT(partitionBucketIsDirectMapped(page->bucket));

    newSize = partitionCookieSizeAdjustAdd(newSize);

    // Note that the new size might be a bucketed size; this function is called
    // whenever we're reallocating a direct mapped allocation.
    newSize = partitionDirectMapSize(newSize);
    if (newSize < kGenericMinDirectMappedDownsize)
        return false;

    // bucket->slotSize is the current size of the allocation.
    size_t currentSize = page->bucket->slotSize;
    if (newSize == currentSize)
        return true;

    char* charPtr = static_cast<char*>(partitionPageToPointer(page));

    if (newSize < currentSize) {
        size_t mapSize = partitionPageToDirectMapExtent(page)->mapSize;

        // Don't reallocate in-place if new size is less than 80 % of the full
        // map size, to avoid holding on to too much unused address space.
        if ((newSize / kSystemPageSize) * 5 < (mapSize / kSystemPageSize) * 4)
            return false;
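        // (For instance, shrinking a 100-system-page map to 79 pages trips the
        // check above -- 79 * 5 == 395 < 400 == 100 * 4 -- so we return false
        // and the caller falls back to a fresh allocation plus copy.)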
        // Shrink by decommitting unneeded pages and making them inaccessible.
        size_t decommitSize = currentSize - newSize;
        partitionDecommitSystemPages(root, charPtr + newSize, decommitSize);
        setSystemPagesInaccessible(charPtr + newSize, decommitSize);
    } else if (newSize <= partitionPageToDirectMapExtent(page)->mapSize) {
        // Grow within the actually allocated memory. Just need to make the
        // pages accessible again.
        size_t recommitSize = newSize - currentSize;
        setSystemPagesAccessible(charPtr + currentSize, recommitSize);
        partitionRecommitSystemPages(root, charPtr + currentSize, recommitSize);
#ifndef NDEBUG
        memset(charPtr + currentSize, kUninitializedByte, recommitSize);
#endif
    } else {
        // We can't perform the realloc in-place.
        // TODO: support this too when possible.
        return false;
    }

#ifndef NDEBUG
    // Write a new trailing cookie.
    partitionCookieWriteValue(charPtr + newSize - kCookieSize);
#endif

    page->bucket->slotSize = newSize;
    return true;
}
void* partitionReallocGeneric(PartitionRootGeneric* root, void* ptr, size_t newSize)
{
#if defined(MEMORY_TOOL_REPLACES_ALLOCATOR)
    return realloc(ptr, newSize);
#else
    if (UNLIKELY(!ptr))
        return partitionAllocGeneric(root, newSize);
    if (UNLIKELY(!newSize)) {
        partitionFreeGeneric(root, ptr);
        return 0;
    }

    RELEASE_ASSERT(newSize <= kGenericMaxDirectMapped);

    ASSERT(partitionPointerIsValid(partitionCookieFreePointerAdjust(ptr)));

    PartitionPage* page = partitionPointerToPage(partitionCookieFreePointerAdjust(ptr));

    if (UNLIKELY(partitionBucketIsDirectMapped(page->bucket))) {
        // We may be able to perform the realloc in place by changing the
        // accessibility of memory pages and, if reducing the size, decommitting
        // unneeded pages.
        if (partitionReallocDirectMappedInPlace(root, page, newSize))
            return ptr;
    }

    size_t actualNewSize = partitionAllocActualSize(root, newSize);
    size_t actualOldSize = partitionAllocGetSize(ptr);

    // TODO: note that tcmalloc will "ignore" a downsizing realloc() unless the
    // new size is a significant percentage smaller. We could do the same if we
    // determine it is a win.
    if (actualNewSize == actualOldSize) {
        // Trying to allocate a block of size newSize would give us a block of
        // the same size as the one we've already got, so no point in doing
        // anything.
        return ptr;
    }

    // This realloc cannot be resized in-place. Sadness.
    void* ret = partitionAllocGeneric(root, newSize);
    size_t copySize = actualOldSize;
    if (newSize < copySize)
        copySize = newSize;

    memcpy(ret, ptr, copySize);
    partitionFreeGeneric(root, ptr);
    return ret;
#endif
}
void partitionDumpStats(const PartitionRoot& root)
{
    size_t totalLive = 0;
    size_t totalResident = 0;
    size_t totalFreeable = 0;
    for (size_t i = 0; i < root.numBuckets; ++i) {
        const PartitionBucket& bucket = root.buckets()[i];
        if (bucket.activePagesHead == &PartitionRootGeneric::gSeedPage && !bucket.freePagesHead && !bucket.numFullPages) {
            // Empty bucket with no freelist or full pages. Skip reporting it.
            continue;
        }
        size_t numFreePages = 0;
        PartitionPage* freePages = bucket.freePagesHead;
        while (freePages) {
            ++numFreePages;
            freePages = freePages->nextPage;
        }
        size_t bucketSlotSize = bucket.slotSize;
        uint16_t bucketNumSlots = partitionBucketSlots(&bucket);
        size_t bucketUsefulStorage = bucketSlotSize * bucketNumSlots;
        size_t bucketPageSize = bucket.numSystemPagesPerSlotSpan * kSystemPageSize;
        size_t bucketWaste = bucketPageSize - bucketUsefulStorage;
        size_t numActiveBytes = bucket.numFullPages * bucketUsefulStorage;
        size_t numResidentBytes = bucket.numFullPages * bucketPageSize;
        size_t numFreeableBytes = 0;
        size_t numActivePages = 0;
        const PartitionPage* page = bucket.activePagesHead;
        while (page) {
            ASSERT(page != &PartitionRootGeneric::gSeedPage);
            // A page may be on the active list but freed and not yet swept.
            if (!page->freelistHead && !page->numUnprovisionedSlots && !page->numAllocatedSlots) {
                ++numFreePages;
            } else {
                ++numActivePages;
                numActiveBytes += (page->numAllocatedSlots * bucketSlotSize);
                size_t pageBytesResident = (bucketNumSlots - page->numUnprovisionedSlots) * bucketSlotSize;
                // Round up to system page size.
                pageBytesResident = (pageBytesResident + kSystemPageOffsetMask) & kSystemPageBaseMask;
                numResidentBytes += pageBytesResident;
                if (!page->numAllocatedSlots)
                    numFreeableBytes += pageBytesResident;
            }
            page = page->nextPage;
        }
        totalLive += numActiveBytes;
        totalResident += numResidentBytes;
        totalFreeable += numFreeableBytes;
        printf("bucket size %zu (pageSize %zu waste %zu): %zu alloc/%zu commit/%zu freeable bytes, %zu/%zu/%zu full/active/free pages\n", bucketSlotSize, bucketPageSize, bucketWaste, numActiveBytes, numResidentBytes, numFreeableBytes, static_cast<size_t>(bucket.numFullPages), numActivePages, numFreePages);
    }
    printf("total live: %zu bytes\n", totalLive);
    printf("total resident: %zu bytes\n", totalResident);
    printf("total freeable: %zu bytes\n", totalFreeable);
}

} // namespace WTF