1 // Copyright (c) 2005, 2007, Google Inc.
2 // All rights reserved.
3 // Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011 Apple Inc. All rights reserved.
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 // Author: Sanjay Ghemawat <opensource@google.com>
34 // A malloc that uses a per-thread cache to satisfy small malloc requests.
35 // (The time for malloc/free of a small object drops from 300 ns to 50 ns.)
37 // See doc/tcmalloc.html for a high-level
38 // description of how this malloc works.
41 // 1. The thread-specific lists are accessed without acquiring any locks.
42 // This is safe because each such list is only accessed by one thread.
43 // 2. We have a lock per central free-list, and hold it while manipulating
44 // the central free list for a particular size.
45 // 3. The central page allocator is protected by "pageheap_lock".
46 // 4. The pagemap (which maps from page-number to descriptor),
47 // can be read without holding any locks, and written while holding
48 // the "pageheap_lock".
49 // 5. To improve performance, a subset of the information one can get
50 // from the pagemap is cached in a data structure, pagemap_cache_,
51 // that atomically reads and writes its entries. This cache can be
52 // read and written without locking.
54 // This multi-threaded access to the pagemap is safe for fairly
55 // subtle reasons. We basically assume that when an object X is
56 // allocated by thread A and deallocated by thread B, there must
57 // have been appropriate synchronization in the handoff of object
58 // X from thread A to thread B. The same logic applies to pagemap_cache_.
60 // THE PAGEID-TO-SIZECLASS CACHE
61 // Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache
62 // returns 0 for a particular PageID then that means "no information," not that
63 // the sizeclass is 0. The cache may have stale information for pages that do
64 // not hold the beginning of any free()'able object. Staleness is eliminated
65 // in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and
66 // do_memalign() for all other relevant pages.
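//
// For illustration (a sketch, not code from this file; it assumes a
// TCMalloc_PageHeap* named pageheap and uses the accessors defined further
// below), a lookup consults the cache first and falls back to the
// authoritative pagemap when the cache reports "no information":
//
//   size_t cl = pageheap->GetSizeClassIfCached(p);
//   if (cl == 0) {
//     // 0 means "no information", not "sizeclass 0"; the pagemap is authoritative.
//     Span* span = pageheap->GetDescriptor(p);
//     cl = span ? span->sizeclass : 0;
//   }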
68 // TODO: Bias reclamation to larger addresses
69 // TODO: implement mallinfo/mallopt
70 // TODO: Better testing
72 // 9/28/2003 (new page-level allocator replaces ptmalloc2):
73 // * malloc/free of small objects goes from ~300 ns to ~50 ns.
74 // * allocation of a reasonably complicated struct
75 // goes from about 1100 ns to about 300 ns.
78 #include "FastMalloc.h"
80 #include "Assertions.h"
87 #include <wtf/StdLibExtras.h>
90 #ifndef NO_TCMALLOC_SAMPLES
92 #define NO_TCMALLOC_SAMPLES
96 #if !(defined(USE_SYSTEM_MALLOC) && USE_SYSTEM_MALLOC) && defined(NDEBUG)
97 #define FORCE_SYSTEM_MALLOC 0
99 #define FORCE_SYSTEM_MALLOC 1
102 // Use a background thread to periodically scavenge memory to release back to the system
104 #define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 0
106 #define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1
114 // TLS_OUT_OF_INDEXES is not defined on WinCE.
115 #ifndef TLS_OUT_OF_INDEXES
116 #define TLS_OUT_OF_INDEXES 0xffffffff
119 static DWORD isForbiddenTlsIndex = TLS_OUT_OF_INDEXES;
120 static const LPVOID kTlsAllowValue = reinterpret_cast<LPVOID>(0); // Must be zero.
121 static const LPVOID kTlsForbiddenValue = reinterpret_cast<LPVOID>(1);
124 static bool isForbidden()
126 // By default, fastMalloc is allowed so we don't allocate the
127 // tls index unless we're asked to make it forbidden. If TlsSetValue
128 // has not been called on a thread, the value returned by TlsGetValue is 0.
129 return (isForbiddenTlsIndex != TLS_OUT_OF_INDEXES) && (TlsGetValue(isForbiddenTlsIndex) == kTlsForbiddenValue);
133 void fastMallocForbid()
135 if (isForbiddenTlsIndex == TLS_OUT_OF_INDEXES)
136 isForbiddenTlsIndex = TlsAlloc(); // a little racy, but close enough for debug only
137 TlsSetValue(isForbiddenTlsIndex, kTlsForbiddenValue);
140 void fastMallocAllow()
142 if (isForbiddenTlsIndex == TLS_OUT_OF_INDEXES)
144 TlsSetValue(isForbiddenTlsIndex, kTlsAllowValue);
147 #else // !OS(WINDOWS)
149 static pthread_key_t isForbiddenKey;
150 static pthread_once_t isForbiddenKeyOnce = PTHREAD_ONCE_INIT;
151 static void initializeIsForbiddenKey()
153 pthread_key_create(&isForbiddenKey, 0);
157 static bool isForbidden()
159 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
160 return !!pthread_getspecific(isForbiddenKey);
164 void fastMallocForbid()
166 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
167 pthread_setspecific(isForbiddenKey, &isForbiddenKey);
170 void fastMallocAllow()
172 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
173 pthread_setspecific(isForbiddenKey, 0);
175 #endif // OS(WINDOWS)
184 #if !ENABLE(WTF_MALLOC_VALIDATION)
185 WTF_EXPORT_PRIVATE void fastMallocMatchFailed(void*);
187 COMPILE_ASSERT(((sizeof(ValidationHeader) % sizeof(AllocAlignmentInteger)) == 0), ValidationHeader_must_produce_correct_alignment);
190 NO_RETURN_DUE_TO_CRASH void fastMallocMatchFailed(void*)
195 } // namespace Internal
198 void* fastZeroedMalloc(size_t n)
200 void* result = fastMalloc(n);
201 memset(result, 0, n);
205 char* fastStrDup(const char* src)
207 size_t len = strlen(src) + 1;
208 char* dup = static_cast<char*>(fastMalloc(len));
209 memcpy(dup, src, len);
213 TryMallocReturnValue tryFastZeroedMalloc(size_t n)
216 if (!tryFastMalloc(n).getValue(result))
218 memset(result, 0, n);
224 #if FORCE_SYSTEM_MALLOC
227 #include <malloc/malloc.h>
234 TryMallocReturnValue tryFastMalloc(size_t n)
236 ASSERT(!isForbidden());
238 #if ENABLE(WTF_MALLOC_VALIDATION)
239 if (std::numeric_limits<size_t>::max() - Internal::ValidationBufferSize <= n) // If overflow would occur...
242 void* result = malloc(n + Internal::ValidationBufferSize);
245 Internal::ValidationHeader* header = static_cast<Internal::ValidationHeader*>(result);
247 header->m_type = Internal::AllocTypeMalloc;
248 header->m_prefix = static_cast<unsigned>(Internal::ValidationPrefix);
250 *Internal::fastMallocValidationSuffix(result) = Internal::ValidationSuffix;
251 fastMallocValidate(result);
258 void* fastMalloc(size_t n)
260 ASSERT(!isForbidden());
262 #if ENABLE(WTF_MALLOC_VALIDATION)
263 TryMallocReturnValue returnValue = tryFastMalloc(n);
265 if (!returnValue.getValue(result))
268 void* result = malloc(n);
277 TryMallocReturnValue tryFastCalloc(size_t n_elements, size_t element_size)
279 ASSERT(!isForbidden());
281 #if ENABLE(WTF_MALLOC_VALIDATION)
282 size_t totalBytes = n_elements * element_size;
283 if (n_elements > 1 && element_size && (totalBytes / element_size) != n_elements)
286 TryMallocReturnValue returnValue = tryFastMalloc(totalBytes);
288 if (!returnValue.getValue(result))
290 memset(result, 0, totalBytes);
291 fastMallocValidate(result);
294 return calloc(n_elements, element_size);
298 void* fastCalloc(size_t n_elements, size_t element_size)
300 ASSERT(!isForbidden());
302 #if ENABLE(WTF_MALLOC_VALIDATION)
303 TryMallocReturnValue returnValue = tryFastCalloc(n_elements, element_size);
305 if (!returnValue.getValue(result))
308 void* result = calloc(n_elements, element_size);
317 void fastFree(void* p)
319 ASSERT(!isForbidden());
321 #if ENABLE(WTF_MALLOC_VALIDATION)
325 fastMallocMatchValidateFree(p, Internal::AllocTypeMalloc);
326 Internal::ValidationHeader* header = Internal::fastMallocValidationHeader(p);
327 memset(p, 0xCC, header->m_size);
334 TryMallocReturnValue tryFastRealloc(void* p, size_t n)
336 ASSERT(!isForbidden());
338 #if ENABLE(WTF_MALLOC_VALIDATION)
340 if (std::numeric_limits<size_t>::max() - Internal::ValidationBufferSize <= n) // If overflow would occur...
342 fastMallocValidate(p);
343 Internal::ValidationHeader* result = static_cast<Internal::ValidationHeader*>(realloc(Internal::fastMallocValidationHeader(p), n + Internal::ValidationBufferSize));
348 *fastMallocValidationSuffix(result) = Internal::ValidationSuffix;
349 fastMallocValidate(result);
352 return fastMalloc(n);
355 return realloc(p, n);
359 void* fastRealloc(void* p, size_t n)
361 ASSERT(!isForbidden());
363 #if ENABLE(WTF_MALLOC_VALIDATION)
364 TryMallocReturnValue returnValue = tryFastRealloc(p, n);
366 if (!returnValue.getValue(result))
369 void* result = realloc(p, n);
377 void releaseFastMallocFreeMemory() { }
379 FastMallocStatistics fastMallocStatistics()
381 FastMallocStatistics statistics = { 0, 0, 0 };
385 size_t fastMallocSize(const void* p)
387 #if ENABLE(WTF_MALLOC_VALIDATION)
388 return Internal::fastMallocValidationHeader(const_cast<void*>(p))->m_size;
390 return malloc_size(p);
392 return _msize(const_cast<void*>(p));
401 // This symbol is present in the JavaScriptCore exports file even when FastMalloc is disabled.
402 // It will never be used in this case, so its type and value are less interesting than its presence.
403 extern "C" WTF_EXPORT_PRIVATE const int jscore_fastmalloc_introspection = 0;
406 #else // FORCE_SYSTEM_MALLOC
408 #include "AlwaysInline.h"
409 #include "TCPackedCache.h"
410 #include "TCPageMap.h"
411 #include "TCSpinLock.h"
412 #include "TCSystemAlloc.h"
426 #ifndef WIN32_LEAN_AND_MEAN
427 #define WIN32_LEAN_AND_MEAN
435 #include "MallocZoneSupport.h"
436 #include <wtf/HashSet.h>
437 #include <wtf/Vector.h>
440 #if HAVE(HEADER_DETECTION_H)
441 #include "HeaderDetection.h"
445 #include <dispatch/dispatch.h>
448 #if HAVE(PTHREAD_MACHDEP_H)
449 #include <System/pthread_machdep.h>
451 #if defined(__PTK_FRAMEWORK_JAVASCRIPTCORE_KEY0)
452 #define WTF_USE_PTHREAD_GETSPECIFIC_DIRECT 1
460 // Calling pthread_getspecific through a global function pointer is faster than a normal
461 // call to the function on Mac OS X, and it's used in performance-critical code. So we
462 // use a function pointer. But that's not necessarily faster on other platforms, and we had
463 // problems with this technique on Windows, so we'll do this only on Mac OS X.
465 #if !USE(PTHREAD_GETSPECIFIC_DIRECT)
466 static void* (*pthread_getspecific_function_pointer)(pthread_key_t) = pthread_getspecific;
467 #define pthread_getspecific(key) pthread_getspecific_function_pointer(key)
469 #define pthread_getspecific(key) _pthread_getspecific_direct(key)
470 #define pthread_setspecific(key, val) _pthread_setspecific_direct(key, (val))
474 #define DEFINE_VARIABLE(type, name, value, meaning) \
475 namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \
476 type FLAGS_##name(value); \
477 char FLAGS_no##name; \
479 using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name
481 #define DEFINE_int64(name, value, meaning) \
482 DEFINE_VARIABLE(int64_t, name, value, meaning)
484 #define DEFINE_double(name, value, meaning) \
485 DEFINE_VARIABLE(double, name, value, meaning)
489 #define malloc fastMalloc
490 #define calloc fastCalloc
491 #define free fastFree
492 #define realloc fastRealloc
494 #define MESSAGE LOG_ERROR
495 #define CHECK_CONDITION ASSERT
499 class TCMalloc_Central_FreeListPadded;
500 class TCMalloc_PageHeap;
501 class TCMalloc_ThreadCache;
502 template <typename T> class PageHeapAllocator;
504 class FastMallocZone {
508 static kern_return_t enumerate(task_t, void*, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t, vm_range_recorder_t);
509 static size_t goodSize(malloc_zone_t*, size_t size) { return size; }
510 static boolean_t check(malloc_zone_t*) { return true; }
511 static void print(malloc_zone_t*, boolean_t) { }
512 static void log(malloc_zone_t*, void*) { }
513 static void forceLock(malloc_zone_t*) { }
514 static void forceUnlock(malloc_zone_t*) { }
515 static void statistics(malloc_zone_t*, malloc_statistics_t* stats) { memset(stats, 0, sizeof(malloc_statistics_t)); }
518 FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*, PageHeapAllocator<Span>*, PageHeapAllocator<TCMalloc_ThreadCache>*);
519 static size_t size(malloc_zone_t*, const void*);
520 static void* zoneMalloc(malloc_zone_t*, size_t);
521 static void* zoneCalloc(malloc_zone_t*, size_t numItems, size_t size);
522 static void zoneFree(malloc_zone_t*, void*);
523 static void* zoneRealloc(malloc_zone_t*, void*, size_t);
524 static void* zoneValloc(malloc_zone_t*, size_t) { LOG_ERROR("valloc is not supported"); return 0; }
525 static void zoneDestroy(malloc_zone_t*) { }
527 malloc_zone_t m_zone;
528 TCMalloc_PageHeap* m_pageHeap;
529 TCMalloc_ThreadCache** m_threadHeaps;
530 TCMalloc_Central_FreeListPadded* m_centralCaches;
531 PageHeapAllocator<Span>* m_spanAllocator;
532 PageHeapAllocator<TCMalloc_ThreadCache>* m_pageHeapAllocator;
540 // This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if
541 // you're porting to a system where you really can't get a stacktrace.
542 #ifdef NO_TCMALLOC_SAMPLES
543 // We use #define so code compiles even if you #include stacktrace.h somehow.
544 # define GetStackTrace(stack, depth, skip) (0)
546 # include <google/stacktrace.h>
550 // Even if we have support for thread-local storage in the compiler
551 // and linker, the OS may not support it. We need to check that at
552 // runtime. Right now, we have to keep a manual set of "bad" OSes.
553 #if defined(HAVE_TLS)
554 static bool kernel_supports_tls = false; // be conservative
555 static inline bool KernelSupportsTLS() {
556 return kernel_supports_tls;
558 # if !HAVE_DECL_UNAME // if too old for uname, probably too old for TLS
559 static void CheckIfKernelSupportsTLS() {
560 kernel_supports_tls = false;
563 # include <sys/utsname.h> // DECL_UNAME checked for <sys/utsname.h> too
564 static void CheckIfKernelSupportsTLS() {
566 if (uname(&buf) != 0) { // should be impossible
567 MESSAGE("uname failed assuming no TLS support (errno=%d)\n", errno);
568 kernel_supports_tls = false;
569 } else if (strcasecmp(buf.sysname, "linux") == 0) {
570 // The linux case: the first kernel to support TLS was 2.6.0
571 if (buf.release[0] < '2' && buf.release[1] == '.') // 0.x or 1.x
572 kernel_supports_tls = false;
573 else if (buf.release[0] == '2' && buf.release[1] == '.' &&
574 buf.release[2] >= '0' && buf.release[2] < '6' &&
575 buf.release[3] == '.') // 2.0 - 2.5
576 kernel_supports_tls = false;
578 kernel_supports_tls = true;
579 } else { // some other kernel, we'll be optimistic
580 kernel_supports_tls = true;
582 // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
584 # endif // HAVE_DECL_UNAME
587 // __THROW is defined in glibc systems. It means, counter-intuitively,
588 // "This function will never throw an exception." It's an optional
589 // optimization tool, but we may need to use it to match glibc prototypes.
590 #ifndef __THROW // I guess we're not on a glibc system
591 # define __THROW // __THROW is just an optimization, so ok to make it ""
594 //-------------------------------------------------------------------
596 //-------------------------------------------------------------------
598 // Not all possible combinations of the following parameters make
599 // sense. In particular, if kMaxSize increases, you may have to
600 // increase kNumClasses as well.
601 static const size_t kPageShift = 12;
602 static const size_t kPageSize = 1 << kPageShift;
603 static const size_t kMaxSize = 8u * kPageSize;
604 static const size_t kAlignShift = 3;
605 static const size_t kAlignment = 1 << kAlignShift;
606 static const size_t kNumClasses = 68;
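// With the parameters above, kPageSize is 1 << 12 = 4096 bytes, kMaxSize is
// 8 * 4096 = 32768 bytes (32KB), and kAlignment is 1 << 3 = 8 bytes; any
// request larger than 32KB therefore bypasses the size classes entirely and
// is handled by the page-level allocator.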
608 // Allocates a big block of memory for the pagemap once we reach more than 128MB.
610 static const size_t kPageMapBigAllocationThreshold = 128 << 20;
612 // Minimum number of pages to fetch from the system at a time. Must be
613 // significantly bigger than kPageSize to amortize system-call
614 // overhead, and also to reduce external fragmentation. Also, we
615 // should keep this value big because various incarnations of Linux
616 // have small limits on the number of mmap() regions per address space.
618 static const size_t kMinSystemAlloc = 1 << (20 - kPageShift);
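// With kPageShift == 12, kMinSystemAlloc is 1 << 8 = 256 pages, so the heap
// grows in chunks of at least 256 * 4KB = 1MB at a time.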
620 // Number of objects to move between a per-thread list and a central
621 // list in one shot. We want this to be not too small so we can
622 // amortize the lock overhead for accessing the central list. Making
623 // it too big may temporarily cause unnecessary memory wastage in the
624 // per-thread free list until the scavenger cleans up the list.
625 static int num_objects_to_move[kNumClasses];
627 // Maximum length we allow a per-thread free-list to have before we
628 // move objects from it into the corresponding central free-list. We
629 // want this big to avoid locking the central free-list too often. It
630 // should not hurt to make this list somewhat big because the
631 // scavenging code will shrink it down when its contents are not in use.
632 static const int kMaxFreeListLength = 256;
634 // Lower and upper bounds on the per-thread cache sizes
635 static const size_t kMinThreadCacheSize = kMaxSize * 2;
637 static const size_t kMaxThreadCacheSize = 512 * 1024;
639 static const size_t kMaxThreadCacheSize = 2 << 20;
642 // Default bound on the total amount of thread caches
643 static const size_t kDefaultOverallThreadCacheSize = 16 << 20;
645 // For all span-lengths < kMaxPages we keep an exact-size list.
646 // REQUIRED: kMaxPages >= kMinSystemAlloc;
647 static const size_t kMaxPages = kMinSystemAlloc;
649 /* The smallest prime > 2^n */
650 static int primes_list[] = {
651 // Small values might cause high rates of sampling
652 // and hence commented out.
653 // 2, 5, 11, 17, 37, 67, 131, 257,
654 // 521, 1031, 2053, 4099, 8209, 16411,
655 32771, 65537, 131101, 262147, 524309, 1048583,
656 2097169, 4194319, 8388617, 16777259, 33554467 };
658 // Twice the approximate gap between sampling actions.
659 // I.e., we take one sample approximately once every
660 // tcmalloc_sample_parameter/2
661 // bytes of allocation, i.e., ~ once every 128KB.
662 // Must be a prime number.
663 #ifdef NO_TCMALLOC_SAMPLES
664 DEFINE_int64(tcmalloc_sample_parameter, 0,
665 "Unused: code is compiled with NO_TCMALLOC_SAMPLES");
666 static size_t sample_period = 0;
668 DEFINE_int64(tcmalloc_sample_parameter, 262147,
669 "Twice the approximate gap between sampling actions."
670 " Must be a prime number. Otherwise will be rounded up to a "
671 " larger prime number");
672 static size_t sample_period = 262147;
675 // Protects sample_period above
676 static SpinLock sample_period_lock = SPINLOCK_INITIALIZER;
678 // Parameters for controlling how fast memory is returned to the OS.
680 DEFINE_double(tcmalloc_release_rate, 1,
681 "Rate at which we release unused memory to the system. "
682 "Zero means we never release memory back to the system. "
683 "Increase this flag to return memory faster; decrease it "
684 "to return memory slower. Reasonable rates are in the "
687 //-------------------------------------------------------------------
688 // Mapping from size to size_class and vice versa
689 //-------------------------------------------------------------------
691 // Sizes <= 1024 have an alignment >= 8. So for such sizes we have an
692 // array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128.
693 // So for these larger sizes we have an array indexed by ceil(size/128).
695 // We flatten both logical arrays into one physical array and use
696 // arithmetic to compute an appropriate index. The constants used by
697 // ClassIndex() were selected to make the flattening work.
700 // Size Expression Index
701 // -------------------------------------------------------
705 // 1024 (1024 + 7) / 8 128
706 // 1025 (1025 + 127 + (120<<7)) / 128 129
708 // 32768 (32768 + 127 + (120<<7)) / 128 376
709 static const size_t kMaxSmallSize = 1024;
710 static const int shift_amount[2] = { 3, 7 }; // For divides by 8 or 128
711 static const int add_amount[2] = { 7, 127 + (120 << 7) };
712 static unsigned char class_array[377];
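// The array size of 377 follows from the largest possible index:
//   ClassIndex(kMaxSize) = (32768 + 127 + (120 << 7)) >> 7 = 376,
// so valid indices run from 0 through 376, i.e. 377 entries.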
714 // Compute index of the class_array[] entry for a given size
715 static inline int ClassIndex(size_t s) {
716 const int i = (s > kMaxSmallSize);
717 return static_cast<int>((s + add_amount[i]) >> shift_amount[i]);
720 // Mapping from size class to max size storable in that class
721 static size_t class_to_size[kNumClasses];
723 // Mapping from size class to number of pages to allocate at a time
724 static size_t class_to_pages[kNumClasses];
726 // TransferCache is used to cache transfers of num_objects_to_move[size_class]
727 // back and forth between thread caches and the central cache for a given size
730 void *head; // Head of chain of objects.
731 void *tail; // Tail of chain of objects.
733 // A central cache freelist can have anywhere from 0 to kNumTransferEntries
734 // slots to put linked list chains into. To keep memory usage bounded the total
735 // number of TCEntries across size classes is fixed. Currently each size
736 // class is initially given one TCEntry which also means that the maximum any
737 // one class can have is kNumClasses.
738 static const int kNumTransferEntries = kNumClasses;
740 // Note: the following only works for "n"s that fit in 32-bits, but
741 // that is fine since we only use it for small sizes.
742 static inline int LgFloor(size_t n) {
744 for (int i = 4; i >= 0; --i) {
745 int shift = (1 << i);
746 size_t x = n >> shift;
756 // Some very basic linked list functions for lists that use void * as storage.
759 static inline void *SLL_Next(void *t) {
760 return *(reinterpret_cast<void**>(t));
763 static inline void SLL_SetNext(void *t, void *n) {
764 *(reinterpret_cast<void**>(t)) = n;
767 static inline void SLL_Push(void **list, void *element) {
768 SLL_SetNext(element, *list);
772 static inline void *SLL_Pop(void **list) {
773 void *result = *list;
774 *list = SLL_Next(*list);
779 // Remove N elements from a linked list to which head points. head will be
780 // modified to point to the new head. start and end will point to the first
781 // and last nodes of the range. Note that end will point to NULL after this
782 // function is called.
783 static inline void SLL_PopRange(void **head, int N, void **start, void **end) {
791 for (int i = 1; i < N; ++i) {
797 *head = SLL_Next(tmp);
798 // Unlink range from list.
799 SLL_SetNext(tmp, NULL);
802 static inline void SLL_PushRange(void **head, void *start, void *end) {
804 SLL_SetNext(end, *head);
808 static inline size_t SLL_Size(void *head) {
812 head = SLL_Next(head);
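// Illustrative use of these helpers (a sketch, not code from this file): the
// link is stored in the first word of each free object, so the lists need no
// auxiliary storage.
//
//   void* list = NULL;
//   SLL_Push(&list, a);          // list -> a
//   SLL_Push(&list, b);          // list -> b -> a
//   void* p = SLL_Pop(&list);    // p == b, list -> a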
817 // Setup helper functions.
819 static ALWAYS_INLINE size_t SizeClass(size_t size) {
820 return class_array[ClassIndex(size)];
823 // Get the byte-size for a specified class
824 static ALWAYS_INLINE size_t ByteSizeForClass(size_t cl) {
825 return class_to_size[cl];
827 static int NumMoveSize(size_t size) {
828 if (size == 0) return 0;
829 // Use approx 64k transfers between thread and central caches.
830 int num = static_cast<int>(64.0 * 1024.0 / size);
831 if (num < 2) num = 2;
832 // Clamp well below kMaxFreeListLength to avoid ping pong between central
833 // and thread caches.
834 if (num > static_cast<int>(0.8 * kMaxFreeListLength))
835 num = static_cast<int>(0.8 * kMaxFreeListLength);
837 // Also, avoid bringing in too many objects into small object free
838 // lists. There are lots of such lists, and if we allow each one to
839 // fetch too many at a time, we end up having to scavenge too often
840 // (especially when there are lots of threads and each thread gets a
841 // small allowance for its thread cache).
843 // TODO: Make thread cache free list sizes dynamic so that we do not
844 // have to equally divide a fixed resource amongst lots of threads.
845 if (num > 32) num = 32;
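// Worked examples of the rules above: for 32-byte objects, 64KB/32 = 2048 is
// clamped down to 32; for 8KB objects the batch size is 64KB/8KB = 8; and for
// objects near kMaxSize (32KB) the minimum of 2 applies.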
850 // Initialize the mapping arrays
851 static void InitSizeClasses() {
852 // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
853 if (ClassIndex(0) < 0) {
854 MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0));
857 if (static_cast<size_t>(ClassIndex(kMaxSize)) >= sizeof(class_array)) {
858 MESSAGE("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize));
862 // Compute the size classes we want to use
863 size_t sc = 1; // Next size class to assign
864 unsigned char alignshift = kAlignShift;
866 for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) {
867 int lg = LgFloor(size);
869 // Increase alignment every so often.
871 // Since we double the alignment every time size doubles and
872 // size >= 128, this means that space wasted due to alignment is
873 // at most 16/128 i.e., 12.5%. Plus we cap the alignment at 256
874 // bytes, so the space wasted as a percentage starts falling for sizes > 2K.
876 if ((lg >= 7) && (alignshift < 8)) {
882 // Allocate enough pages so leftover is less than 1/8 of total.
883 // This bounds wasted space to at most 12.5%.
884 size_t psize = kPageSize;
885 while ((psize % size) > (psize >> 3)) {
888 const size_t my_pages = psize >> kPageShift;
890 if (sc > 1 && my_pages == class_to_pages[sc-1]) {
891 // See if we can merge this into the previous class without
892 // increasing the fragmentation of the previous class.
893 const size_t my_objects = (my_pages << kPageShift) / size;
894 const size_t prev_objects = (class_to_pages[sc-1] << kPageShift)
895 / class_to_size[sc-1];
896 if (my_objects == prev_objects) {
897 // Adjust last class to include this size
898 class_to_size[sc-1] = size;
904 class_to_pages[sc] = my_pages;
905 class_to_size[sc] = size;
908 if (sc != kNumClasses) {
909 MESSAGE("wrong number of size classes: found %" PRIuS " instead of %d\n",
910 sc, int(kNumClasses));
914 // Initialize the mapping arrays
916 for (unsigned char c = 1; c < kNumClasses; c++) {
917 const size_t max_size_in_class = class_to_size[c];
918 for (size_t s = next_size; s <= max_size_in_class; s += kAlignment) {
919 class_array[ClassIndex(s)] = c;
921 next_size = static_cast<int>(max_size_in_class + kAlignment);
924 // Double-check sizes just to be safe
925 for (size_t size = 0; size <= kMaxSize; size++) {
926 const size_t sc = SizeClass(size);
928 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
931 if (sc > 1 && size <= class_to_size[sc-1]) {
932 MESSAGE("Allocating unnecessarily large class %" PRIuS " for %" PRIuS
936 if (sc >= kNumClasses) {
937 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
940 const size_t s = class_to_size[sc];
942 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
946 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
951 // Initialize the num_objects_to_move array.
952 for (size_t cl = 1; cl < kNumClasses; ++cl) {
953 num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl));
958 // Dump class sizes and maximum external wastage per size class
959 for (size_t cl = 1; cl < kNumClasses; ++cl) {
960 const int alloc_size = class_to_pages[cl] << kPageShift;
961 const int alloc_objs = alloc_size / class_to_size[cl];
962 const int min_used = (class_to_size[cl-1] + 1) * alloc_objs;
963 const int max_waste = alloc_size - min_used;
964 MESSAGE("SC %3d [ %8d .. %8d ] from %8d ; %2.0f%% maxwaste\n",
966 int(class_to_size[cl-1] + 1),
967 int(class_to_size[cl]),
968 int(class_to_pages[cl] << kPageShift),
969 max_waste * 100.0 / alloc_size
976 // -------------------------------------------------------------------------
977 // Simple allocator for objects of a specified type. External locking
978 // is required before accessing one of these objects.
979 // -------------------------------------------------------------------------
981 // Metadata allocator -- keeps stats about how many bytes allocated
982 static uint64_t metadata_system_bytes = 0;
983 static void* MetaDataAlloc(size_t bytes) {
984 void* result = TCMalloc_SystemAlloc(bytes, 0);
985 if (result != NULL) {
986 metadata_system_bytes += bytes;
992 class PageHeapAllocator {
994 // How much to allocate from system at a time
995 static const size_t kAllocIncrement = 32 << 10;
998 static const size_t kAlignedSize
999 = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment);
1001 // Free area from which to carve new objects
1005 // Linked list of all regions allocated by this allocator
1006 void* allocated_regions_;
1008 // Free list of already carved objects
1011 // Number of allocated but unfreed objects
1016 ASSERT(kAlignedSize <= kAllocIncrement);
1018 allocated_regions_ = 0;
1025 // Consult free list
1027 if (free_list_ != NULL) {
1028 result = free_list_;
1029 free_list_ = *(reinterpret_cast<void**>(result));
1031 if (free_avail_ < kAlignedSize) {
1033 char* new_allocation = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement));
1034 if (!new_allocation)
1037 *reinterpret_cast_ptr<void**>(new_allocation) = allocated_regions_;
1038 allocated_regions_ = new_allocation;
1039 free_area_ = new_allocation + kAlignedSize;
1040 free_avail_ = kAllocIncrement - kAlignedSize;
1042 result = free_area_;
1043 free_area_ += kAlignedSize;
1044 free_avail_ -= kAlignedSize;
1047 return reinterpret_cast<T*>(result);
1051 *(reinterpret_cast<void**>(p)) = free_list_;
1056 int inuse() const { return inuse_; }
1058 #if defined(WTF_CHANGES) && OS(DARWIN)
1059 template <class Recorder>
1060 void recordAdministrativeRegions(Recorder& recorder, const RemoteMemoryReader& reader)
1062 for (void* adminAllocation = allocated_regions_; adminAllocation; adminAllocation = reader.nextEntryInLinkedList(reinterpret_cast<void**>(adminAllocation)))
1063 recorder.recordRegion(reinterpret_cast<vm_address_t>(adminAllocation), kAllocIncrement);
1068 // -------------------------------------------------------------------------
1069 // Span - a contiguous run of pages
1070 // -------------------------------------------------------------------------
1072 // Type that can hold a page number
1073 typedef uintptr_t PageID;
1075 // Type that can hold the length of a run of pages
1076 typedef uintptr_t Length;
1078 static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;
1080 // Convert byte size into pages. This won't overflow, but may return
1081 // an unreasonably large value if bytes is huge enough.
1082 static inline Length pages(size_t bytes) {
1083 return (bytes >> kPageShift) +
1084 ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
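// For example, with 4KB pages: pages(1) == 1, pages(4096) == 1, and
// pages(4097) == 2; the expression simply rounds a byte count up to whole
// pages.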
1087 // Convert a user size into the number of bytes that will actually be allocated.
1089 static size_t AllocationSize(size_t bytes) {
1090 if (bytes > kMaxSize) {
1091 // Large object: we allocate an integral number of pages
1092 ASSERT(bytes <= (kMaxValidPages << kPageShift));
1093 return pages(bytes) << kPageShift;
1095 // Small object: find the size class to which it belongs
1096 return ByteSizeForClass(SizeClass(bytes));
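// For example, AllocationSize(kMaxSize + 1) is rounded up to 9 pages = 36864
// bytes, while a small request such as 10 bytes is rounded up to the byte size
// of its size class (16 bytes with the default class layout computed in
// InitSizeClasses()).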
1100 // Information kept for a span (a contiguous run of pages).
1102 PageID start; // Starting page number
1103 Length length; // Number of pages in span
1104 Span* next; // Used when in linked list
1105 Span* prev; // Used when in linked list
1106 void* objects; // Linked list of free objects
1107 unsigned int free : 1; // Is the span free
1108 #ifndef NO_TCMALLOC_SAMPLES
1109 unsigned int sample : 1; // Sampled object?
1111 unsigned int sizeclass : 8; // Size-class for small objects (or 0)
1112 unsigned int refcount : 11; // Number of non-free objects
1113 bool decommitted : 1;
1117 // For debugging, we can keep a log of events per span
1124 #define ASSERT_SPAN_COMMITTED(span) ASSERT(!span->decommitted)
1127 void Event(Span* span, char op, int v = 0) {
1128 span->history[span->nexthistory] = op;
1129 span->value[span->nexthistory] = v;
1130 span->nexthistory++;
1131 if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0;
1134 #define Event(s,o,v) ((void) 0)
1137 // Allocator/deallocator for spans
1138 static PageHeapAllocator<Span> span_allocator;
1139 static Span* NewSpan(PageID p, Length len) {
1140 Span* result = span_allocator.New();
1141 memset(result, 0, sizeof(*result));
1143 result->length = len;
1145 result->nexthistory = 0;
1150 static inline void DeleteSpan(Span* span) {
1152 // In debug mode, trash the contents of deleted Spans
1153 memset(span, 0x3f, sizeof(*span));
1155 span_allocator.Delete(span);
1158 // -------------------------------------------------------------------------
1159 // Doubly linked list of spans.
1160 // -------------------------------------------------------------------------
1162 static inline void DLL_Init(Span* list) {
1167 static inline void DLL_Remove(Span* span) {
1168 span->prev->next = span->next;
1169 span->next->prev = span->prev;
1174 static ALWAYS_INLINE bool DLL_IsEmpty(const Span* list) {
1175 return list->next == list;
1178 static int DLL_Length(const Span* list) {
1180 for (Span* s = list->next; s != list; s = s->next) {
1186 #if 0 /* Not needed at the moment -- causes compiler warnings if not used */
1187 static void DLL_Print(const char* label, const Span* list) {
1188 MESSAGE("%-10s %p:", label, list);
1189 for (const Span* s = list->next; s != list; s = s->next) {
1190 MESSAGE(" <%p,%u,%u>", s, s->start, s->length);
1196 static inline void DLL_Prepend(Span* list, Span* span) {
1197 ASSERT(span->next == NULL);
1198 ASSERT(span->prev == NULL);
1199 span->next = list->next;
1201 list->next->prev = span;
1205 // -------------------------------------------------------------------------
1206 // Stack traces kept for sampled allocations
1207 // The following state is protected by pageheap_lock_.
1208 // -------------------------------------------------------------------------
1210 // size/depth are made the same size as a pointer so that some generic
1211 // code below can conveniently cast them back and forth to void*.
1212 static const int kMaxStackDepth = 31;
1214 uintptr_t size; // Size of object
1215 uintptr_t depth; // Number of PC values stored in array below
1216 void* stack[kMaxStackDepth];
1218 static PageHeapAllocator<StackTrace> stacktrace_allocator;
1219 static Span sampled_objects;
1221 // -------------------------------------------------------------------------
1222 // Map from page-id to per-page data
1223 // -------------------------------------------------------------------------
1225 // We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines.
1226 // We also use a simple one-level cache for hot PageID-to-sizeclass mappings,
1227 // because sometimes the sizeclass is all the information we need.
1229 // Selector class -- general selector uses 3-level map
1230 template <int BITS> class MapSelector {
1232 typedef TCMalloc_PageMap3<BITS-kPageShift> Type;
1233 typedef PackedCache<BITS, uint64_t> CacheType;
1236 #if defined(WTF_CHANGES)
1238 // On all known X86-64 platforms, the upper 16 bits are always unused and therefore
1239 // can be excluded from the PageMap key.
1240 // See http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details
1242 static const size_t kBitsUnusedOn64Bit = 16;
1244 static const size_t kBitsUnusedOn64Bit = 0;
1247 // A three-level map for 64-bit machines
1248 template <> class MapSelector<64> {
1250 typedef TCMalloc_PageMap3<64 - kPageShift - kBitsUnusedOn64Bit> Type;
1251 typedef PackedCache<64, uint64_t> CacheType;
1255 // A two-level map for 32-bit machines
1256 template <> class MapSelector<32> {
1258 typedef TCMalloc_PageMap2<32 - kPageShift> Type;
1259 typedef PackedCache<32 - kPageShift, uint16_t> CacheType;
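// Concretely, with kPageShift == 12: on 64-bit targets the three-level map is
// keyed on 64 - 12 - 16 = 36 bits of page number (when the upper 16 virtual
// address bits can be ignored, as above), and on 32-bit targets the two-level
// map is keyed on 32 - 12 = 20 bits.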
1262 // -------------------------------------------------------------------------
1263 // Page-level allocator
1264 // * Eager coalescing
1266 // Heap for page-level allocation. We allow allocating and freeing
1267 // contiguous runs of pages (each such run is called a "span").
1268 // -------------------------------------------------------------------------
1270 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1271 // The page heap maintains a free list for spans that are no longer in use by
1272 // the central cache or any thread caches. We use a background thread to
1273 // periodically scan the free list and release a percentage of it back to the OS.
1275 // If free_committed_pages_ exceeds kMinimumFreeCommittedPageCount, the
1276 // background thread:
1278 // - pauses for kScavengeDelayInSeconds
1279 // - returns to the OS a percentage of the memory that remained unused during
1280 // that pause (kScavengePercentage * min_free_committed_pages_since_last_scavenge_)
1281 // The goal of this strategy is to reduce memory pressure in a timely fashion
1282 // while avoiding thrashing the OS allocator.
1284 // Time delay before the page heap scavenger will consider returning pages to the system.
1286 static const int kScavengeDelayInSeconds = 2;
1288 // Approximate percentage of free committed pages to return to the OS in one scavenge.
1290 static const float kScavengePercentage = .5f;
1292 // number of span lists to keep spans in when memory is returned.
1293 static const int kMinSpanListsWithSpans = 32;
1295 // Number of free committed pages that we want to keep around. The minimum number of pages used when there
1296 // is 1 span in each of the first kMinSpanListsWithSpans spanlists. Currently 528 pages.
1297 static const size_t kMinimumFreeCommittedPageCount = kMinSpanListsWithSpans * ((1.0f+kMinSpanListsWithSpans) / 2.0f);
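// That is, kMinimumFreeCommittedPageCount = 32 * (1 + 32) / 2 = 528 pages,
// or roughly 528 * 4KB ~= 2MB of committed-but-free memory kept resident.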
1301 static SpinLock pageheap_lock = SPINLOCK_INITIALIZER;
1303 class TCMalloc_PageHeap {
1307 // Allocate a run of "n" pages. Returns zero if out of memory.
1308 Span* New(Length n);
1310 // Delete the span "[p, p+n-1]".
1311 // REQUIRES: span was returned by earlier call to New() and
1312 // has not yet been deleted.
1313 void Delete(Span* span);
1315 // Mark an allocated span as being used for small objects of the
1316 // specified size-class.
1317 // REQUIRES: span was returned by an earlier call to New()
1318 // and has not yet been deleted.
1319 void RegisterSizeClass(Span* span, size_t sc);
1321 // Split an allocated span into two spans: one of length "n" pages
1322 // followed by another span of length "span->length - n" pages.
1323 // Modifies "*span" to point to the first span of length "n" pages.
1324 // Returns a pointer to the second span.
1326 // REQUIRES: "0 < n < span->length"
1327 // REQUIRES: !span->free
1328 // REQUIRES: span->sizeclass == 0
1329 Span* Split(Span* span, Length n);
1331 // Return the descriptor for the specified page.
1332 inline Span* GetDescriptor(PageID p) const {
1333 return reinterpret_cast<Span*>(pagemap_.get(p));
1337 inline Span* GetDescriptorEnsureSafe(PageID p)
1339 pagemap_.Ensure(p, 1);
1340 return GetDescriptor(p);
1343 size_t ReturnedBytes() const;
1346 // Dump state to stderr
1348 void Dump(TCMalloc_Printer* out);
1351 // Return number of bytes allocated from system
1352 inline uint64_t SystemBytes() const { return system_bytes_; }
1354 // Return number of free bytes in heap
1355 uint64_t FreeBytes() const {
1356 return (static_cast<uint64_t>(free_pages_) << kPageShift);
1360 size_t CheckList(Span* list, Length min_pages, Length max_pages, bool decommitted);
1362 // Release all pages on the free list for reuse by the OS:
1363 void ReleaseFreePages();
1364 void ReleaseFreeList(Span*, Span*);
1366 // Return 0 if we have no information, or else the correct sizeclass for p.
1367 // Reads and writes to pagemap_cache_ do not require locking.
1368 // The entries are 64 bits on 64-bit hardware and 16 bits on
1369 // 32-bit hardware, and we don't mind raciness as long as each read of
1370 // an entry yields a valid entry, not a partially updated entry.
1371 size_t GetSizeClassIfCached(PageID p) const {
1372 return pagemap_cache_.GetOrDefault(p, 0);
1374 void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); }
1377 // Pick the appropriate map and cache types based on pointer size
1378 typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap;
1379 typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache;
1381 mutable PageMapCache pagemap_cache_;
1383 // We segregate spans of a given size into two circular linked
1384 // lists: one for normal spans, and one for spans whose memory
1385 // has been returned to the system.
1391 // List of free spans of length >= kMaxPages
1394 // Array mapping from span length to a doubly linked list of free spans
1395 SpanList free_[kMaxPages];
1397 // Number of pages kept in free lists
1398 uintptr_t free_pages_;
1400 // Bytes allocated from system
1401 uint64_t system_bytes_;
1403 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1404 // Number of pages kept in free lists that are still committed.
1405 Length free_committed_pages_;
1407 // Minimum number of free committed pages since last scavenge. (Can be 0 if
1408 // we've committed new pages since the last scavenge.)
1409 Length min_free_committed_pages_since_last_scavenge_;
1412 bool GrowHeap(Length n);
1414 // REQUIRES: span->length >= n
1415 // Remove span from its free list, and move any leftover part of
1416 // span into appropriate free lists. Also update "span" to have
1417 // length exactly "n" and mark it as non-free so it can be returned to the caller.
1420 // "released" is true iff "span" was found on a "returned" list.
1421 void Carve(Span* span, Length n, bool released);
1423 void RecordSpan(Span* span) {
1424 pagemap_.set(span->start, span);
1425 if (span->length > 1) {
1426 pagemap_.set(span->start + span->length - 1, span);
1430 // Allocate a large span of length == n. If successful, returns a
1431 // span of exactly the specified length. Else, returns NULL.
1432 Span* AllocLarge(Length n);
1434 #if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1435 // Incrementally release some memory to the system.
1436 // IncrementalScavenge(n) is called whenever n pages are freed.
1437 void IncrementalScavenge(Length n);
1440 // Number of pages to deallocate before doing more scavenging
1441 int64_t scavenge_counter_;
1443 // Index of last free list we scavenged
1444 size_t scavenge_index_;
1446 #if defined(WTF_CHANGES) && OS(DARWIN)
1447 friend class FastMallocZone;
1450 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1451 void initializeScavenger();
1452 ALWAYS_INLINE void signalScavenger();
1454 ALWAYS_INLINE bool shouldScavenge() const;
1456 #if HAVE(DISPATCH_H) || OS(WINDOWS)
1457 void periodicScavenge();
1458 ALWAYS_INLINE bool isScavengerSuspended();
1459 ALWAYS_INLINE void scheduleScavenger();
1460 ALWAYS_INLINE void rescheduleScavenger();
1461 ALWAYS_INLINE void suspendScavenger();
1464 #if HAVE(DISPATCH_H)
1465 dispatch_queue_t m_scavengeQueue;
1466 dispatch_source_t m_scavengeTimer;
1467 bool m_scavengingSuspended;
1469 static void CALLBACK scavengerTimerFired(void*, BOOLEAN);
1470 HANDLE m_scavengeQueueTimer;
1472 static NO_RETURN_WITH_VALUE void* runScavengerThread(void*);
1473 NO_RETURN void scavengerThread();
1475 // Keeps track of whether the background thread is actively scavenging memory every kScavengeDelayInSeconds, or
1476 // it's blocked waiting for more pages to be deleted.
1477 bool m_scavengeThreadActive;
1479 pthread_mutex_t m_scavengeMutex;
1480 pthread_cond_t m_scavengeCondition;
1483 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1486 void TCMalloc_PageHeap::init()
1488 pagemap_.init(MetaDataAlloc);
1489 pagemap_cache_ = PageMapCache(0);
1493 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1494 free_committed_pages_ = 0;
1495 min_free_committed_pages_since_last_scavenge_ = 0;
1496 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1498 scavenge_counter_ = 0;
1499 // Start scavenging at kMaxPages list
1500 scavenge_index_ = kMaxPages-1;
1501 COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits);
1502 DLL_Init(&large_.normal);
1503 DLL_Init(&large_.returned);
1504 for (size_t i = 0; i < kMaxPages; i++) {
1505 DLL_Init(&free_[i].normal);
1506 DLL_Init(&free_[i].returned);
1509 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1510 initializeScavenger();
1511 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1514 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1516 #if HAVE(DISPATCH_H)
1518 void TCMalloc_PageHeap::initializeScavenger()
1520 m_scavengeQueue = dispatch_queue_create("com.apple.JavaScriptCore.FastMallocSavenger", NULL);
1521 m_scavengeTimer = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, m_scavengeQueue);
1522 dispatch_time_t startTime = dispatch_time(DISPATCH_TIME_NOW, kScavengeDelayInSeconds * NSEC_PER_SEC);
1523 dispatch_source_set_timer(m_scavengeTimer, startTime, kScavengeDelayInSeconds * NSEC_PER_SEC, 1000 * NSEC_PER_USEC);
1524 dispatch_source_set_event_handler(m_scavengeTimer, ^{ periodicScavenge(); });
1525 m_scavengingSuspended = true;
1528 ALWAYS_INLINE bool TCMalloc_PageHeap::isScavengerSuspended()
1530 ASSERT(pageheap_lock.IsHeld());
1531 return m_scavengingSuspended;
1534 ALWAYS_INLINE void TCMalloc_PageHeap::scheduleScavenger()
1536 ASSERT(pageheap_lock.IsHeld());
1537 m_scavengingSuspended = false;
1538 dispatch_resume(m_scavengeTimer);
1541 ALWAYS_INLINE void TCMalloc_PageHeap::rescheduleScavenger()
1543 // Nothing to do here for libdispatch.
1546 ALWAYS_INLINE void TCMalloc_PageHeap::suspendScavenger()
1548 ASSERT(pageheap_lock.IsHeld());
1549 m_scavengingSuspended = true;
1550 dispatch_suspend(m_scavengeTimer);
1555 void TCMalloc_PageHeap::scavengerTimerFired(void* context, BOOLEAN)
1557 static_cast<TCMalloc_PageHeap*>(context)->periodicScavenge();
1560 void TCMalloc_PageHeap::initializeScavenger()
1562 m_scavengeQueueTimer = 0;
1565 ALWAYS_INLINE bool TCMalloc_PageHeap::isScavengerSuspended()
1567 ASSERT(IsHeld(pageheap_lock));
1568 return !m_scavengeQueueTimer;
1571 ALWAYS_INLINE void TCMalloc_PageHeap::scheduleScavenger()
1573 // We need to use WT_EXECUTEONLYONCE here and reschedule the timer, because
1574 // Windows will fire the timer event even when the function is already running.
1575 ASSERT(IsHeld(pageheap_lock));
1576 CreateTimerQueueTimer(&m_scavengeQueueTimer, 0, scavengerTimerFired, this, kScavengeDelayInSeconds * 1000, 0, WT_EXECUTEONLYONCE);
1579 ALWAYS_INLINE void TCMalloc_PageHeap::rescheduleScavenger()
1581 // We must delete the timer and create it again, because it is not possible to retrigger a timer on Windows.
1583 scheduleScavenger();
1586 ALWAYS_INLINE void TCMalloc_PageHeap::suspendScavenger()
1588 ASSERT(IsHeld(pageheap_lock));
1589 HANDLE scavengeQueueTimer = m_scavengeQueueTimer;
1590 m_scavengeQueueTimer = 0;
1591 DeleteTimerQueueTimer(0, scavengeQueueTimer, 0);
1596 void TCMalloc_PageHeap::initializeScavenger()
1598 // Create a non-recursive mutex.
1599 #if !defined(PTHREAD_MUTEX_NORMAL) || PTHREAD_MUTEX_NORMAL == PTHREAD_MUTEX_DEFAULT
1600 pthread_mutex_init(&m_scavengeMutex, 0);
1602 pthread_mutexattr_t attr;
1603 pthread_mutexattr_init(&attr);
1604 pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL);
1606 pthread_mutex_init(&m_scavengeMutex, &attr);
1608 pthread_mutexattr_destroy(&attr);
1611 pthread_cond_init(&m_scavengeCondition, 0);
1612 m_scavengeThreadActive = true;
1614 pthread_create(&thread, 0, runScavengerThread, this);
1617 void* TCMalloc_PageHeap::runScavengerThread(void* context)
1619 static_cast<TCMalloc_PageHeap*>(context)->scavengerThread();
1620 #if (COMPILER(MSVC) || COMPILER(SUNCC))
1621 // Without this, Visual Studio and Sun Studio will complain that this method does not return a value.
1626 ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger()
1628 // m_scavengeMutex should be held before accessing m_scavengeThreadActive.
1629 ASSERT(pthread_mutex_trylock(&m_scavengeMutex));
1630 if (!m_scavengeThreadActive && shouldScavenge())
1631 pthread_cond_signal(&m_scavengeCondition);
1636 void TCMalloc_PageHeap::scavenge()
1638 size_t pagesToRelease = min_free_committed_pages_since_last_scavenge_ * kScavengePercentage;
1639 size_t targetPageCount = std::max<size_t>(kMinimumFreeCommittedPageCount, free_committed_pages_ - pagesToRelease);
1641 Length lastFreeCommittedPages = free_committed_pages_;
1642 while (free_committed_pages_ > targetPageCount) {
1644 for (int i = kMaxPages; i > 0 && free_committed_pages_ >= targetPageCount; i--) {
1645 SpanList* slist = (static_cast<size_t>(i) == kMaxPages) ? &large_ : &free_[i];
1646 // If the span size is bigger than kMinSpanListsWithSpans pages, return all the spans in the list; otherwise return
1647 // only 50% of the span list at a time so that spans of size 1 are not the only ones left.
1648 size_t length = DLL_Length(&slist->normal);
1649 size_t numSpansToReturn = (i > kMinSpanListsWithSpans) ? length : length / 2;
1650 for (int j = 0; static_cast<size_t>(j) < numSpansToReturn && !DLL_IsEmpty(&slist->normal) && free_committed_pages_ > targetPageCount; j++) {
1651 Span* s = slist->normal.prev;
1653 ASSERT(!s->decommitted);
1654 if (!s->decommitted) {
1655 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1656 static_cast<size_t>(s->length << kPageShift));
1657 ASSERT(free_committed_pages_ >= s->length);
1658 free_committed_pages_ -= s->length;
1659 s->decommitted = true;
1661 DLL_Prepend(&slist->returned, s);
1665 if (lastFreeCommittedPages == free_committed_pages_)
1667 lastFreeCommittedPages = free_committed_pages_;
1670 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
1673 ALWAYS_INLINE bool TCMalloc_PageHeap::shouldScavenge() const
1675 return free_committed_pages_ > kMinimumFreeCommittedPageCount;
1678 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1680 inline Span* TCMalloc_PageHeap::New(Length n) {
1684 // Find first size >= n that has a non-empty list
1685 for (Length s = n; s < kMaxPages; s++) {
1687 bool released = false;
1688 if (!DLL_IsEmpty(&free_[s].normal)) {
1689 // Found normal span
1690 ll = &free_[s].normal;
1691 } else if (!DLL_IsEmpty(&free_[s].returned)) {
1692 // Found returned span; reallocate it
1693 ll = &free_[s].returned;
1696 // Keep looking in larger classes
1700 Span* result = ll->next;
1701 Carve(result, n, released);
1702 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1703 // The newly allocated memory is from a span that's in the normal span list (already committed). Update the
1704 // free committed pages count.
1705 ASSERT(free_committed_pages_ >= n);
1706 free_committed_pages_ -= n;
1707 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_)
1708 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
1709 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1715 Span* result = AllocLarge(n);
1716 if (result != NULL) {
1717 ASSERT_SPAN_COMMITTED(result);
1721 // Grow the heap and try again
1730 Span* TCMalloc_PageHeap::AllocLarge(Length n) {
1731 // Find the best span (closest to n in size).
1732 // The following loops implement address-ordered best-fit.
1733 bool from_released = false;
1736 // Search through normal list
1737 for (Span* span = large_.normal.next;
1738 span != &large_.normal;
1739 span = span->next) {
1740 if (span->length >= n) {
1742 || (span->length < best->length)
1743 || ((span->length == best->length) && (span->start < best->start))) {
1745 from_released = false;
1750 // Search through released list in case it has a better fit
1751 for (Span* span = large_.returned.next;
1752 span != &large_.returned;
1753 span = span->next) {
1754 if (span->length >= n) {
1756 || (span->length < best->length)
1757 || ((span->length == best->length) && (span->start < best->start))) {
1759 from_released = true;
1765 Carve(best, n, from_released);
1766 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1767 // The newly allocated memory is from a span that's in the normal span list (already committed). Update the
1768 // free committed pages count.
1769 ASSERT(free_committed_pages_ >= n);
1770 free_committed_pages_ -= n;
1771 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_)
1772 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
1773 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1781 Span* TCMalloc_PageHeap::Split(Span* span, Length n) {
1783 ASSERT(n < span->length);
1784 ASSERT(!span->free);
1785 ASSERT(span->sizeclass == 0);
1786 Event(span, 'T', n);
1788 const Length extra = span->length - n;
1789 Span* leftover = NewSpan(span->start + n, extra);
1790 Event(leftover, 'U', extra);
1791 RecordSpan(leftover);
1792 pagemap_.set(span->start + n - 1, span); // Update map from pageid to span
1798 inline void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) {
1802 Event(span, 'A', n);
1805 // If the span chosen to carve from is decommitted, commit the entire span at once to avoid committing spans 1 page at a time.
1806 ASSERT(span->decommitted);
1807 TCMalloc_SystemCommit(reinterpret_cast<void*>(span->start << kPageShift), static_cast<size_t>(span->length << kPageShift));
1808 span->decommitted = false;
1809 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1810 free_committed_pages_ += span->length;
1814 const int extra = static_cast<int>(span->length - n);
1817 Span* leftover = NewSpan(span->start + n, extra);
1819 leftover->decommitted = false;
1820 Event(leftover, 'S', extra);
1821 RecordSpan(leftover);
1823 // Place leftover span on appropriate free list
1824 SpanList* listpair = (static_cast<size_t>(extra) < kMaxPages) ? &free_[extra] : &large_;
1825 Span* dst = &listpair->normal;
1826 DLL_Prepend(dst, leftover);
1829 pagemap_.set(span->start + n - 1, span);
1833 static ALWAYS_INLINE void mergeDecommittedStates(Span* destination, Span* other)
1835 if (destination->decommitted && !other->decommitted) {
1836 TCMalloc_SystemRelease(reinterpret_cast<void*>(other->start << kPageShift),
1837 static_cast<size_t>(other->length << kPageShift));
1838 } else if (other->decommitted && !destination->decommitted) {
1839 TCMalloc_SystemRelease(reinterpret_cast<void*>(destination->start << kPageShift),
1840 static_cast<size_t>(destination->length << kPageShift));
1841 destination->decommitted = true;
1845 inline void TCMalloc_PageHeap::Delete(Span* span) {
1847 ASSERT(!span->free);
1848 ASSERT(span->length > 0);
1849 ASSERT(GetDescriptor(span->start) == span);
1850 ASSERT(GetDescriptor(span->start + span->length - 1) == span);
1851 span->sizeclass = 0;
1852 #ifndef NO_TCMALLOC_SAMPLES
1856 // Coalesce -- we guarantee that "p" != 0, so no bounds checking
1857 // necessary. We do not bother resetting the stale pagemap
1858 // entries for the pieces we are merging together because we only
1859 // care about the pagemap entries for the boundaries.
1860 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1861 // Track the total size of the neighboring free spans that are committed.
1862 Length neighboringCommittedSpansLength = 0;
1864 const PageID p = span->start;
1865 const Length n = span->length;
1866 Span* prev = GetDescriptor(p-1);
1867 if (prev != NULL && prev->free) {
1868 // Merge preceding span into this span
1869 ASSERT(prev->start + prev->length == p);
1870 const Length len = prev->length;
1871 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1872 if (!prev->decommitted)
1873 neighboringCommittedSpansLength += len;
1875 mergeDecommittedStates(span, prev);
1879 span->length += len;
1880 pagemap_.set(span->start, span);
1881 Event(span, 'L', len);
1883 Span* next = GetDescriptor(p+n);
1884 if (next != NULL && next->free) {
1885 // Merge next span into this span
1886 ASSERT(next->start == p+n);
1887 const Length len = next->length;
1888 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1889 if (!next->decommitted)
1890 neighboringCommittedSpansLength += len;
1892 mergeDecommittedStates(span, next);
1895 span->length += len;
1896 pagemap_.set(span->start + span->length - 1, span);
1897 Event(span, 'R', len);
1900 Event(span, 'D', span->length);
1902 if (span->decommitted) {
1903 if (span->length < kMaxPages)
1904 DLL_Prepend(&free_[span->length].returned, span);
1906 DLL_Prepend(&large_.returned, span);
1908 if (span->length < kMaxPages)
1909 DLL_Prepend(&free_[span->length].normal, span);
1911 DLL_Prepend(&large_.normal, span);
1915 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1916 if (span->decommitted) {
1917 // If the merged span is decommitted, that means we decommitted any neighboring spans that were
1918 // committed. Update the free committed pages count.
1919 free_committed_pages_ -= neighboringCommittedSpansLength;
1920 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_)
1921 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
1923 // If the merged span remains committed, add the deleted span's size to the free committed pages count.
1924 free_committed_pages_ += n;
1927 // Make sure the scavenge thread becomes active if we have enough freed pages to release some back to the system.
1930 IncrementalScavenge(n);
1936 #if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1937 void TCMalloc_PageHeap::IncrementalScavenge(Length n) {
1938 // Fast path; not yet time to release memory
1939 scavenge_counter_ -= n;
1940 if (scavenge_counter_ >= 0) return; // Not yet time to scavenge
1943 static const size_t kDefaultReleaseDelay = 64;
1945 // If there is nothing to release, wait for so many pages before
1946 // scavenging again. With 4K pages, this comes to 1MB of memory.
1947 static const size_t kDefaultReleaseDelay = 1 << 8;
1950 // Find index of free list to scavenge
1951 size_t index = scavenge_index_ + 1;
1952 for (size_t i = 0; i < kMaxPages+1; i++) {
1953 if (index > kMaxPages) index = 0;
1954 SpanList* slist = (index == kMaxPages) ? &large_ : &free_[index];
1955 if (!DLL_IsEmpty(&slist->normal)) {
1956 // Release the last span on the normal portion of this list
1957 Span* s = slist->normal.prev;
1959 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1960 static_cast<size_t>(s->length << kPageShift));
1961 s->decommitted = true;
1962 DLL_Prepend(&slist->returned, s);
1965 scavenge_counter_ = std::max<size_t>(16UL, std::min<size_t>(kDefaultReleaseDelay, kDefaultReleaseDelay - (free_pages_ / kDefaultReleaseDelay)));
1967 scavenge_counter_ = std::max<size_t>(64UL, std::min<size_t>(kDefaultReleaseDelay, kDefaultReleaseDelay - (free_pages_ / kDefaultReleaseDelay)));
1970 if (index == kMaxPages && !DLL_IsEmpty(&slist->normal))
1971 scavenge_index_ = index - 1;
1973 scavenge_index_ = index;
1979 // Nothing to scavenge, delay for a while
1980 scavenge_counter_ = kDefaultReleaseDelay;
1984 void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) {
1985 // Associate span object with all interior pages as well
1986 ASSERT(!span->free);
1987 ASSERT(GetDescriptor(span->start) == span);
1988 ASSERT(GetDescriptor(span->start+span->length-1) == span);
1989 Event(span, 'C', sc);
1990 span->sizeclass = static_cast<unsigned int>(sc);
1991 for (Length i = 1; i < span->length-1; i++) {
1992 pagemap_.set(span->start+i, span);
1997 size_t TCMalloc_PageHeap::ReturnedBytes() const {
1999 for (unsigned s = 0; s < kMaxPages; s++) {
2000 const int r_length = DLL_Length(&free_[s].returned);
2001 unsigned r_pages = s * r_length;
2002 result += r_pages << kPageShift;
2005 for (Span* s = large_.returned.next; s != &large_.returned; s = s->next)
2006 result += s->length << kPageShift;
2012 static double PagesToMB(uint64_t pages) {
2013 return (pages << kPageShift) / 1048576.0;
2016 void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) {
2017 int nonempty_sizes = 0;
2018 for (int s = 0; s < kMaxPages; s++) {
2019 if (!DLL_IsEmpty(&free_[s].normal) || !DLL_IsEmpty(&free_[s].returned)) {
2023 out->printf("------------------------------------------------\n");
2024 out->printf("PageHeap: %d sizes; %6.1f MB free\n",
2025 nonempty_sizes, PagesToMB(free_pages_));
2026 out->printf("------------------------------------------------\n");
2027 uint64_t total_normal = 0;
2028 uint64_t total_returned = 0;
2029 for (int s = 0; s < kMaxPages; s++) {
2030 const int n_length = DLL_Length(&free_[s].normal);
2031 const int r_length = DLL_Length(&free_[s].returned);
2032 if (n_length + r_length > 0) {
2033 uint64_t n_pages = s * n_length;
2034 uint64_t r_pages = s * r_length;
2035 total_normal += n_pages;
2036 total_returned += r_pages;
2037 out->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum"
2038 "; unmapped: %6.1f MB; %6.1f MB cum\n",
2040 (n_length + r_length),
2041 PagesToMB(n_pages + r_pages),
2042 PagesToMB(total_normal + total_returned),
2044 PagesToMB(total_returned));
2048 uint64_t n_pages = 0;
2049 uint64_t r_pages = 0;
2052 out->printf("Normal large spans:\n");
2053 for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) {
2054 out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n",
2055 s->length, PagesToMB(s->length));
2056 n_pages += s->length;
2059 out->printf("Unmapped large spans:\n");
2060 for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) {
2061 out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n",
2062 s->length, PagesToMB(s->length));
2063 r_pages += s->length;
2066 total_normal += n_pages;
2067 total_returned += r_pages;
2068 out->printf(">255 large * %6u spans ~ %6.1f MB; %6.1f MB cum"
2069 "; unmapped: %6.1f MB; %6.1f MB cum\n",
2070 (n_spans + r_spans),
2071 PagesToMB(n_pages + r_pages),
2072 PagesToMB(total_normal + total_returned),
2074 PagesToMB(total_returned));
2078 bool TCMalloc_PageHeap::GrowHeap(Length n) {
2079 ASSERT(kMaxPages >= kMinSystemAlloc);
2080 if (n > kMaxValidPages) return false;
2081 Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc);
2083 void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
2086 // Try growing just "n" pages
2088 ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
2090 if (ptr == NULL) return false;
2092 ask = actual_size >> kPageShift;
2094 uint64_t old_system_bytes = system_bytes_;
2095 system_bytes_ += (ask << kPageShift);
2096 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
2099 // If we already have a lot of pages allocated, just preallocate a bunch of
2100 // memory for the page map. This prevents fragmentation by pagemap metadata
2101 // when a program keeps allocating and freeing large blocks.
2103 if (old_system_bytes < kPageMapBigAllocationThreshold
2104 && system_bytes_ >= kPageMapBigAllocationThreshold) {
2105 pagemap_.PreallocateMoreMemory();
2108 // Make sure pagemap_ has entries for all of the new pages.
2109 // Plus ensure one before and one after so coalescing code
2110 // does not need bounds-checking.
2111 if (pagemap_.Ensure(p-1, ask+2)) {
2112 // Pretend the new area is allocated and then Delete() it to
2113 // cause any necessary coalescing to occur.
2115 // We do not adjust free_pages_ here since Delete() will do it for us.
2116 Span* span = NewSpan(p, ask);
2122 // We could not allocate memory within "pagemap_"
2123 // TODO: Once we can return memory to the system, return the new span
2128 bool TCMalloc_PageHeap::Check() {
2129 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
2130 size_t totalFreeCommitted = 0;
2132 ASSERT(free_[0].normal.next == &free_[0].normal);
2133 ASSERT(free_[0].returned.next == &free_[0].returned);
2134 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
2135 totalFreeCommitted = CheckList(&large_.normal, kMaxPages, 1000000000, false);
2137 CheckList(&large_.normal, kMaxPages, 1000000000, false);
2139 CheckList(&large_.returned, kMaxPages, 1000000000, true);
2140 for (Length s = 1; s < kMaxPages; s++) {
2141 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
2142 totalFreeCommitted += CheckList(&free_[s].normal, s, s, false);
2144 CheckList(&free_[s].normal, s, s, false);
2146 CheckList(&free_[s].returned, s, s, true);
2148 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
2149 ASSERT(totalFreeCommitted == free_committed_pages_);
2155 size_t TCMalloc_PageHeap::CheckList(Span*, Length, Length, bool) {
2159 size_t TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages, bool decommitted) {
2160 size_t freeCount = 0;
2161 for (Span* s = list->next; s != list; s = s->next) {
2162 CHECK_CONDITION(s->free);
2163 CHECK_CONDITION(s->length >= min_pages);
2164 CHECK_CONDITION(s->length <= max_pages);
2165 CHECK_CONDITION(GetDescriptor(s->start) == s);
2166 CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s);
2167 CHECK_CONDITION(s->decommitted == decommitted);
2168 freeCount += s->length;
2174 void TCMalloc_PageHeap::ReleaseFreeList(Span* list, Span* returned) {
2175 // Walk backwards through list so that when we push these
2176 // spans on the "returned" list, we preserve the order.
2177 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
2178 size_t freePageReduction = 0;
2181 while (!DLL_IsEmpty(list)) {
2182 Span* s = list->prev;
2185 s->decommitted = true;
2186 DLL_Prepend(returned, s);
2187 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
2188 static_cast<size_t>(s->length << kPageShift));
2189 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
2190 freePageReduction += s->length;
2194 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
2195 free_committed_pages_ -= freePageReduction;
2196 if (free_committed_pages_ < min_free_committed_pages_since_last_scavenge_)
2197 min_free_committed_pages_since_last_scavenge_ = free_committed_pages_;
2201 void TCMalloc_PageHeap::ReleaseFreePages() {
2202 for (Length s = 0; s < kMaxPages; s++) {
2203 ReleaseFreeList(&free_[s].normal, &free_[s].returned);
2205 ReleaseFreeList(&large_.normal, &large_.returned);
2209 //-------------------------------------------------------------------
2211 //-------------------------------------------------------------------
2213 class TCMalloc_ThreadCache_FreeList {
2215 void* list_; // Linked list of nodes
2216 uint16_t length_; // Current length
2217 uint16_t lowater_; // Low water mark for list length
2226 // Return current length of list
2227 int length() const {
2232 bool empty() const {
2233 return list_ == NULL;
2236 // Low-water mark management
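  // lowater_ tracks the smallest length this list has reached since the last
  // clear_lowwatermark(); TCMalloc_ThreadCache::Scavenge() uses it to estimate
  // how many objects were never needed and can be returned to the central cache.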
2237 int lowwatermark() const { return lowater_; }
2238 void clear_lowwatermark() { lowater_ = length_; }
2240 ALWAYS_INLINE void Push(void* ptr) {
2241 SLL_Push(&list_, ptr);
2245 void PushRange(int N, void *start, void *end) {
2246 SLL_PushRange(&list_, start, end);
2247 length_ = length_ + static_cast<uint16_t>(N);
2250 void PopRange(int N, void **start, void **end) {
2251 SLL_PopRange(&list_, N, start, end);
2252 ASSERT(length_ >= N);
2253 length_ = length_ - static_cast<uint16_t>(N);
2254 if (length_ < lowater_) lowater_ = length_;
2257 ALWAYS_INLINE void* Pop() {
2258 ASSERT(list_ != NULL);
2260 if (length_ < lowater_) lowater_ = length_;
2261 return SLL_Pop(&list_);
2265 template <class Finder, class Reader>
2266 void enumerateFreeObjects(Finder& finder, const Reader& reader)
2268 for (void* nextObject = list_; nextObject; nextObject = reader.nextEntryInLinkedList(reinterpret_cast<void**>(nextObject)))
2269 finder.visit(nextObject);
2274 //-------------------------------------------------------------------
2275 // Data kept per thread
2276 //-------------------------------------------------------------------
2278 class TCMalloc_ThreadCache {
2280 typedef TCMalloc_ThreadCache_FreeList FreeList;
2282 typedef DWORD ThreadIdentifier;
2284 typedef pthread_t ThreadIdentifier;
2287 size_t size_; // Combined size of data
2288 ThreadIdentifier tid_; // Which thread owns it
2289 bool in_setspecific_; // Called pthread_setspecific?
2290 FreeList list_[kNumClasses]; // Array indexed by size-class
2292 // We sample allocations, biased by the size of the allocation
2293 uint32_t rnd_; // Cheap random number generator
2294 size_t bytes_until_sample_; // Bytes until we sample next
2296 // Allocate a new heap. REQUIRES: pageheap_lock is held.
2297 static inline TCMalloc_ThreadCache* NewHeap(ThreadIdentifier tid);
2299 // Use only as pthread thread-specific destructor function.
2300 static void DestroyThreadCache(void* ptr);
2302 // All ThreadCache objects are kept in a linked list (for stats collection)
2303 TCMalloc_ThreadCache* next_;
2304 TCMalloc_ThreadCache* prev_;
2306 void Init(ThreadIdentifier tid);
2309 // Accessors (mostly just for printing stats)
2310 int freelist_length(size_t cl) const { return list_[cl].length(); }
2312 // Total byte size in cache
2313 size_t Size() const { return size_; }
2315 ALWAYS_INLINE void* Allocate(size_t size);
2316 void Deallocate(void* ptr, size_t size_class);
2318 ALWAYS_INLINE void FetchFromCentralCache(size_t cl, size_t allocationSize);
2319 void ReleaseToCentralCache(size_t cl, int N);
2323 // Record allocation of "k" bytes. Return true iff allocation
2324 // should be sampled
2325 bool SampleAllocation(size_t k);
2327 // Pick next sampling point
2328 void PickNextSample(size_t k);
2330 static void InitModule();
2331 static void InitTSD();
2332 static TCMalloc_ThreadCache* GetThreadHeap();
2333 static TCMalloc_ThreadCache* GetCache();
2334 static TCMalloc_ThreadCache* GetCacheIfPresent();
2335 static TCMalloc_ThreadCache* CreateCacheIfNecessary();
2336 static void DeleteCache(TCMalloc_ThreadCache* heap);
2337 static void BecomeIdle();
2338 static void RecomputeThreadCacheSize();
2341 template <class Finder, class Reader>
2342 void enumerateFreeObjects(Finder& finder, const Reader& reader)
2344 for (unsigned sizeClass = 0; sizeClass < kNumClasses; sizeClass++)
2345 list_[sizeClass].enumerateFreeObjects(finder, reader);
2350 //-------------------------------------------------------------------
2351 // Data kept per size-class in central cache
2352 //-------------------------------------------------------------------
2354 class TCMalloc_Central_FreeList {
2356 void Init(size_t cl);
2358 // These methods all do internal locking.
2360 // Insert the specified range into the central freelist. N is the number of
2361 // elements in the range.
2362 void InsertRange(void *start, void *end, int N);
2364 // On return, *N holds the actual number of elements fetched.
2365 void RemoveRange(void **start, void **end, int *N);
2367 // Returns the number of free objects in cache.
2369 SpinLockHolder h(&lock_);
2373 // Returns the number of free objects in the transfer cache.
2375 SpinLockHolder h(&lock_);
2376 return used_slots_ * num_objects_to_move[size_class_];
2380 template <class Finder, class Reader>
2381 void enumerateFreeObjects(Finder& finder, const Reader& reader, TCMalloc_Central_FreeList* remoteCentralFreeList)
2383 for (Span* span = &empty_; span && span != &empty_; span = (span->next ? reader(span->next) : 0))
2384 ASSERT(!span->objects);
2386 ASSERT(!nonempty_.objects);
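    // The traversal below walks the nonempty_ span list of a central free list
    // that may live in another process: nonemptyOffset is the offset of nonempty_
    // within this object, remoteNonempty is the matching sentinel address inside
    // the remote object, and every remote Span* is translated through the Reader
    // before it is dereferenced.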
2387 static const ptrdiff_t nonemptyOffset = reinterpret_cast<const char*>(&nonempty_) - reinterpret_cast<const char*>(this);
2389 Span* remoteNonempty = reinterpret_cast<Span*>(reinterpret_cast<char*>(remoteCentralFreeList) + nonemptyOffset);
2390 Span* remoteSpan = nonempty_.next;
2392 for (Span* span = reader(remoteSpan); span && remoteSpan != remoteNonempty; remoteSpan = span->next, span = (span->next ? reader(span->next) : 0)) {
2393 for (void* nextObject = span->objects; nextObject; nextObject = reader.nextEntryInLinkedList(reinterpret_cast<void**>(nextObject)))
2394 finder.visit(nextObject);
2400 // REQUIRES: lock_ is held
2401 // Remove object from cache and return.
2402 // Return NULL if no free entries in cache.
2403 void* FetchFromSpans();
2405 // REQUIRES: lock_ is held
2406 // Remove object from cache and return. Fetches
2407 // from pageheap if cache is empty. Only returns
2408 // NULL on allocation failure.
2409 void* FetchFromSpansSafe();
2411 // REQUIRES: lock_ is held
2412 // Release a linked list of objects to spans.
2413 // May temporarily release lock_.
2414 void ReleaseListToSpans(void *start);
2416 // REQUIRES: lock_ is held
2417 // Release an object to spans.
2418 // May temporarily release lock_.
2419 ALWAYS_INLINE void ReleaseToSpans(void* object);
2421 // REQUIRES: lock_ is held
2422 // Populate cache by fetching from the page heap.
2423 // May temporarily release lock_.
2424 ALWAYS_INLINE void Populate();
2426 // REQUIRES: lock is held.
2427 // Tries to make room for a TCEntry. If the cache is full it will try to
2428 // expand it at the cost of some other cache size. Return false if there is
2430 bool MakeCacheSpace();
2432 // REQUIRES: lock_ for locked_size_class is held.
2433 // Picks a "random" size class to steal a TCEntry slot from. In reality it
2434 // just iterates over the sizeclasses but does so without taking a lock.
2435 // Returns true on success.
2436 // May temporarily lock a "random" size class.
2437 static ALWAYS_INLINE bool EvictRandomSizeClass(size_t locked_size_class, bool force);
2439 // REQUIRES: lock_ is *not* held.
2440 // Tries to shrink the cache. If force is true it will release objects to
2441 // spans if it allows it to shrink the cache. Return false if it failed to
2442 // shrink the cache. Decrements cache_size_ on success.
2443 // May temporarily take lock_. If it takes lock_, the locked_size_class
2444 // lock is released to keep the thread from holding two size class locks
2445 // concurrently, which could lead to a deadlock.
2446 bool ShrinkCache(int locked_size_class, bool force);
2448 // This lock protects all the data members. cached_entries and cache_size_
2449 // may be looked at without holding the lock.
2452 // We keep linked lists of empty and non-empty spans.
2453 size_t size_class_; // My size class
2454 Span empty_; // Dummy header for list of empty spans
2455 Span nonempty_; // Dummy header for list of non-empty spans
2456 size_t counter_; // Number of free objects in cache entry
2458 // Here we reserve space for TCEntry cache slots. Since one size class can
2459 // end up getting all the TCEntries quota in the system we just preallocate
2460 // a sufficient number of entries here.
2461 TCEntry tc_slots_[kNumTransferEntries];
2463 // Number of currently used cached entries in tc_slots_. This variable is
2464 // updated under a lock but can be read without one.
2465 int32_t used_slots_;
2466 // The current number of slots for this size class. This is an
2467 // adaptive value that is increased if there is lots of traffic
2468 // on a given size class.
2469 int32_t cache_size_;
2472 // Pad each CentralCache object to a multiple of 64 bytes
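// (presumably so that each size class's spin lock sits on its own cache line
// and unrelated size classes do not contend through false sharing)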
2473 class TCMalloc_Central_FreeListPadded : public TCMalloc_Central_FreeList {
2475 char pad_[(64 - (sizeof(TCMalloc_Central_FreeList) % 64)) % 64];
2478 //-------------------------------------------------------------------
2480 //-------------------------------------------------------------------
2482 // Central cache -- a collection of free-lists, one per size-class.
2483 // We have a separate lock per free-list to reduce contention.
2484 static TCMalloc_Central_FreeListPadded central_cache[kNumClasses];
2486 // Page-level allocator
2487 static AllocAlignmentInteger pageheap_memory[(sizeof(TCMalloc_PageHeap) + sizeof(AllocAlignmentInteger) - 1) / sizeof(AllocAlignmentInteger)];
2488 static bool phinited = false;
2490 // Avoid extra level of indirection by making "pageheap" be just an alias
2491 // of pageheap_memory.
2494 TCMalloc_PageHeap* m_pageHeap;
2497 static inline TCMalloc_PageHeap* getPageHeap()
2499 PageHeapUnion u = { &pageheap_memory[0] };
2500 return u.m_pageHeap;
2503 #define pageheap getPageHeap()
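// The union above reinterprets the statically allocated pageheap_memory buffer
// as a TCMalloc_PageHeap*, presumably to avoid a cast that would trip
// strict-aliasing warnings; the macro keeps existing "pageheap->" call sites unchanged.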
2505 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
2507 #if HAVE(DISPATCH_H) || OS(WINDOWS)
2509 void TCMalloc_PageHeap::periodicScavenge()
2511 SpinLockHolder h(&pageheap_lock);
2512 pageheap->scavenge();
2514 if (shouldScavenge()) {
2515 rescheduleScavenger();
2522 ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger()
2524 ASSERT(pageheap_lock.IsHeld());
2525 if (isScavengerSuspended() && shouldScavenge())
2526 scheduleScavenger();
2531 void TCMalloc_PageHeap::scavengerThread()
2533 #if HAVE(PTHREAD_SETNAME_NP)
2534 pthread_setname_np("JavaScriptCore: FastMalloc scavenger");
2538 if (!shouldScavenge()) {
2539 pthread_mutex_lock(&m_scavengeMutex);
2540 m_scavengeThreadActive = false;
2541 // Block until there are enough free committed pages to release back to the system.
2542 pthread_cond_wait(&m_scavengeCondition, &m_scavengeMutex);
2543 m_scavengeThreadActive = true;
2544 pthread_mutex_unlock(&m_scavengeMutex);
2546 sleep(kScavengeDelayInSeconds);
2548 SpinLockHolder h(&pageheap_lock);
2549 pageheap->scavenge();
2558 // If TLS is available, we also store a copy
2559 // of the per-thread object in a __thread variable
2560 // since __thread variables are faster to read
2561 // than pthread_getspecific(). We still need
2562 // pthread_setspecific() because __thread
2563 // variables provide no way to run cleanup
2564 // code when a thread is destroyed.
2566 static __thread TCMalloc_ThreadCache *threadlocal_heap;
2568 // Thread-specific key. Initialization here is somewhat tricky
2569 // because some Linux startup code invokes malloc() before it
2570 // is in a good enough state to handle pthread_key_create().
2571 // Therefore, we use TSD keys only after tsd_inited is set to true.
2572 // Until then, we use a slow path to get the heap object.
2573 static bool tsd_inited = false;
2574 #if USE(PTHREAD_GETSPECIFIC_DIRECT)
2575 static const pthread_key_t heap_key = __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY0;
2577 static pthread_key_t heap_key;
2580 DWORD tlsIndex = TLS_OUT_OF_INDEXES;
2583 static ALWAYS_INLINE void setThreadHeap(TCMalloc_ThreadCache* heap)
2585 #if USE(PTHREAD_GETSPECIFIC_DIRECT)
2586 // Can't have two libraries both doing this in the same process,
2587 // so check and make this crash right away.
2588 if (pthread_getspecific(heap_key))
2592 // Still do pthread_setspecific even if there's an alternate form
2593 // of thread-local storage in use, to benefit from the delete callback.
2594 pthread_setspecific(heap_key, heap);
2597 TlsSetValue(tlsIndex, heap);
2601 // Allocator for thread heaps
2602 static PageHeapAllocator<TCMalloc_ThreadCache> threadheap_allocator;
2604 // Linked list of heap objects. Protected by pageheap_lock.
2605 static TCMalloc_ThreadCache* thread_heaps = NULL;
2606 static int thread_heap_count = 0;
2608 // Overall thread cache size. Protected by pageheap_lock.
2609 static size_t overall_thread_cache_size = kDefaultOverallThreadCacheSize;
2611 // Global per-thread cache size. Writes are protected by
2612 // pageheap_lock. Reads are done without any locking, which should be
2613 // fine as long as size_t can be written atomically and we don't place
2614 // invariants between this variable and other pieces of state.
2615 static volatile size_t per_thread_cache_size = kMaxThreadCacheSize;
2617 //-------------------------------------------------------------------
2618 // Central cache implementation
2619 //-------------------------------------------------------------------
2621 void TCMalloc_Central_FreeList::Init(size_t cl) {
2625 DLL_Init(&nonempty_);
2630 ASSERT(cache_size_ <= kNumTransferEntries);
2633 void TCMalloc_Central_FreeList::ReleaseListToSpans(void* start) {
2635 void *next = SLL_Next(start);
2636 ReleaseToSpans(start);
2641 #if ENABLE(TIZEN_FIX_BUILD_BREAK_GCC) && CPU(ARM_THUMB2) && GCC_VERSION_AT_LEAST(4, 4, 0)
2642 ALWAYS_INLINE __attribute__((optimize("O0"))) void TCMalloc_Central_FreeList::ReleaseToSpans(void* object) {
2644 ALWAYS_INLINE void TCMalloc_Central_FreeList::ReleaseToSpans(void* object) {
2646 const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift;
2647 Span* span = pageheap->GetDescriptor(p);
2648 ASSERT(span != NULL);
2649 ASSERT(span->refcount > 0);
2651 // If span is empty, move it to non-empty list
2652 if (span->objects == NULL) {
2654 DLL_Prepend(&nonempty_, span);
2655 Event(span, 'N', 0);
2658 // The following check is expensive, so it is disabled by default
2660 // Check that object does not occur in list
2662 for (void* p = span->objects; p != NULL; p = *((void**) p)) {
2663 ASSERT(p != object);
2666 ASSERT(got + span->refcount ==
2667 (span->length<<kPageShift)/ByteSizeForClass(span->sizeclass));
2672 if (span->refcount == 0) {
2673 Event(span, '#', 0);
2674 counter_ -= (span->length<<kPageShift) / ByteSizeForClass(span->sizeclass);
2677 // Release central list lock while operating on pageheap
2680 SpinLockHolder h(&pageheap_lock);
2681 pageheap->Delete(span);
2685 *(reinterpret_cast<void**>(object)) = span->objects;
2686 span->objects = object;
2690 ALWAYS_INLINE bool TCMalloc_Central_FreeList::EvictRandomSizeClass(
2691 size_t locked_size_class, bool force) {
2692 static int race_counter = 0;
2693 int t = race_counter++; // Updated without a lock, but who cares.
2694 if (t >= static_cast<int>(kNumClasses)) {
2695 while (t >= static_cast<int>(kNumClasses)) {
2701 ASSERT(t < static_cast<int>(kNumClasses));
2702 if (t == static_cast<int>(locked_size_class)) return false;
2703 return central_cache[t].ShrinkCache(static_cast<int>(locked_size_class), force);
2706 bool TCMalloc_Central_FreeList::MakeCacheSpace() {
2707 // Is there room in the cache?
2708 if (used_slots_ < cache_size_) return true;
2709 // Check whether we can expand this cache.
2710 if (cache_size_ == kNumTransferEntries) return false;
2711 // Ok, we'll try to grab an entry from some other size class.
2712 if (EvictRandomSizeClass(size_class_, false) ||
2713 EvictRandomSizeClass(size_class_, true)) {
2714 // Succeeded in evicting, we're going to make our cache larger.
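// LockInverter is a small RAII helper: its constructor releases the lock the
// caller already holds and acquires the other one, and its destructor restores
// the original state. ShrinkCache() uses it so this thread never holds two
// size-class locks at the same time.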
2723 class LockInverter {
2725 SpinLock *held_, *temp_;
2727 inline explicit LockInverter(SpinLock* held, SpinLock *temp)
2728 : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); }
2729 inline ~LockInverter() { temp_->Unlock(); held_->Lock(); }
2733 bool TCMalloc_Central_FreeList::ShrinkCache(int locked_size_class, bool force) {
2734 // Start with a quick check without taking a lock.
2735 if (cache_size_ == 0) return false;
2736 // We don't evict from a full cache unless we are 'forcing'.
2737 if (force == false && used_slots_ == cache_size_) return false;
2739 // Grab lock, but first release the other lock held by this thread. We use
2740 // the lock inverter to ensure that we never hold two size class locks
2741 // concurrently. That can create a deadlock because there is no well
2742 // defined nesting order.
2743 LockInverter li(&central_cache[locked_size_class].lock_, &lock_);
2744 ASSERT(used_slots_ <= cache_size_);
2745 ASSERT(0 <= cache_size_);
2746 if (cache_size_ == 0) return false;
2747 if (used_slots_ == cache_size_) {
2748 if (force == false) return false;
2749 // ReleaseListToSpans releases the lock, so we have to make all the
2750 // updates to the central list before calling it.
2753 ReleaseListToSpans(tc_slots_[used_slots_].head);
2760 void TCMalloc_Central_FreeList::InsertRange(void *start, void *end, int N) {
2761 SpinLockHolder h(&lock_);
2762 if (N == num_objects_to_move[size_class_] &&
2764 int slot = used_slots_++;
2766 ASSERT(slot < kNumTransferEntries);
2767 TCEntry *entry = &tc_slots_[slot];
2768 entry->head = start;
2772 ReleaseListToSpans(start);
2775 void TCMalloc_Central_FreeList::RemoveRange(void **start, void **end, int *N) {
2779 SpinLockHolder h(&lock_);
2780 if (num == num_objects_to_move[size_class_] && used_slots_ > 0) {
2781 int slot = --used_slots_;
2783 TCEntry *entry = &tc_slots_[slot];
2784 *start = entry->head;
2789 // TODO: Prefetch multiple TCEntries?
2790 void *tail = FetchFromSpansSafe();
2792 // We are completely out of memory.
2793 *start = *end = NULL;
2798 SLL_SetNext(tail, NULL);
2801 while (count < num) {
2802 void *t = FetchFromSpans();
2813 void* TCMalloc_Central_FreeList::FetchFromSpansSafe() {
2814 void *t = FetchFromSpans();
2817 t = FetchFromSpans();
2822 void* TCMalloc_Central_FreeList::FetchFromSpans() {
2823 if (DLL_IsEmpty(&nonempty_)) return NULL;
2824 Span* span = nonempty_.next;
2826 ASSERT(span->objects != NULL);
2827 ASSERT_SPAN_COMMITTED(span);
2829 void* result = span->objects;
2830 span->objects = *(reinterpret_cast<void**>(result));
2831 if (span->objects == NULL) {
2832 // Move to empty list
2834 DLL_Prepend(&empty_, span);
2835 Event(span, 'E', 0);
2841 // Fetch memory from the system and add to the central cache freelist.
2842 ALWAYS_INLINE void TCMalloc_Central_FreeList::Populate() {
2843 // Release central list lock while operating on pageheap
2845 const size_t npages = class_to_pages[size_class_];
2849 SpinLockHolder h(&pageheap_lock);
2850 span = pageheap->New(npages);
2851 if (span) pageheap->RegisterSizeClass(span, size_class_);
2855 MESSAGE("allocation failed: %d\n", errno);
2857 MESSAGE("allocation failed: %d\n", ::GetLastError());
2859 MESSAGE("allocation failed\n");
2864 ASSERT_SPAN_COMMITTED(span);
2865 ASSERT(span->length == npages);
2866 // Cache sizeclass info eagerly. Locking is not necessary.
2867 // (Instead of being eager, we could just replace any stale info
2868 // about this span, but that seems to be no better in practice.)
2869 for (size_t i = 0; i < npages; i++) {
2870 pageheap->CacheSizeClass(span->start + i, size_class_);
2873 // Split the block into pieces and add to the free-list
2874 // TODO: coloring of objects to avoid cache conflicts?
2875 void** tail = &span->objects;
2876 char* ptr = reinterpret_cast<char*>(span->start << kPageShift);
2877 char* limit = ptr + (npages << kPageShift);
2878 const size_t size = ByteSizeForClass(size_class_);
2881 while ((nptr = ptr + size) <= limit) {
2883 tail = reinterpret_cast_ptr<void**>(ptr);
2887 ASSERT(ptr <= limit);
2889 span->refcount = 0; // No sub-object in use yet
2891 // Add span to list of non-empty spans
2893 DLL_Prepend(&nonempty_, span);
2897 //-------------------------------------------------------------------
2898 // TCMalloc_ThreadCache implementation
2899 //-------------------------------------------------------------------
2901 inline bool TCMalloc_ThreadCache::SampleAllocation(size_t k) {
2902 if (bytes_until_sample_ < k) {
2906 bytes_until_sample_ -= k;
2911 void TCMalloc_ThreadCache::Init(ThreadIdentifier tid) {
2916 in_setspecific_ = false;
2917 for (size_t cl = 0; cl < kNumClasses; ++cl) {
2921 // Initialize RNG -- run it for a bit to get to good values
2922 bytes_until_sample_ = 0;
2923 rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this));
2924 for (int i = 0; i < 100; i++) {
2925 PickNextSample(static_cast<size_t>(FLAGS_tcmalloc_sample_parameter * 2));
2929 void TCMalloc_ThreadCache::Cleanup() {
2930 // Put unused memory back into central cache
2931 for (size_t cl = 0; cl < kNumClasses; ++cl) {
2932 if (list_[cl].length() > 0) {
2933 ReleaseToCentralCache(cl, list_[cl].length());
2938 ALWAYS_INLINE void* TCMalloc_ThreadCache::Allocate(size_t size) {
2939 ASSERT(size <= kMaxSize);
2940 const size_t cl = SizeClass(size);
2941 FreeList* list = &list_[cl];
2942 size_t allocationSize = ByteSizeForClass(cl);
2943 if (list->empty()) {
2944 FetchFromCentralCache(cl, allocationSize);
2945 if (list->empty()) return NULL;
2947 size_ -= allocationSize;
2951 inline void TCMalloc_ThreadCache::Deallocate(void* ptr, size_t cl) {
2952 size_ += ByteSizeForClass(cl);
2953 FreeList* list = &list_[cl];
2955 // If enough data is free, put back into central cache
2956 if (list->length() > kMaxFreeListLength) {
2957 ReleaseToCentralCache(cl, num_objects_to_move[cl]);
2959 if (size_ >= per_thread_cache_size) Scavenge();
2962 // Remove some objects of class "cl" from central cache and add to thread heap
2963 ALWAYS_INLINE void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl, size_t allocationSize) {
2964 int fetch_count = num_objects_to_move[cl];
2966 central_cache[cl].RemoveRange(&start, &end, &fetch_count);
2967 list_[cl].PushRange(fetch_count, start, end);
2968 size_ += allocationSize * fetch_count;
2971 // Remove some objects of class "cl" from thread heap and add to central cache
2972 inline void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) {
2974 FreeList* src = &list_[cl];
2975 if (N > src->length()) N = src->length();
2976 size_ -= N*ByteSizeForClass(cl);
2978 // We return prepackaged chains of the correct size to the central cache.
2979 // TODO: Use the same format internally in the thread caches?
2980 int batch_size = num_objects_to_move[cl];
2981 while (N > batch_size) {
2983 src->PopRange(batch_size, &head, &tail);
2984 central_cache[cl].InsertRange(head, tail, batch_size);
2988 src->PopRange(N, &head, &tail);
2989 central_cache[cl].InsertRange(head, tail, N);
2992 // Release idle memory to the central cache
2993 inline void TCMalloc_ThreadCache::Scavenge() {
2994 // If the low-water mark for the free list is L, it means we would
2995 // not have had to allocate anything from the central cache even if
2996 // we had reduced the free list size by L. We aim to get closer to
2997 // that situation by dropping L/2 nodes from the free list. This
2998 // may not release much memory, but if so we will call scavenge again
2999 // pretty soon and the low-water marks will be high on that call.
3000 //int64 start = CycleClock::Now();
3002 for (size_t cl = 0; cl < kNumClasses; cl++) {
3003 FreeList* list = &list_[cl];
3004 const int lowmark = list->lowwatermark();
3006 const int drop = (lowmark > 1) ? lowmark/2 : 1;
3007 ReleaseToCentralCache(cl, drop);
3009 list->clear_lowwatermark();
3012 //int64 finish = CycleClock::Now();
3014 //MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0);
3017 void TCMalloc_ThreadCache::PickNextSample(size_t k) {
3018 // Make next "random" number
3019 // x^32+x^22+x^2+x^1+1 is a primitive polynomial for random numbers
3020 static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0);
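  // One step of a Galois LFSR: shift left by one and, if the bit that fell off
  // the top was set, XOR in the polynomial (the arithmetic shift of the sign
  // bit yields an all-ones or all-zeros mask).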
3022 rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly);
3024 // Next point is "rnd_ % (sample_period)". I.e., average
3025 // increment is "sample_period/2".
3026 const int flag_value = static_cast<int>(FLAGS_tcmalloc_sample_parameter);
3027 static int last_flag_value = -1;
3029 if (flag_value != last_flag_value) {
3030 SpinLockHolder h(&sample_period_lock);
3032 for (i = 0; i < (static_cast<int>(sizeof(primes_list)/sizeof(primes_list[0])) - 1); i++) {
3033 if (primes_list[i] >= flag_value) {
3037 sample_period = primes_list[i];
3038 last_flag_value = flag_value;
3041 bytes_until_sample_ += rnd_ % sample_period;
3043 if (k > (static_cast<size_t>(-1) >> 2)) {
3044 // If the user has asked for a huge allocation then it is possible
3045 // for the code below to loop infinitely. Just return (note that
3046 // this throws off the sampling accuracy somewhat, but a user who
3047 // is allocating more than 1G of memory at a time can live with a
3048 // minor inaccuracy in profiling of small allocations, and also
3049 // would rather not wait for the loop below to terminate).
3053 while (bytes_until_sample_ < k) {
3054 // Increase bytes_until_sample_ by enough average sampling periods
3055 // (sample_period >> 1) to allow us to sample past the current
3057 bytes_until_sample_ += (sample_period >> 1);
3060 bytes_until_sample_ -= k;
3063 void TCMalloc_ThreadCache::InitModule() {
3064 // There is a slight potential race here because of double-checked
3065 // locking idiom. However, as long as the program does a small
3066 // allocation before switching to multi-threaded mode, we will be
3067 // fine. We increase the chances of doing such a small allocation
3068 // by doing one in the constructor of the module_enter_exit_hook
3069 // object declared below.
3070 SpinLockHolder h(&pageheap_lock);
3076 threadheap_allocator.Init();
3077 span_allocator.Init();
3078 span_allocator.New(); // Reduce cache conflicts
3079 span_allocator.New(); // Reduce cache conflicts
3080 stacktrace_allocator.Init();
3081 DLL_Init(&sampled_objects);
3082 for (size_t i = 0; i < kNumClasses; ++i) {
3083 central_cache[i].Init(i);
3087 #if defined(WTF_CHANGES) && OS(DARWIN)
3088 FastMallocZone::init();
3093 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::NewHeap(ThreadIdentifier tid) {
3094 // Create the heap and add it to the linked list
3095 TCMalloc_ThreadCache *heap = threadheap_allocator.New();
3097 heap->next_ = thread_heaps;
3099 if (thread_heaps != NULL) thread_heaps->prev_ = heap;
3100 thread_heaps = heap;
3101 thread_heap_count++;
3102 RecomputeThreadCacheSize();
3106 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetThreadHeap() {
3108 // __thread is faster, but only when the kernel supports it
3109 if (KernelSupportsTLS())
3110 return threadlocal_heap;
3112 return static_cast<TCMalloc_ThreadCache*>(TlsGetValue(tlsIndex));
3114 return static_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key));
3118 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() {
3119 TCMalloc_ThreadCache* ptr = NULL;
3123 ptr = GetThreadHeap();
3125 if (ptr == NULL) ptr = CreateCacheIfNecessary();
3129 // In deletion paths, we do not try to create a thread-cache. This is
3130 // because we may be in the thread destruction code and may have
3131 // already cleaned up the cache for this thread.
3132 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() {
3133 if (!tsd_inited) return NULL;
3134 void* const p = GetThreadHeap();
3135 return reinterpret_cast<TCMalloc_ThreadCache*>(p);
3138 void TCMalloc_ThreadCache::InitTSD() {
3139 ASSERT(!tsd_inited);
3140 #if USE(PTHREAD_GETSPECIFIC_DIRECT)
3141 pthread_key_init_np(heap_key, DestroyThreadCache);
3143 pthread_key_create(&heap_key, DestroyThreadCache);
3146 tlsIndex = TlsAlloc();
3151 // We may have used a fake pthread_t for the main thread. Fix it.
3153 memset(&zero, 0, sizeof(zero));
3156 SpinLockHolder h(&pageheap_lock);
3158 ASSERT(pageheap_lock.IsHeld());
3160 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
3163 h->tid_ = GetCurrentThreadId();
3166 if (pthread_equal(h->tid_, zero)) {
3167 h->tid_ = pthread_self();
3173 TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() {
3174 // Initialize per-thread data if necessary
3175 TCMalloc_ThreadCache* heap = NULL;
3177 SpinLockHolder h(&pageheap_lock);
3184 me = GetCurrentThreadId();
3187 // Early on in glibc's life, we cannot even call pthread_self()
3190 memset(&me, 0, sizeof(me));
3192 me = pthread_self();
3196 // This may be a recursive malloc call from pthread_setspecific()
3197 // In that case, the heap for this thread has already been created
3198 // and added to the linked list. So we search for that first.
3199 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
3201 if (h->tid_ == me) {
3203 if (pthread_equal(h->tid_, me)) {
3210 if (heap == NULL) heap = NewHeap(me);
3213 // We call pthread_setspecific() outside the lock because it may
3214 // call malloc() recursively. The recursive call will never get
3215 // here again because it will find the already allocated heap in the
3216 // linked list of heaps.
3217 if (!heap->in_setspecific_ && tsd_inited) {
3218 heap->in_setspecific_ = true;
3219 setThreadHeap(heap);
3224 void TCMalloc_ThreadCache::BecomeIdle() {
3225 if (!tsd_inited) return; // No caches yet
3226 TCMalloc_ThreadCache* heap = GetThreadHeap();
3227 if (heap == NULL) return; // No thread cache to remove
3228 if (heap->in_setspecific_) return; // Do not disturb the active caller
3230 heap->in_setspecific_ = true;
3231 setThreadHeap(NULL);
3233 // Also update the copy in __thread
3234 threadlocal_heap = NULL;
3236 heap->in_setspecific_ = false;
3237 if (GetThreadHeap() == heap) {
3238 // Somehow heap got reinstated by a recursive call to malloc
3239 // from pthread_setspecific. We give up in this case.
3243 // We can now get rid of the heap
3247 void TCMalloc_ThreadCache::DestroyThreadCache(void* ptr) {
3248 // Note that "ptr" cannot be NULL since pthread promises not
3249 // to invoke the destructor on NULL values, but for safety,
3251 if (ptr == NULL) return;
3253 // Prevent fast path of GetThreadHeap() from returning heap.
3254 threadlocal_heap = NULL;
3256 DeleteCache(reinterpret_cast<TCMalloc_ThreadCache*>(ptr));
3259 void TCMalloc_ThreadCache::DeleteCache(TCMalloc_ThreadCache* heap) {
3260 // Remove all memory from heap
3263 // Remove from linked list
3264 SpinLockHolder h(&pageheap_lock);
3265 if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_;
3266 if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_;
3267 if (thread_heaps == heap) thread_heaps = heap->next_;
3268 thread_heap_count--;
3269 RecomputeThreadCacheSize();
3271 threadheap_allocator.Delete(heap);
3274 void TCMalloc_ThreadCache::RecomputeThreadCacheSize() {
3275 // Divide available space across threads
3276 int n = thread_heap_count > 0 ? thread_heap_count : 1;
3277 size_t space = overall_thread_cache_size / n;
3279 // Limit to allowed range
3280 if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
3281 if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;
3283 per_thread_cache_size = space;
3286 void TCMalloc_ThreadCache::Print() const {
3287 for (size_t cl = 0; cl < kNumClasses; ++cl) {
3288 MESSAGE(" %5" PRIuS " : %4d len; %4d lo\n",
3289 ByteSizeForClass(cl),
3291 list_[cl].lowwatermark());
3295 // Extract interesting stats
3296 struct TCMallocStats {
3297 uint64_t system_bytes; // Bytes alloced from system
3298 uint64_t thread_bytes; // Bytes in thread caches
3299 uint64_t central_bytes; // Bytes in central cache
3300 uint64_t transfer_bytes; // Bytes in central transfer cache
3301 uint64_t pageheap_bytes; // Bytes in page heap
3302 uint64_t metadata_bytes; // Bytes alloced for metadata
3306 // Get stats into "r". Also get per-size-class counts if class_count != NULL
3307 static void ExtractStats(TCMallocStats* r, uint64_t* class_count) {
3308 r->central_bytes = 0;
3309 r->transfer_bytes = 0;
3310 for (int cl = 0; cl < kNumClasses; ++cl) {
3311 const int length = central_cache[cl].length();
3312 const int tc_length = central_cache[cl].tc_length();
3313 r->central_bytes += static_cast<uint64_t>(ByteSizeForClass(cl)) * length;
3314 r->transfer_bytes +=
3315 static_cast<uint64_t>(ByteSizeForClass(cl)) * tc_length;
3316 if (class_count) class_count[cl] = length + tc_length;
3319 // Add stats from per-thread heaps
3320 r->thread_bytes = 0;
3322 SpinLockHolder h(&pageheap_lock);
3323 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
3324 r->thread_bytes += h->Size();
3326 for (size_t cl = 0; cl < kNumClasses; ++cl) {
3327 class_count[cl] += h->freelist_length(cl);
3334 SpinLockHolder h(&pageheap_lock);
3335 r->system_bytes = pageheap->SystemBytes();
3336 r->metadata_bytes = metadata_system_bytes;
3337 r->pageheap_bytes = pageheap->FreeBytes();
3343 // WRITE stats to "out"
3344 static void DumpStats(TCMalloc_Printer* out, int level) {
3345 TCMallocStats stats;
3346 uint64_t class_count[kNumClasses];
3347 ExtractStats(&stats, (level >= 2 ? class_count : NULL));
3350 out->printf("------------------------------------------------\n");
3351 uint64_t cumulative = 0;
3352 for (int cl = 0; cl < kNumClasses; ++cl) {
3353 if (class_count[cl] > 0) {
3354 uint64_t class_bytes = class_count[cl] * ByteSizeForClass(cl);
3355 cumulative += class_bytes;
3356 out->printf("class %3d [ %8" PRIuS " bytes ] : "
3357 "%8" PRIu64 " objs; %5.1f MB; %5.1f cum MB\n",
3358 cl, ByteSizeForClass(cl),
3360 class_bytes / 1048576.0,
3361 cumulative / 1048576.0);
3365 SpinLockHolder h(&pageheap_lock);
3366 pageheap->Dump(out);
3369 const uint64_t bytes_in_use = stats.system_bytes
3370 - stats.pageheap_bytes
3371 - stats.central_bytes
3372 - stats.transfer_bytes
3373 - stats.thread_bytes;
3375 out->printf("------------------------------------------------\n"
3376 "MALLOC: %12" PRIu64 " Heap size\n"
3377 "MALLOC: %12" PRIu64 " Bytes in use by application\n"
3378 "MALLOC: %12" PRIu64 " Bytes free in page heap\n"
3379 "MALLOC: %12" PRIu64 " Bytes free in central cache\n"
3380 "MALLOC: %12" PRIu64 " Bytes free in transfer cache\n"
3381 "MALLOC: %12" PRIu64 " Bytes free in thread caches\n"
3382 "MALLOC: %12" PRIu64 " Spans in use\n"
3383 "MALLOC: %12" PRIu64 " Thread heaps in use\n"
3384 "MALLOC: %12" PRIu64 " Metadata allocated\n"
3385 "------------------------------------------------\n",
3388 stats.pageheap_bytes,
3389 stats.central_bytes,
3390 stats.transfer_bytes,
3392 uint64_t(span_allocator.inuse()),
3393 uint64_t(threadheap_allocator.inuse()),
3394 stats.metadata_bytes);
3397 static void PrintStats(int level) {
3398 const int kBufferSize = 16 << 10;
3399 char* buffer = new char[kBufferSize];
3400 TCMalloc_Printer printer(buffer, kBufferSize);
3401 DumpStats(&printer, level);
3402 write(STDERR_FILENO, buffer, strlen(buffer));
3406 static void** DumpStackTraces() {
3407 // Count how much space we need
3408 int needed_slots = 0;
3410 SpinLockHolder h(&pageheap_lock);
3411 for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) {
3412 StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects);
3413 needed_slots += 3 + stack->depth;
3415 needed_slots += 100; // Slop in case sample grows
3416 needed_slots += needed_slots/8; // An extra 12.5% slop
3419 void** result = new void*[needed_slots];
3420 if (result == NULL) {
3421 MESSAGE("tcmalloc: could not allocate %d slots for stack traces\n",
3426 SpinLockHolder h(&pageheap_lock);
3428 for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) {
3429 ASSERT(used_slots < needed_slots); // Need to leave room for terminator
3430 StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects);
3431 if (used_slots + 3 + stack->depth >= needed_slots) {
3436 result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1));
3437 result[used_slots+1] = reinterpret_cast<void*>(stack->size);
3438 result[used_slots+2] = reinterpret_cast<void*>(stack->depth);
3439 for (int d = 0; d < stack->depth; d++) {
3440 result[used_slots+3+d] = stack->stack[d];
3442 used_slots += 3 + stack->depth;
3444 result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0));
3451 // TCMalloc's support for extra malloc interfaces
3452 class TCMallocImplementation : public MallocExtension {
3454 virtual void GetStats(char* buffer, int buffer_length) {
3455 ASSERT(buffer_length > 0);
3456 TCMalloc_Printer printer(buffer, buffer_length);
3458 // Print level one stats unless lots of space is available
3459 if (buffer_length < 10000) {
3460 DumpStats(&printer, 1);
3462 DumpStats(&printer, 2);
3466 virtual void** ReadStackTraces() {
3467 return DumpStackTraces();
3470 virtual bool GetNumericProperty(const char* name, size_t* value) {
3471 ASSERT(name != NULL);
3473 if (strcmp(name, "generic.current_allocated_bytes") == 0) {
3474 TCMallocStats stats;
3475 ExtractStats(&stats, NULL);
3476 *value = stats.system_bytes
3477 - stats.thread_bytes
3478 - stats.central_bytes
3479 - stats.pageheap_bytes;
3483 if (strcmp(name, "generic.heap_size") == 0) {
3484 TCMallocStats stats;
3485 ExtractStats(&stats, NULL);
3486 *value = stats.system_bytes;
3490 if (strcmp(name, "tcmalloc.slack_bytes") == 0) {
3491 // We assume that bytes in the page heap are not fragmented too
3492 // badly, and are therefore available for allocation.
3493 SpinLockHolder l(&pageheap_lock);
3494 *value = pageheap->FreeBytes();
3498 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
3499 SpinLockHolder l(&pageheap_lock);
3500 *value = overall_thread_cache_size;
3504 if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) {
3505 TCMallocStats stats;
3506 ExtractStats(&stats, NULL);
3507 *value = stats.thread_bytes;
3514 virtual bool SetNumericProperty(const char* name, size_t value) {
3515 ASSERT(name != NULL);
3517 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
3518 // Clip the value to a reasonable range
3519 if (value < kMinThreadCacheSize) value = kMinThreadCacheSize;
3520 if (value > (1<<30)) value = (1<<30); // Limit to 1GB
3522 SpinLockHolder l(&pageheap_lock);
3523 overall_thread_cache_size = static_cast<size_t>(value);
3524 TCMalloc_ThreadCache::RecomputeThreadCacheSize();
3531 virtual void MarkThreadIdle() {
3532 TCMalloc_ThreadCache::BecomeIdle();
3535 virtual void ReleaseFreeMemory() {
3536 SpinLockHolder h(&pageheap_lock);
3537 pageheap->ReleaseFreePages();
3542 // The constructor allocates an object to ensure that initialization
3543 // runs before main(), and therefore we do not have a chance to become
3544 // multi-threaded before initialization. We also create the TSD key
3545 // here. Presumably by the time this constructor runs, glibc is in
3546 // good enough shape to handle pthread_key_create().
3548 // The constructor also takes the opportunity to tell STL to use
3549 // tcmalloc. We want to do this early, before construct time, so
3550 // all user STL allocations go through tcmalloc (which works really
3553 // The destructor prints stats when the program exits.
3554 class TCMallocGuard {
3558 #ifdef HAVE_TLS // this is true if the cc/ld/libc combo support TLS
3559 // Check whether the kernel also supports TLS (needs to happen at runtime)
3560 CheckIfKernelSupportsTLS();
3563 #ifdef WIN32 // patch the windows VirtualAlloc, etc.
3564 PatchWindowsFunctions(); // defined in windows/patch_functions.cc
3568 TCMalloc_ThreadCache::InitTSD();
3571 MallocExtension::Register(new TCMallocImplementation);
3577 const char* env = getenv("MALLOCSTATS");
3579 int level = atoi(env);
3580 if (level < 1) level = 1;
3584 UnpatchWindowsFunctions();
3591 static TCMallocGuard module_enter_exit_hook;
3595 //-------------------------------------------------------------------
3596 // Helpers for the exported routines below
3597 //-------------------------------------------------------------------
3601 static Span* DoSampledAllocation(size_t size) {
3603 // Grab the stack trace outside the heap lock
3605 tmp.depth = GetStackTrace(tmp.stack, kMaxStackDepth, 1);
3608 SpinLockHolder h(&pageheap_lock);
3610 Span *span = pageheap->New(pages(size == 0 ? 1 : size));
3615 // Allocate stack trace
3616 StackTrace *stack = stacktrace_allocator.New();
3617 if (stack == NULL) {
3618 // Sampling failed because of lack of memory
3624 span->objects = stack;
3625 DLL_Prepend(&sampled_objects, span);
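// CheckCachedSizeClass() is a debugging aid used via CheckedMallocResult(): the
// pagemap cache must either have no entry (0) for the page or agree with the
// authoritative descriptor's sizeclass.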
3631 static inline bool CheckCachedSizeClass(void *ptr) {
3632 PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
3633 size_t cached_value = pageheap->GetSizeClassIfCached(p);
3634 return cached_value == 0 ||
3635 cached_value == pageheap->GetDescriptor(p)->sizeclass;
3638 static inline void* CheckedMallocResult(void *result)
3640 ASSERT(result == 0 || CheckCachedSizeClass(result));
3644 static inline void* SpanToMallocResult(Span *span) {
3645 ASSERT_SPAN_COMMITTED(span);
3646 pageheap->CacheSizeClass(span->start, 0);
3648 CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
3652 template <bool crashOnFailure>
3654 static ALWAYS_INLINE void* do_malloc(size_t size) {
3658 ASSERT(!isForbidden());
3661 // The following call forces module initialization
3662 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
3664 if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
3665 Span* span = DoSampledAllocation(size);
3667 ret = SpanToMallocResult(span);
3671 if (size > kMaxSize) {
3672 // Use page-level allocator
3673 SpinLockHolder h(&pageheap_lock);
3674 Span* span = pageheap->New(pages(size));
3676 ret = SpanToMallocResult(span);
3679 // The common case, and also the simplest. This just pops the
3680 // size-appropriate freelist, after replenishing it if it's empty.
3681 ret = CheckedMallocResult(heap->Allocate(size));
3685 if (crashOnFailure) // This branch should be optimized out by the compiler.
3694 static ALWAYS_INLINE void do_free(void* ptr) {
3695 if (ptr == NULL) return;
3696 ASSERT(pageheap != NULL); // Should not call free() before malloc()
3697 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
3699 size_t cl = pageheap->GetSizeClassIfCached(p);
3702 span = pageheap->GetDescriptor(p);
3703 cl = span->sizeclass;
3704 pageheap->CacheSizeClass(p, cl);
3707 #ifndef NO_TCMALLOC_SAMPLES
3708 ASSERT(!pageheap->GetDescriptor(p)->sample);
3710 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCacheIfPresent();
3712 heap->Deallocate(ptr, cl);
3714 // Delete directly into central cache
3715 SLL_SetNext(ptr, NULL);
3716 central_cache[cl].InsertRange(ptr, ptr, 1);
3719 SpinLockHolder h(&pageheap_lock);
3720 ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
3721 ASSERT(span != NULL && span->start == p);
3722 #ifndef NO_TCMALLOC_SAMPLES
3725 stacktrace_allocator.Delete(reinterpret_cast<StackTrace*>(span->objects));
3726 span->objects = NULL;
3729 pageheap->Delete(span);
3734 // For use by exported routines below that want specific alignments
3736 // Note: this code can be slow, and can significantly fragment memory.
3737 // The expectation is that memalign/posix_memalign/valloc/pvalloc will
3738 // not be invoked very often. This requirement simplifies our
3739 // implementation and allows us to tune for expected allocation
3741 static void* do_memalign(size_t align, size_t size) {
3742 ASSERT((align & (align - 1)) == 0);
3744 if (pageheap == NULL) TCMalloc_ThreadCache::InitModule();
3746 // Allocate at least one byte to avoid boundary conditions below
3747 if (size == 0) size = 1;
3749 if (size <= kMaxSize && align < kPageSize) {
3750 // Search through acceptable size classes looking for one with
3751 // enough alignment. This depends on the fact that
3752 // InitSizeClasses() currently produces several size classes that
3753 // are aligned at powers of two. We will waste time and space if
3754 // we miss in the size class array, but that is deemed acceptable
3755 // since memalign() should be used rarely.
3756 size_t cl = SizeClass(size);
3757 while (cl < kNumClasses && ((class_to_size[cl] & (align - 1)) != 0)) {
3760 if (cl < kNumClasses) {
3761 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
3762 return CheckedMallocResult(heap->Allocate(class_to_size[cl]));
3766 // We will allocate directly from the page heap
3767 SpinLockHolder h(&pageheap_lock);
3769 if (align <= kPageSize) {
3770 // Any page-level allocation will be fine
3771 // TODO: We could put the rest of this page in the appropriate
3772 // TODO: cache but it does not seem worth it.
3773 Span* span = pageheap->New(pages(size));
3774 return span == NULL ? NULL : SpanToMallocResult(span);
3777 // Allocate extra pages and carve off an aligned portion
3778 const Length alloc = pages(size + align);
3779 Span* span = pageheap->New(alloc);
3780 if (span == NULL) return NULL;
3782 // Skip starting portion so that we end up aligned
3784 while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) {
3787 ASSERT(skip < alloc);
3789 Span* rest = pageheap->Split(span, skip);
3790 pageheap->Delete(span);
3794 // Skip trailing portion that we do not need to return
3795 const Length needed = pages(size);
3796 ASSERT(span->length >= needed);
3797 if (span->length > needed) {
3798 Span* trailer = pageheap->Split(span, needed);
3799 pageheap->Delete(trailer);
3801 return SpanToMallocResult(span);
3805 // Helpers for use by exported routines below:
3808 static inline void do_malloc_stats() {
3813 static inline int do_mallopt(int, int) {
3814 return 1; // Indicates error
3817 #ifdef HAVE_STRUCT_MALLINFO // mallinfo isn't defined on freebsd, for instance
3818 static inline struct mallinfo do_mallinfo() {
3819 TCMallocStats stats;
3820 ExtractStats(&stats, NULL);
3822 // Just some of the fields are filled in.
3823 struct mallinfo info;
3824 memset(&info, 0, sizeof(info));
3826 // Unfortunately, the struct contains "int" fields, so some of the
3827 // size values will be truncated.
3828 info.arena = static_cast<int>(stats.system_bytes);
3829 info.fsmblks = static_cast<int>(stats.thread_bytes
3830 + stats.central_bytes
3831 + stats.transfer_bytes);
3832 info.fordblks = static_cast<int>(stats.pageheap_bytes);
3833 info.uordblks = static_cast<int>(stats.system_bytes
3834 - stats.thread_bytes
3835 - stats.central_bytes
3836 - stats.transfer_bytes
3837 - stats.pageheap_bytes);
3843 //-------------------------------------------------------------------
3844 // Exported routines
3845 //-------------------------------------------------------------------
3847 // CAVEAT: The code structure below ensures that MallocHook methods are always
3848 // called from the stack frame of the invoked allocation function.
3849 // heap-checker.cc depends on this to start a stack trace from
3850 // the call to the (de)allocation function.
3855 #define do_malloc do_malloc<crashOnFailure>
3857 template <bool crashOnFailure>
3858 ALWAYS_INLINE void* malloc(size_t);
3860 void* fastMalloc(size_t size)
3862 return malloc<true>(size);
3865 TryMallocReturnValue tryFastMalloc(size_t size)
3867 return malloc<false>(size);
3870 template <bool crashOnFailure>
3873 void* malloc(size_t size) {
3874 #if ENABLE(WTF_MALLOC_VALIDATION)
3875 if (std::numeric_limits<size_t>::max() - Internal::ValidationBufferSize <= size) // If overflow would occur...
3877 void* result = do_malloc(size + Internal::ValidationBufferSize);
3881 Internal::ValidationHeader* header = static_cast<Internal::ValidationHeader*>(result);
3882 header->m_size = size;
3883 header->m_type = Internal::AllocTypeMalloc;
3884 header->m_prefix = static_cast<unsigned>(Internal::ValidationPrefix);
3885 result = header + 1;
3886 *Internal::fastMallocValidationSuffix(result) = Internal::ValidationSuffix;
3887 fastMallocValidate(result);
3889 void* result = do_malloc(size);
3893 MallocHook::InvokeNewHook(result, size);
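// Layout sketch for ENABLE(WTF_MALLOC_VALIDATION), as set up above (schematic
// only, not part of the original file; field sizes are whatever
// Internal::ValidationHeader and the suffix type define):
//
//   +---------------------+----------------------+------------------+
//   | ValidationHeader    | user data (m_size)   | ValidationSuffix |
//   | m_size, m_type,     |                      |                  |
//   | m_prefix            |                      |                  |
//   +---------------------+----------------------+------------------+
//   ^                     ^
//   do_malloc() result    pointer returned to the caller (header + 1)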
3901 void free(void* ptr) {
3903 MallocHook::InvokeDeleteHook(ptr);
3906 #if ENABLE(WTF_MALLOC_VALIDATION)
3910 fastMallocValidate(ptr);
3911 Internal::ValidationHeader* header = Internal::fastMallocValidationHeader(ptr);
3912 memset(ptr, 0xCC, header->m_size);
3922 template <bool crashOnFailure>
3923 ALWAYS_INLINE void* calloc(size_t, size_t);
3925 void* fastCalloc(size_t n, size_t elem_size)
3927 void* result = calloc<true>(n, elem_size);
3928 #if ENABLE(WTF_MALLOC_VALIDATION)
3929 fastMallocValidate(result);
3934 TryMallocReturnValue tryFastCalloc(size_t n, size_t elem_size)
3936 void* result = calloc<false>(n, elem_size);
3937 #if ENABLE(WTF_MALLOC_VALIDATION)
3938 fastMallocValidate(result);
3943 template <bool crashOnFailure>
3946 void* calloc(size_t n, size_t elem_size) {
3947 size_t totalBytes = n * elem_size;
3949 // Protect against overflow
3950 if (n > 1 && elem_size && (totalBytes / elem_size) != n)
3953 #if ENABLE(WTF_MALLOC_VALIDATION)
3954 void* result = malloc<crashOnFailure>(totalBytes);
3958 memset(result, 0, totalBytes);
3959 fastMallocValidate(result);
3961 void* result = do_malloc(totalBytes);
3962 if (result != NULL) {
3963 memset(result, 0, totalBytes);
3968 MallocHook::InvokeNewHook(result, totalBytes);
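// Illustrative sketch (not part of the original file): why the division test in
// calloc() above catches multiplication overflow. The values are arbitrary.
#if 0
#include <cassert>
#include <cstddef>

static bool callocSizeOverflows(size_t n, size_t elemSize)
{
    size_t totalBytes = n * elemSize; // May wrap around on overflow.
    // Same test as above: if the product wrapped, dividing by elemSize cannot
    // give n back.
    return n > 1 && elemSize && (totalBytes / elemSize) != n;
}

int main()
{
    assert(!callocSizeOverflows(16, 32));              // 512 bytes, no overflow.
    if (sizeof(size_t) == 4)
        assert(callocSizeOverflows(0x10000, 0x10001)); // Wraps in 32-bit size_t.
    return 0;
}
#endif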
3973 // Since cfree isn't used anywhere, we don't compile it in.
3978 void cfree(void* ptr) {
3980 MallocHook::InvokeDeleteHook(ptr);
3989 template <bool crashOnFailure>
3990 ALWAYS_INLINE void* realloc(void*, size_t);
3992 void* fastRealloc(void* old_ptr, size_t new_size)
3994 #if ENABLE(WTF_MALLOC_VALIDATION)
3995 fastMallocValidate(old_ptr);
3997 void* result = realloc<true>(old_ptr, new_size);
3998 #if ENABLE(WTF_MALLOC_VALIDATION)
3999 fastMallocValidate(result);
4004 TryMallocReturnValue tryFastRealloc(void* old_ptr, size_t new_size)
4006 #if ENABLE(WTF_MALLOC_VALIDATION)
4007 fastMallocValidate(old_ptr);
4009 void* result = realloc<false>(old_ptr, new_size);
4010 #if ENABLE(WTF_MALLOC_VALIDATION)
4011 fastMallocValidate(result);
4016 template <bool crashOnFailure>
4019 void* realloc(void* old_ptr, size_t new_size) {
4020 if (old_ptr == NULL) {
4021 #if ENABLE(WTF_MALLOC_VALIDATION)
4022 void* result = malloc<crashOnFailure>(new_size);
4024 void* result = do_malloc(new_size);
4026 MallocHook::InvokeNewHook(result, new_size);
4031 if (new_size == 0) {
4033 MallocHook::InvokeDeleteHook(old_ptr);
4039 #if ENABLE(WTF_MALLOC_VALIDATION)
4040 if (std::numeric_limits<size_t>::max() - Internal::ValidationBufferSize <= new_size) // If overflow would occur...
4042 Internal::ValidationHeader* header = Internal::fastMallocValidationHeader(old_ptr);
4043 fastMallocValidate(old_ptr);
4045 header->m_size = new_size;
4046 new_size += Internal::ValidationBufferSize;
4049 // Get the size of the old entry
4050 const PageID p = reinterpret_cast<uintptr_t>(old_ptr) >> kPageShift;
4051 size_t cl = pageheap->GetSizeClassIfCached(p);
4055 span = pageheap->GetDescriptor(p);
4056 cl = span->sizeclass;
4057 pageheap->CacheSizeClass(p, cl);
4060 old_size = ByteSizeForClass(cl);
4062 ASSERT(span != NULL);
4063 old_size = span->length << kPageShift;
4066 // Reallocate if the new size is larger than the old size,
4067 // or if the new size is significantly smaller than the old size.
4068 if ((new_size > old_size) || (AllocationSize(new_size) < old_size)) {
4069 // Need to reallocate
4070 void* new_ptr = do_malloc(new_size);
4071 if (new_ptr == NULL) {
4075 MallocHook::InvokeNewHook(new_ptr, new_size);
4077 memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size));
4079 MallocHook::InvokeDeleteHook(old_ptr);
4081 // We could use a variant of do_free() that leverages the fact
4082 // that we already know the sizeclass of old_ptr. The benefit
4083 // would be small, so don't bother.
4085 #if ENABLE(WTF_MALLOC_VALIDATION)
4086 new_ptr = static_cast<Internal::ValidationHeader*>(new_ptr) + 1;
4087 *Internal::fastMallocValidationSuffix(new_ptr) = Internal::ValidationSuffix;
4091 #if ENABLE(WTF_MALLOC_VALIDATION)
4092 old_ptr = static_cast<Internal::ValidationHeader*>(old_ptr) + 1; // Set old_ptr back to the user pointer.
4093 *Internal::fastMallocValidationSuffix(old_ptr) = Internal::ValidationSuffix;
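// Illustrative sketch (not part of the original file): the resize policy used in
// realloc() above. roundedNewSize stands in for AllocationSize(new_size), the
// size tcmalloc would actually hand back for the request; the block is only
// moved when the payload no longer fits or when it has become wastefully large.
#if 0
static bool needsReallocation(size_t newSize, size_t oldSize, size_t roundedNewSize)
{
    return newSize > oldSize          // Growing past the current block.
        || roundedNewSize < oldSize;  // Shrinking enough that a smaller block suffices.
}
// Example: a 4096-byte block being resized to 100 bytes is moved, because even
// the rounded-up small-object size is far below 4096, so memory can be returned.
#endif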
4103 static SpinLock set_new_handler_lock = SPINLOCK_INITIALIZER;
4105 static inline void* cpp_alloc(size_t size, bool nothrow) {
4107 void* p = do_malloc(size);
4111 if (p == NULL) { // allocation failed
4112 // Get the current new handler. NB: this function is not
4113 // thread-safe. We make a feeble stab at making it so here, but
4114 // this lock only protects against tcmalloc interfering with
4115 // itself, not with other libraries calling set_new_handler.
4116 std::new_handler nh;
4118 SpinLockHolder h(&set_new_handler_lock);
4119 nh = std::set_new_handler(0);
4120 (void) std::set_new_handler(nh);
4122 // If no new_handler is established, the allocation failed.
4124 if (nothrow) return 0;
4125 throw std::bad_alloc();
4127 // Otherwise, try the new_handler. If it returns, retry the
4128 // allocation. If it throws std::bad_alloc, fail the allocation.
4129 // If it throws something else, don't interfere.
4132 } catch (const std::bad_alloc&) {
4133 if (!nothrow) throw;
4136 } else { // allocation success
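// Usage sketch (illustrative, not part of the original file): a new_handler that
// the retry loop in cpp_alloc() above would invoke on allocation failure.
// Releasing a reserve and returning lets cpp_alloc() retry; throwing
// std::bad_alloc (or unregistering the handler) makes the allocation fail.
#if 0
#include <new>
#include <cstdlib>

static void* s_emergencyReserve = std::malloc(64 * 1024);

static void releaseReserveHandler()
{
    if (s_emergencyReserve) {
        std::free(s_emergencyReserve); // Give some memory back, then let cpp_alloc() retry.
        s_emergencyReserve = 0;
        return;
    }
    throw std::bad_alloc();            // Nothing left to release; fail the allocation.
}

// Installed once at startup: std::set_new_handler(releaseReserveHandler);
#endif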
4143 #if ENABLE(GLOBAL_FASTMALLOC_NEW)
4145 void* operator new(size_t size) {
4146 void* p = cpp_alloc(size, false);
4147 // We keep this next instruction out of cpp_alloc for a reason: when
4148 // it's in, and new just calls cpp_alloc, the optimizer may fold the
4149 // new call into cpp_alloc, which messes up our whole section-based
4150 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc
4151 // isn't the last thing this fn calls, and prevents the folding.
4152 MallocHook::InvokeNewHook(p, size);
4156 void* operator new(size_t size, const std::nothrow_t&) __THROW {
4157 void* p = cpp_alloc(size, true);
4158 MallocHook::InvokeNewHook(p, size);
4162 void operator delete(void* p) __THROW {
4163 MallocHook::InvokeDeleteHook(p);
4167 void operator delete(void* p, const std::nothrow_t&) __THROW {
4168 MallocHook::InvokeDeleteHook(p);
4172 void* operator new[](size_t size) {
4173 void* p = cpp_alloc(size, false);
4174 // We keep this next instruction out of cpp_alloc for a reason: when
4175 // it's in, and new just calls cpp_alloc, the optimizer may fold the
4176 // new call into cpp_alloc, which messes up our whole section-based
4177 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc
4178 // isn't the last thing this fn calls, and prevents the folding.
4179 MallocHook::InvokeNewHook(p, size);
4183 void* operator new[](size_t size, const std::nothrow_t&) __THROW {
4184 void* p = cpp_alloc(size, true);
4185 MallocHook::InvokeNewHook(p, size);
4189 void operator delete[](void* p) __THROW {
4190 MallocHook::InvokeDeleteHook(p);
4194 void operator delete[](void* p, const std::nothrow_t&) __THROW {
4195 MallocHook::InvokeDeleteHook(p);
4201 extern "C" void* memalign(size_t align, size_t size) __THROW {
4202 void* result = do_memalign(align, size);
4203 MallocHook::InvokeNewHook(result, size);
4207 extern "C" int posix_memalign(void** result_ptr, size_t align, size_t size)
4209 if (((align % sizeof(void*)) != 0) ||
4210 ((align & (align - 1)) != 0) ||
4215 void* result = do_memalign(align, size);
4216 MallocHook::InvokeNewHook(result, size);
4217 if (result == NULL) {
4220 *result_ptr = result;
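// Illustrative sketch (not part of the original file): the alignment checks that
// posix_memalign() applies above, per POSIX - align must be a non-zero power of
// two and a multiple of sizeof(void*).
#if 0
#include <cassert>
#include <cstddef>

static bool isValidPosixMemalignAlignment(size_t align)
{
    return align != 0
        && (align & (align - 1)) == 0    // Power of two.
        && (align % sizeof(void*)) == 0; // Multiple of the pointer size.
}

int main()
{
    assert(isValidPosixMemalignAlignment(sizeof(void*)));
    assert(isValidPosixMemalignAlignment(4096));
    assert(!isValidPosixMemalignAlignment(24)); // Multiple of 8 but not a power of two.
    return 0;
}
#endif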
4225 static size_t pagesize = 0;
4227 extern "C" void* valloc(size_t size) __THROW {
4228 // Allocate page-aligned object of length >= size bytes
4229 if (pagesize == 0) pagesize = getpagesize();
4230 void* result = do_memalign(pagesize, size);
4231 MallocHook::InvokeNewHook(result, size);
4235 extern "C" void* pvalloc(size_t size) __THROW {
4236 // Round up size to a multiple of pagesize
4237 if (pagesize == 0) pagesize = getpagesize();
4238 size = (size + pagesize - 1) & ~(pagesize - 1);
4239 void* result = do_memalign(pagesize, size);
4240 MallocHook::InvokeNewHook(result, size);
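// Illustrative sketch (not part of the original file): the round-up-to-a-page
// computation used by pvalloc() above. The 4096-byte page size is an assumption
// made only for this example; page sizes are powers of two in practice.
#if 0
#include <cassert>
#include <cstddef>

static size_t roundUpToPageMultiple(size_t size, size_t pagesize)
{
    return (size + pagesize - 1) & ~(pagesize - 1);
}

int main()
{
    assert(roundUpToPageMultiple(1, 4096) == 4096);
    assert(roundUpToPageMultiple(4096, 4096) == 4096);
    assert(roundUpToPageMultiple(5000, 4096) == 8192);
    return 0;
}
#endif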
4244 extern "C" void malloc_stats(void) {
4248 extern "C" int mallopt(int cmd, int value) {
4249 return do_mallopt(cmd, value);
4252 #ifdef HAVE_STRUCT_MALLINFO
4253 extern "C" struct mallinfo mallinfo(void) {
4254 return do_mallinfo();
4258 //-------------------------------------------------------------------
4259 // Some library routines on RedHat 9 allocate memory using malloc()
4260 // and free it using __libc_free() (or vice-versa). Since we provide
4261 // our own implementations of malloc/free, we need to make sure that
4262 // the __libc_XXX variants (defined as part of glibc) also point to
4263 // the same implementations.
4264 //-------------------------------------------------------------------
4266 #if defined(__GLIBC__)
4268 #if COMPILER(GCC) && !defined(__MACH__) && defined(HAVE___ATTRIBUTE__)
4269 // Potentially faster variants that use the gcc alias extension.
4270 // Mach-O (Darwin) does not support weak aliases, hence the __MACH__ check.
4271 # define ALIAS(x) __attribute__ ((weak, alias (x)))
4272 void* __libc_malloc(size_t size) ALIAS("malloc");
4273 void __libc_free(void* ptr) ALIAS("free");
4274 void* __libc_realloc(void* ptr, size_t size) ALIAS("realloc");
4275 void* __libc_calloc(size_t n, size_t size) ALIAS("calloc");
4276 void __libc_cfree(void* ptr) ALIAS("cfree");
4277 void* __libc_memalign(size_t align, size_t s) ALIAS("memalign");
4278 void* __libc_valloc(size_t size) ALIAS("valloc");
4279 void* __libc_pvalloc(size_t size) ALIAS("pvalloc");
4280 int __posix_memalign(void** r, size_t a, size_t s) ALIAS("posix_memalign");
4282 # else /* not __GNUC__ */
4283 // Portable wrappers
4284 void* __libc_malloc(size_t size) { return malloc(size); }
4285 void __libc_free(void* ptr) { free(ptr); }
4286 void* __libc_realloc(void* ptr, size_t size) { return realloc(ptr, size); }
4287 void* __libc_calloc(size_t n, size_t size) { return calloc(n, size); }
4288 void __libc_cfree(void* ptr) { cfree(ptr); }
4289 void* __libc_memalign(size_t align, size_t s) { return memalign(align, s); }
4290 void* __libc_valloc(size_t size) { return valloc(size); }
4291 void* __libc_pvalloc(size_t size) { return pvalloc(size); }
4292 int __posix_memalign(void** r, size_t a, size_t s) {
4293 return posix_memalign(r, a, s);
4295 # endif /* __GNUC__ */
4297 #endif /* __GLIBC__ */
4299 // Override __libc_memalign in libc on Linux boxes specifically.
4300 // Those systems have a bug in libc that causes them to (very rarely)
4301 // allocate with __libc_memalign() yet deallocate with free(), and the
4302 // definitions above don't catch it.
4303 // This function is an exception to the rule of calling MallocHook methods
4304 // from the stack frame of the allocation function;
4305 // heap-checker handles this special case explicitly.
4306 static void *MemalignOverride(size_t align, size_t size, const void *caller)
4308 void* result = do_memalign(align, size);
4309 MallocHook::InvokeNewHook(result, size);
4312 void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride;
4317 void releaseFastMallocFreeMemory()
4319 // Flush free pages in the current thread cache back to the page heap.
4320 // The low-watermark mechanism in Scavenge() prevents a full return on the first pass;
4321 // the second pass flushes everything.
4322 if (TCMalloc_ThreadCache* threadCache = TCMalloc_ThreadCache::GetCacheIfPresent()) {
4323 threadCache->Scavenge();
4324 threadCache->Scavenge();
4327 SpinLockHolder h(&pageheap_lock);
4328 pageheap->ReleaseFreePages();
4331 FastMallocStatistics fastMallocStatistics()
4333 FastMallocStatistics statistics;
4335 SpinLockHolder lockHolder(&pageheap_lock);
4336 statistics.reservedVMBytes = static_cast<size_t>(pageheap->SystemBytes());
4337 statistics.committedVMBytes = statistics.reservedVMBytes - pageheap->ReturnedBytes();
4339 statistics.freeListBytes = 0;
4340 for (unsigned cl = 0; cl < kNumClasses; ++cl) {
4341 const int length = central_cache[cl].length();
4342 const int tc_length = central_cache[cl].tc_length();
4344 statistics.freeListBytes += ByteSizeForClass(cl) * (length + tc_length);
4346 for (TCMalloc_ThreadCache* threadCache = thread_heaps; threadCache ; threadCache = threadCache->next_)
4347 statistics.freeListBytes += threadCache->Size();
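// Accounting note (illustrative, not part of the original file): the loops above
// compute
//   freeListBytes = sum over size classes cl of
//                     ByteSizeForClass(cl) * (central_cache[cl].length() + central_cache[cl].tc_length())
//                 + sum over live thread caches of threadCache->Size()
// i.e. every byte that is cached somewhere inside the allocator but not currently
// handed out to the application.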
4352 size_t fastMallocSize(const void* ptr)
4354 #if ENABLE(WTF_MALLOC_VALIDATION)
4355 return Internal::fastMallocValidationHeader(const_cast<void*>(ptr))->m_size;
4357 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
4358 Span* span = pageheap->GetDescriptorEnsureSafe(p);
4360 if (!span || span->free)
4363 for (void* free = span->objects; free != NULL; free = *((void**) free)) {
4368 if (size_t cl = span->sizeclass)
4369 return ByteSizeForClass(cl);
4371 return span->length << kPageShift;
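// Usage sketch (illustrative, not part of the original file): fastMallocSize()
// reports the usable size of a block, which can exceed the requested size
// because of size-class rounding.
#if 0
static void demonstrateUsableSize()
{
    void* p = fastMalloc(10);
    size_t usable = fastMallocSize(p); // >= 10; the exact value depends on the size classes.
    (void)usable;
    fastFree(p);
}
#endif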
4377 class FreeObjectFinder {
4378 const RemoteMemoryReader& m_reader;
4379 HashSet<void*> m_freeObjects;
4382 FreeObjectFinder(const RemoteMemoryReader& reader) : m_reader(reader) { }
4384 void visit(void* ptr) { m_freeObjects.add(ptr); }
4385 bool isFreeObject(void* ptr) const { return m_freeObjects.contains(ptr); }
4386 bool isFreeObject(vm_address_t ptr) const { return isFreeObject(reinterpret_cast<void*>(ptr)); }
4387 size_t freeObjectCount() const { return m_freeObjects.size(); }
4389 void findFreeObjects(TCMalloc_ThreadCache* threadCache)
4391 for (; threadCache; threadCache = (threadCache->next_ ? m_reader(threadCache->next_) : 0))
4392 threadCache->enumerateFreeObjects(*this, m_reader);
4395 void findFreeObjects(TCMalloc_Central_FreeListPadded* centralFreeList, size_t numSizes, TCMalloc_Central_FreeListPadded* remoteCentralFreeList)
4397 for (unsigned i = 0; i < numSizes; i++)
4398 centralFreeList[i].enumerateFreeObjects(*this, m_reader, remoteCentralFreeList + i);
4402 class PageMapFreeObjectFinder {
4403 const RemoteMemoryReader& m_reader;
4404 FreeObjectFinder& m_freeObjectFinder;
4407 PageMapFreeObjectFinder(const RemoteMemoryReader& reader, FreeObjectFinder& freeObjectFinder)
4409 , m_freeObjectFinder(freeObjectFinder)
4412 int visit(void* ptr) const
4417 Span* span = m_reader(reinterpret_cast<Span*>(ptr));
4422 void* ptr = reinterpret_cast<void*>(span->start << kPageShift);
4423 m_freeObjectFinder.visit(ptr);
4424 } else if (span->sizeclass) {
4425 // Walk the free list of the small-object span, keeping track of each object seen
4426 for (void* nextObject = span->objects; nextObject; nextObject = m_reader.nextEntryInLinkedList(reinterpret_cast<void**>(nextObject)))
4427 m_freeObjectFinder.visit(nextObject);
4429 return span->length;
4433 class PageMapMemoryUsageRecorder {
4436 unsigned m_typeMask;
4437 vm_range_recorder_t* m_recorder;
4438 const RemoteMemoryReader& m_reader;
4439 const FreeObjectFinder& m_freeObjectFinder;
4441 HashSet<void*> m_seenPointers;
4442 Vector<Span*> m_coalescedSpans;
4445 PageMapMemoryUsageRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader, const FreeObjectFinder& freeObjectFinder)
4447 , m_context(context)
4448 , m_typeMask(typeMask)
4449 , m_recorder(recorder)
4451 , m_freeObjectFinder(freeObjectFinder)
4454 ~PageMapMemoryUsageRecorder()
4456 ASSERT(!m_coalescedSpans.size());
4459 void recordPendingRegions()
4461 Span* lastSpan = m_coalescedSpans[m_coalescedSpans.size() - 1];
4462 vm_range_t ptrRange = { m_coalescedSpans[0]->start << kPageShift, 0 };
4463 ptrRange.size = (lastSpan->start << kPageShift) - ptrRange.address + (lastSpan->length * kPageSize);
4465 // Mark the memory region the spans represent as a candidate for containing pointers
4466 if (m_typeMask & MALLOC_PTR_REGION_RANGE_TYPE)
4467 (*m_recorder)(m_task, m_context, MALLOC_PTR_REGION_RANGE_TYPE, &ptrRange, 1);
4469 if (!(m_typeMask & MALLOC_PTR_IN_USE_RANGE_TYPE)) {
4470 m_coalescedSpans.clear();
4474 Vector<vm_range_t, 1024> allocatedPointers;
4475 for (size_t i = 0; i < m_coalescedSpans.size(); ++i) {
4476 Span* theSpan = m_coalescedSpans[i];
4480 vm_address_t spanStartAddress = theSpan->start << kPageShift;
4481 vm_size_t spanSizeInBytes = theSpan->length * kPageSize;
4483 if (!theSpan->sizeclass) {
4484 // If it's an allocated large object span, mark it as in use
4485 if (!m_freeObjectFinder.isFreeObject(spanStartAddress))
4486 allocatedPointers.append((vm_range_t){spanStartAddress, spanSizeInBytes});
4488 const size_t objectSize = ByteSizeForClass(theSpan->sizeclass);
4490 // Mark each allocated small object within the span as in use
4491 const vm_address_t endOfSpan = spanStartAddress + spanSizeInBytes;
4492 for (vm_address_t object = spanStartAddress; object + objectSize <= endOfSpan; object += objectSize) {
4493 if (!m_freeObjectFinder.isFreeObject(object))
4494 allocatedPointers.append((vm_range_t){object, objectSize});
4499 (*m_recorder)(m_task, m_context, MALLOC_PTR_IN_USE_RANGE_TYPE, allocatedPointers.data(), allocatedPointers.size());
4501 m_coalescedSpans.clear();
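// Worked example (illustrative, not part of the original file) for the range
// arithmetic in recordPendingRegions() above, assuming 4KB pages: if the first
// coalesced span starts at page 100 and the last starts at page 110 with a
// length of 5 pages, then
//   ptrRange.address = 100 << 12                       = 409600
//   ptrRange.size    = (110 << 12) - 409600 + 5 * 4096 = 61440
// so the recorded region covers pages 100..114 inclusive.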
4504 int visit(void* ptr)
4509 Span* span = m_reader(reinterpret_cast<Span*>(ptr));
4510 if (!span || !span->start)
4513 if (m_seenPointers.contains(ptr))
4514 return span->length;
4515 m_seenPointers.add(ptr);
4517 if (!m_coalescedSpans.size()) {
4518 m_coalescedSpans.append(span);
4519 return span->length;
4522 Span* previousSpan = m_coalescedSpans[m_coalescedSpans.size() - 1];
4523 vm_address_t previousSpanStartAddress = previousSpan->start << kPageShift;
4524 vm_size_t previousSpanSizeInBytes = previousSpan->length * kPageSize;
4526 // If the new span is adjacent to the previous span, do nothing for now.
4527 vm_address_t spanStartAddress = span->start << kPageShift;
4528 if (spanStartAddress == previousSpanStartAddress + previousSpanSizeInBytes) {
4529 m_coalescedSpans.append(span);
4530 return span->length;
4533 // New span is not adjacent to previous span, so record the spans coalesced so far.
4534 recordPendingRegions();
4535 m_coalescedSpans.append(span);
4537 return span->length;
4541 class AdminRegionRecorder {
4544 unsigned m_typeMask;
4545 vm_range_recorder_t* m_recorder;
4546 const RemoteMemoryReader& m_reader;
4548 Vector<vm_range_t, 1024> m_pendingRegions;
4551 AdminRegionRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader)
4553 , m_context(context)
4554 , m_typeMask(typeMask)
4555 , m_recorder(recorder)
4559 void recordRegion(vm_address_t ptr, size_t size)
4561 if (m_typeMask & MALLOC_ADMIN_REGION_RANGE_TYPE)
4562 m_pendingRegions.append((vm_range_t){ ptr, size });
4565 void visit(void *ptr, size_t size)
4567 recordRegion(reinterpret_cast<vm_address_t>(ptr), size);
4570 void recordPendingRegions()
4572 if (m_pendingRegions.size()) {
4573 (*m_recorder)(m_task, m_context, MALLOC_ADMIN_REGION_RANGE_TYPE, m_pendingRegions.data(), m_pendingRegions.size());
4574 m_pendingRegions.clear();
4578 ~AdminRegionRecorder()
4580 ASSERT(!m_pendingRegions.size());
4584 kern_return_t FastMallocZone::enumerate(task_t task, void* context, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t reader, vm_range_recorder_t recorder)
4586 RemoteMemoryReader memoryReader(task, reader);
4590 FastMallocZone* mzone = memoryReader(reinterpret_cast<FastMallocZone*>(zoneAddress));
4591 TCMalloc_PageHeap* pageHeap = memoryReader(mzone->m_pageHeap);
4592 TCMalloc_ThreadCache** threadHeapsPointer = memoryReader(mzone->m_threadHeaps);
4593 TCMalloc_ThreadCache* threadHeaps = memoryReader(*threadHeapsPointer);
4595 TCMalloc_Central_FreeListPadded* centralCaches = memoryReader(mzone->m_centralCaches, sizeof(TCMalloc_Central_FreeListPadded) * kNumClasses);
4597 FreeObjectFinder finder(memoryReader);
4598 finder.findFreeObjects(threadHeaps);
4599 finder.findFreeObjects(centralCaches, kNumClasses, mzone->m_centralCaches);
4601 TCMalloc_PageHeap::PageMap* pageMap = &pageHeap->pagemap_;
4602 PageMapFreeObjectFinder pageMapFinder(memoryReader, finder);
4603 pageMap->visitValues(pageMapFinder, memoryReader);
4605 PageMapMemoryUsageRecorder usageRecorder(task, context, typeMask, recorder, memoryReader, finder);
4606 pageMap->visitValues(usageRecorder, memoryReader);
4607 usageRecorder.recordPendingRegions();
4609 AdminRegionRecorder adminRegionRecorder(task, context, typeMask, recorder, memoryReader);
4610 pageMap->visitAllocations(adminRegionRecorder, memoryReader);
4612 PageHeapAllocator<Span>* spanAllocator = memoryReader(mzone->m_spanAllocator);
4613 PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator = memoryReader(mzone->m_pageHeapAllocator);
4615 spanAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader);
4616 pageHeapAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader);
4618 adminRegionRecorder.recordPendingRegions();
4623 size_t FastMallocZone::size(malloc_zone_t*, const void*)
4628 void* FastMallocZone::zoneMalloc(malloc_zone_t*, size_t)
4633 void* FastMallocZone::zoneCalloc(malloc_zone_t*, size_t, size_t)
4638 void FastMallocZone::zoneFree(malloc_zone_t*, void* ptr)
4640 // Due to <rdar://problem/5671357> zoneFree may be called by the system free even if the pointer
4641 // is not in this zone. When this happens, the pointer being freed was not allocated by any
4642 // zone so we need to print a useful error for the application developer.
4643 malloc_printf("*** error for object %p: pointer being freed was not allocated\n", ptr);
4646 void* FastMallocZone::zoneRealloc(malloc_zone_t*, void*, size_t)
4658 malloc_introspection_t jscore_fastmalloc_introspection = { &FastMallocZone::enumerate, &FastMallocZone::goodSize, &FastMallocZone::check, &FastMallocZone::print,
4659 &FastMallocZone::log, &FastMallocZone::forceLock, &FastMallocZone::forceUnlock, &FastMallocZone::statistics
4661 #if OS(IOS) || __MAC_OS_X_VERSION_MAX_ALLOWED >= 1060
4662 , 0 // zone_locked will not be called on the zone unless it advertises itself as version five or higher.
4664 #if OS(IOS) || __MAC_OS_X_VERSION_MAX_ALLOWED >= 1070
4665 , 0, 0, 0, 0 // These members will not be used unless the zone advertises itself as version seven or higher.
4671 FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache** threadHeaps, TCMalloc_Central_FreeListPadded* centralCaches, PageHeapAllocator<Span>* spanAllocator, PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator)
4672 : m_pageHeap(pageHeap)
4673 , m_threadHeaps(threadHeaps)
4674 , m_centralCaches(centralCaches)
4675 , m_spanAllocator(spanAllocator)
4676 , m_pageHeapAllocator(pageHeapAllocator)
4678 memset(&m_zone, 0, sizeof(m_zone));
4680 m_zone.zone_name = "JavaScriptCore FastMalloc";
4681 m_zone.size = &FastMallocZone::size;
4682 m_zone.malloc = &FastMallocZone::zoneMalloc;
4683 m_zone.calloc = &FastMallocZone::zoneCalloc;
4684 m_zone.realloc = &FastMallocZone::zoneRealloc;
4685 m_zone.free = &FastMallocZone::zoneFree;
4686 m_zone.valloc = &FastMallocZone::zoneValloc;
4687 m_zone.destroy = &FastMallocZone::zoneDestroy;
4688 m_zone.introspect = &jscore_fastmalloc_introspection;
4689 malloc_zone_register(&m_zone);
4693 void FastMallocZone::init()
4695 static FastMallocZone zone(pageheap, &thread_heaps, static_cast<TCMalloc_Central_FreeListPadded*>(central_cache), &span_allocator, &threadheap_allocator);
4698 #endif // OS(DARWIN)
4701 #endif // WTF_CHANGES
4703 #endif // FORCE_SYSTEM_MALLOC