Improve ArrayPool performance
author Stephen Toub <stoub@microsoft.com>
Tue, 22 Nov 2016 18:44:44 +0000 (13:44 -0500)
committer Stephen Toub <stoub@microsoft.com>
Fri, 25 Nov 2016 03:09:53 +0000 (22:09 -0500)
- Renames DefaultArrayPool to ConfigurableArrayPool, which remains unchanged.
- Adds a new TlsOverPerCoreLockedStacksArrayPool, which is used as the shared pool for byte[] and char[].  The pool is tiered, with a small per-thread TLS cache, followed by a global cache.  The global cache is split into effectively per-core buckets, although threads are able to check other buckets if their assigned bucket is empty/full for rents/returns, respectively.

src/mscorlib/corefx/System/Buffers/ArrayPool.cs
src/mscorlib/corefx/System/Buffers/ConfigurableArrayPool.cs [moved from src/mscorlib/corefx/System/Buffers/DefaultArrayPool.cs with 57% similarity]
src/mscorlib/corefx/System/Buffers/DefaultArrayPoolBucket.cs [deleted file]
src/mscorlib/corefx/System/Buffers/TlsOverPerCoreLockedStacksArrayPool.Unix.cs [new file with mode: 0644]
src/mscorlib/corefx/System/Buffers/TlsOverPerCoreLockedStacksArrayPool.Windows.cs [new file with mode: 0644]
src/mscorlib/corefx/System/Buffers/TlsOverPerCoreLockedStacksArrayPool.cs [new file with mode: 0644]
src/mscorlib/corefx/System/Buffers/Utilities.cs
src/mscorlib/mscorlib.shared.sources.props
src/mscorlib/src/Microsoft/Win32/Win32Native.cs

index af98c20..441e48d 100644 (file)
@@ -2,9 +2,6 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
-using System.Runtime.CompilerServices;
-using System.Threading;
-
 namespace System.Buffers
 {
     /// <summary>
@@ -22,9 +19,6 @@ namespace System.Buffers
     /// </remarks>
     public abstract class ArrayPool<T>
     {
-        /// <summary>The lazily-initialized shared pool instance.</summary>
-        private static ArrayPool<T> s_sharedInstance = null;
-
         /// <summary>
         /// Retrieves a shared <see cref="ArrayPool{T}"/> instance.
         /// </summary>
@@ -36,28 +30,31 @@ namespace System.Buffers
         /// existing buffer being taken from the pool if an appropriate buffer is available or in a new 
         /// buffer being allocated if one is not available.
         /// </remarks>
-        public static ArrayPool<T> Shared
-        {
-            [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            get { return Volatile.Read(ref s_sharedInstance) ?? EnsureSharedCreated(); }
-        }
+        public static ArrayPool<T> Shared => SharedPool.Value;
 
-        /// <summary>Ensures that <see cref="s_sharedInstance"/> has been initialized to a pool and returns it.</summary>
-        [MethodImpl(MethodImplOptions.NoInlining)]
-        private static ArrayPool<T> EnsureSharedCreated()
+        /// <summary>Stores a cached pool instance for T[].</summary>
+        /// <remarks>
+        /// Separated out into a nested class to enable lazy-initialization of the pool provided by
+        /// the runtime, only forced when Shared is used (and not when Create is called or when
+        /// other non-Shared accesses happen).
+        /// </remarks>
+        private static class SharedPool
         {
-            Interlocked.CompareExchange(ref s_sharedInstance, Create(), null);
-            return s_sharedInstance;
+            /// <summary>Per-type cached pool.</summary>
+            /// <remarks>
+            /// byte[] and char[] are the most commonly pooled array types. For these we use a special pool type
+            /// optimized for very fast access speeds, at the expense of more memory consumption.
+            /// </remarks>
+            internal readonly static ArrayPool<T> Value =
+                typeof(T) == typeof(byte) || typeof(T) == typeof(char) ? new TlsOverPerCoreLockedStacksArrayPool<T>() :
+                Create();
         }
 
         /// <summary>
         /// Creates a new <see cref="ArrayPool{T}"/> instance using default configuration options.
         /// </summary>
         /// <returns>A new <see cref="ArrayPool{T}"/> instance.</returns>
-        public static ArrayPool<T> Create()
-        {
-            return new DefaultArrayPool<T>();
-        }
+        public static ArrayPool<T> Create() => new ConfigurableArrayPool<T>();
 
         /// <summary>
         /// Creates a new <see cref="ArrayPool{T}"/> instance using custom configuration options.
@@ -72,10 +69,8 @@ namespace System.Buffers
         /// The created pool will group arrays into buckets, with no more than <paramref name="maxArraysPerBucket"/>
         /// in each bucket and with those arrays not exceeding <paramref name="maxArrayLength"/> in length.
         /// </remarks>
-        public static ArrayPool<T> Create(int maxArrayLength, int maxArraysPerBucket)
-        {
-            return new DefaultArrayPool<T>(maxArrayLength, maxArraysPerBucket);
-        }
+        public static ArrayPool<T> Create(int maxArrayLength, int maxArraysPerBucket) =>
+            new ConfigurableArrayPool<T>(maxArrayLength, maxArraysPerBucket);
 
         /// <summary>
         /// Retrieves a buffer that is at least the requested length.
@@ -2,24 +2,25 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
+using System.Diagnostics;
+using System.Threading;
+
 namespace System.Buffers
 {
-    internal sealed partial class DefaultArrayPool<T> : ArrayPool<T>
+    internal sealed partial class ConfigurableArrayPool<T> : ArrayPool<T>
     {
         /// <summary>The default maximum length of each array in the pool (2^20).</summary>
         private const int DefaultMaxArrayLength = 1024 * 1024;
         /// <summary>The default maximum number of arrays per bucket that are available for rent.</summary>
         private const int DefaultMaxNumberOfArraysPerBucket = 50;
-        /// <summary>Lazily-allocated empty array used when arrays of length 0 are requested.</summary>
-        private static T[] s_emptyArray; // we support contracts earlier than those with Array.Empty<T>()
 
         private readonly Bucket[] _buckets;
 
-        internal DefaultArrayPool() : this(DefaultMaxArrayLength, DefaultMaxNumberOfArraysPerBucket)
+        internal ConfigurableArrayPool() : this(DefaultMaxArrayLength, DefaultMaxNumberOfArraysPerBucket)
         {
         }
 
-        internal DefaultArrayPool(int maxArrayLength, int maxArraysPerBucket)
+        internal ConfigurableArrayPool(int maxArrayLength, int maxArraysPerBucket)
         {
             if (maxArrayLength <= 0)
             {
@@ -69,7 +70,7 @@ namespace System.Buffers
             {
                 // No need for events with the empty array.  Our pool is effectively infinite
                 // and we'll never allocate for rents and never store for returns.
-                return s_emptyArray ?? (s_emptyArray = new T[0]);
+                return EmptyArray<T>.Value;
             }
 
             var log = ArrayPoolEventSource.Log;
@@ -157,5 +158,108 @@ namespace System.Buffers
                 log.BufferReturned(array.GetHashCode(), array.Length, Id);
             }
         }
+
+        /// <summary>Provides a thread-safe bucket containing buffers that can be Rent'd and Return'd.</summary>
+        private sealed class Bucket
+        {
+            internal readonly int _bufferLength;
+            private readonly T[][] _buffers;
+            private readonly int _poolId;
+
+            private SpinLock _lock; // do not make this readonly; it's a mutable struct
+            private int _index;
+
+            /// <summary>
+            /// Creates the pool with numberOfBuffers arrays where each buffer is of bufferLength length.
+            /// </summary>
+            internal Bucket(int bufferLength, int numberOfBuffers, int poolId)
+            {
+                _lock = new SpinLock(Debugger.IsAttached); // only enable thread tracking if debugger is attached; it adds non-trivial overheads to Enter/Exit
+                _buffers = new T[numberOfBuffers][];
+                _bufferLength = bufferLength;
+                _poolId = poolId;
+            }
+
+            /// <summary>Gets an ID for the bucket to use with events.</summary>
+            internal int Id => GetHashCode();
+
+            /// <summary>Takes an array from the bucket, allocating it if its slot is unused.  Returns null if every slot is rented out.</summary>
+            internal T[] Rent()
+            {
+                T[][] buffers = _buffers;
+                T[] buffer = null;
+
+                // While holding the lock, grab whatever is at the next available index and
+                // update the index.  We do as little work as possible while holding the spin
+                // lock to minimize contention with other threads.  The try/finally is
+                // necessary to properly handle thread aborts on platforms which have them.
+                bool lockTaken = false, allocateBuffer = false;
+                try
+                {
+                    _lock.Enter(ref lockTaken);
+
+                    if (_index < buffers.Length)
+                    {
+                        buffer = buffers[_index];
+                        buffers[_index++] = null;
+                        allocateBuffer = buffer == null;
+                    }
+                }
+                finally
+                {
+                    if (lockTaken) _lock.Exit(false);
+                }
+
+                // While we were holding the lock, we grabbed whatever was at the next available index, if
+                // there was one.  If we tried and if we got back null, that means we hadn't yet allocated
+                // for that slot, in which case we should do so now.
+                if (allocateBuffer)
+                {
+                    buffer = new T[_bufferLength];
+
+                    var log = ArrayPoolEventSource.Log;
+                    if (log.IsEnabled())
+                    {
+                        log.BufferAllocated(buffer.GetHashCode(), _bufferLength, _poolId, Id,
+                            ArrayPoolEventSource.BufferAllocatedReason.Pooled);
+                    }
+                }
+
+                return buffer;
+            }
+
+            /// <summary>
+            /// Attempts to return the buffer to the bucket.  If the bucket has a free slot, the buffer is
+            /// stored in it; otherwise, the buffer is silently dropped.  Nothing is returned to the caller.
+            /// An <see cref="ArgumentException"/> is thrown if the array's length doesn't match the bucket's.
+            /// </summary>
+            internal void Return(T[] array)
+            {
+                // Check to see if the buffer is the correct size for this bucket
+                if (array.Length != _bufferLength)
+                {
+                    throw new ArgumentException(SR.ArgumentException_BufferNotFromPool, nameof(array));
+                }
+
+                // While holding the spin lock, if there's room available in the bucket,
+                // put the buffer into the next available slot.  Otherwise, we just drop it.
+                // The try/finally is necessary to properly handle thread aborts on platforms
+                // which have them.
+                bool lockTaken = false;
+                try
+                {
+                    _lock.Enter(ref lockTaken);
+
+                    if (_index != 0)
+                    {
+                        _buffers[--_index] = array;
+                    }
+                }
+                finally
+                {
+                    if (lockTaken) _lock.Exit(false);
+                }
+            }
+        }
     }
 }
diff --git a/src/mscorlib/corefx/System/Buffers/DefaultArrayPoolBucket.cs b/src/mscorlib/corefx/System/Buffers/DefaultArrayPoolBucket.cs
deleted file mode 100644 (file)
index e0a1abb..0000000
+++ /dev/null
@@ -1,115 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using System.Diagnostics;
-using System.Threading;
-
-namespace System.Buffers
-{
-    internal sealed partial class DefaultArrayPool<T> : ArrayPool<T>
-    {
-        /// <summary>Provides a thread-safe bucket containing buffers that can be Rent'd and Return'd.</summary>
-        private sealed class Bucket
-        {
-            internal readonly int _bufferLength;
-            private readonly T[][] _buffers;
-            private readonly int _poolId;
-
-            private SpinLock _lock; // do not make this readonly; it's a mutable struct
-            private int _index;
-
-            /// <summary>
-            /// Creates the pool with numberOfBuffers arrays where each buffer is of bufferLength length.
-            /// </summary>
-            internal Bucket(int bufferLength, int numberOfBuffers, int poolId)
-            {
-                _lock = new SpinLock(Debugger.IsAttached); // only enable thread tracking if debugger is attached; it adds non-trivial overheads to Enter/Exit
-                _buffers = new T[numberOfBuffers][];
-                _bufferLength = bufferLength;
-                _poolId = poolId;
-            }
-
-            /// <summary>Gets an ID for the bucket to use with events.</summary>
-            internal int Id => GetHashCode();
-
-            /// <summary>Takes an array from the bucket.  If the bucket is empty, returns null.</summary>
-            internal T[] Rent()
-            {
-                T[][] buffers = _buffers;
-                T[] buffer = null;
-
-                // While holding the lock, grab whatever is at the next available index and
-                // update the index.  We do as little work as possible while holding the spin
-                // lock to minimize contention with other threads.  The try/finally is
-                // necessary to properly handle thread aborts on platforms which have them.
-                bool lockTaken = false, allocateBuffer = false;
-                try
-                {
-                    _lock.Enter(ref lockTaken);
-
-                    if (_index < buffers.Length)
-                    {
-                        buffer = buffers[_index];
-                        buffers[_index++] = null;
-                        allocateBuffer = buffer == null;
-                    }
-                }
-                finally
-                {
-                    if (lockTaken) _lock.Exit(false);
-                }
-
-                // While we were holding the lock, we grabbed whatever was at the next available index, if
-                // there was one.  If we tried and if we got back null, that means we hadn't yet allocated
-                // for that slot, in which case we should do so now.
-                if (allocateBuffer)
-                {
-                    buffer = new T[_bufferLength];
-
-                    var log = ArrayPoolEventSource.Log;
-                    if (log.IsEnabled())
-                    {
-                        log.BufferAllocated(buffer.GetHashCode(), _bufferLength, _poolId, Id,
-                            ArrayPoolEventSource.BufferAllocatedReason.Pooled);
-                    }
-                }
-
-                return buffer;
-            }
-
-            /// <summary>
-            /// Attempts to return the buffer to the bucket.  If successful, the buffer will be stored
-            /// in the bucket and true will be returned; otherwise, the buffer won't be stored, and false
-            /// will be returned.
-            /// </summary>
-            internal void Return(T[] array)
-            {
-                // Check to see if the buffer is the correct size for this bucket
-                if (array.Length != _bufferLength)
-                {
-                    throw new ArgumentException(SR.ArgumentException_BufferNotFromPool, nameof(array));
-                }
-
-                // While holding the spin lock, if there's room available in the bucket,
-                // put the buffer into the next available slot.  Otherwise, we just drop it.
-                // The try/finally is necessary to properly handle thread aborts on platforms
-                // which have them.
-                bool lockTaken = false;
-                try
-                {
-                    _lock.Enter(ref lockTaken);
-
-                    if (_index != 0)
-                    {
-                        _buffers[--_index] = array;
-                    }
-                }
-                finally
-                {
-                    if (lockTaken) _lock.Exit(false);
-                }
-            }
-        }
-    }
-}
diff --git a/src/mscorlib/corefx/System/Buffers/TlsOverPerCoreLockedStacksArrayPool.Unix.cs b/src/mscorlib/corefx/System/Buffers/TlsOverPerCoreLockedStacksArrayPool.Unix.cs
new file mode 100644 (file)
index 0000000..aa729e8
--- /dev/null
@@ -0,0 +1,28 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.Win32;
+using System.Runtime.CompilerServices;
+
+namespace System.Buffers
+{
+    internal sealed partial class TlsOverPerCoreLockedStacksArrayPool<T>
+    {
+        /// <summary>Get an identifier for the current thread to use to index into the stacks.</summary>
+        private static int ExecutionId
+        {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            get
+            {
+                // On Unix, GetCurrentProcessorNumber is implemented in terms of sched_getcpu, which
+                // doesn't exist on all platforms.  On those it doesn't exist on, GetCurrentProcessorNumber
+                // returns -1.  As a fallback in that case and to spread the threads across the buckets
+                // by default, we use the current managed thread ID as a proxy.
+                int id = Win32Native.GetCurrentProcessorNumber();
+                if (id < 0) id = Environment.CurrentManagedThreadId;
+                return id;
+            }
+        }
+    }
+}
diff --git a/src/mscorlib/corefx/System/Buffers/TlsOverPerCoreLockedStacksArrayPool.Windows.cs b/src/mscorlib/corefx/System/Buffers/TlsOverPerCoreLockedStacksArrayPool.Windows.cs
new file mode 100644 (file)
index 0000000..0c4c464
--- /dev/null
@@ -0,0 +1,20 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.Win32;
+using System.Runtime.CompilerServices;
+using System.Threading;
+
+namespace System.Buffers
+{
+    internal sealed partial class TlsOverPerCoreLockedStacksArrayPool<T> : ArrayPool<T>
+    {
+        /// <summary>Get an identifier for the current thread to use to index into the stacks.</summary>
+        private static int ExecutionId
+        {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            get { return Win32Native.GetCurrentProcessorNumber(); }
+        }
+    }
+}
diff --git a/src/mscorlib/corefx/System/Buffers/TlsOverPerCoreLockedStacksArrayPool.cs b/src/mscorlib/corefx/System/Buffers/TlsOverPerCoreLockedStacksArrayPool.cs
new file mode 100644 (file)
index 0000000..a4bed13
--- /dev/null
@@ -0,0 +1,293 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.Win32;
+using System.Runtime.CompilerServices;
+using System.Threading;
+
+namespace System.Buffers
+{
+    /// <summary>
+    /// Provides an ArrayPool implementation meant to be used as the singleton returned from ArrayPool.Shared.
+    /// </summary>
+    /// <remarks>
+    /// The implementation uses a tiered caching scheme, with a small per-thread cache for each array size, followed
+    /// by a cache per array size shared by all threads, split into per-core stacks meant to be used by threads
+    /// running on that core.  Locks are used to protect each per-core stack, because a thread can migrate after
+    /// checking its processor number, because multiple threads could interleave on the same core, and because
+    /// a thread is allowed to check other core's buckets if its core's bucket is empty/full.
+    /// </remarks>
+    internal sealed partial class TlsOverPerCoreLockedStacksArrayPool<T> : ArrayPool<T>
+    {
+        // TODO: #7747: "Investigate optimizing ArrayPool heuristics"
+        // - Explore caching in TLS more than one array per size per thread, and moving stale buffers to the global queue.
+        // - Explore dumping stale buffers from the global queue, similar to PinnableBufferCache (maybe merging them).
+        // - Explore changing the size of each per-core bucket, potentially dynamically or based on other factors like array size.
+        // - Explore changing number of buckets and what sizes of arrays are cached.
+        // - Measure making GetCurrentProcessorNumber an FCall rather than a P/Invoke.
+        // - Investigate whether false sharing is causing any issues, in particular on LockedStack's count and the contents of its array.
+        // ...
+
+        /// <summary>The number of buckets (array sizes) in the pool, one for each array length, starting from length 16.</summary>
+        private const int NumBuckets = 17; // Utilities.SelectBucketIndex(2*1024*1024)
+        /// <summary>Maximum number of per-core stacks to use per array size.</summary>
+        private const int MaxPerCorePerArraySizeStacks = 64; // selected to avoid needing to worry about processor groups
+        /// <summary>The maximum number of buffers to store in each per-core stack of a bucket.</summary>
+        private const int MaxBuffersPerArraySizePerCore = 8;
+
+        /// <summary>The length of arrays stored in the corresponding indices in <see cref="_buckets"/> and <see cref="t_tlsBuckets"/>.</summary>
+        private readonly int[] _bucketArraySizes;
+        /// <summary>
+        /// An array of per-core array stacks. The slots are lazily initialized to avoid creating
+        /// lots of overhead for unused array sizes.
+        /// </summary>
+        private readonly PerCoreLockedStacks[] _buckets = new PerCoreLockedStacks[NumBuckets];
+        /// <summary>A per-thread array of arrays, to cache one array per array size per thread.</summary>
+        [ThreadStatic]
+        private static T[][] t_tlsBuckets;
+
+        /// <summary>Initialize the pool.</summary>
+        public TlsOverPerCoreLockedStacksArrayPool()
+        {
+            var sizes = new int[NumBuckets];
+            for (int i = 0; i < sizes.Length; i++)
+            {
+                sizes[i] = Utilities.GetMaxSizeForBucket(i);
+            }
+            _bucketArraySizes = sizes;
+        }
+
+        /// <summary>Allocate a new PerCoreLockedStacks and try to store it into the <see cref="_buckets"/> array.</summary>
+        private PerCoreLockedStacks CreatePerCoreLockedStacks(int bucketIndex)
+        {
+            var inst = new PerCoreLockedStacks();
+            return Interlocked.CompareExchange(ref _buckets[bucketIndex], inst, null) ?? inst;
+        }
+
+        /// <summary>Gets an ID for the pool to use with events.</summary>
+        private int Id => GetHashCode();
+
+        public override T[] Rent(int minimumLength)
+        {
+            // Arrays can't be smaller than zero.  We allow requesting zero-length arrays (even though
+            // pooling such an array isn't valuable) as it's a valid length array, and we want the pool
+            // to be usable in general instead of using `new`, even for computed lengths.
+            if (minimumLength < 0)
+            {
+                throw new ArgumentOutOfRangeException(nameof(minimumLength));
+            }
+            else if (minimumLength == 0)
+            {
+                // No need to log the empty array.  Our pool is effectively infinite
+                // and we'll never allocate for rents and never store for returns.
+                return EmptyArray<T>.Value;
+            }
+
+            ArrayPoolEventSource log = ArrayPoolEventSource.Log;
+            T[] buffer;
+
+            // Get the bucket number for the array length
+            int bucketIndex = Utilities.SelectBucketIndex(minimumLength);
+
+            // If the array could come from a bucket...
+            if (bucketIndex < _buckets.Length)
+            {
+                // First try to get it from TLS if possible.
+                T[][] tlsBuckets = t_tlsBuckets;
+                if (tlsBuckets != null)
+                {
+                    buffer = tlsBuckets[bucketIndex];
+                    if (buffer != null)
+                    {
+                        tlsBuckets[bucketIndex] = null;
+                        if (log.IsEnabled())
+                        {
+                            log.BufferRented(buffer.GetHashCode(), buffer.Length, Id, bucketIndex);
+                        }
+                        return buffer;
+                    }
+                }
+
+                // We couldn't get a buffer from TLS, so try the global stack.
+                PerCoreLockedStacks b = _buckets[bucketIndex];
+                if (b != null)
+                {
+                    buffer = b.TryPop();
+                    if (buffer != null)
+                    {
+                        if (log.IsEnabled())
+                        {
+                            log.BufferRented(buffer.GetHashCode(), buffer.Length, Id, bucketIndex);
+                        }
+                        return buffer;
+                    }
+                }
+
+                // No buffer available.  Allocate a new buffer with a size corresponding to the appropriate bucket.
+                buffer = new T[_bucketArraySizes[bucketIndex]];
+            }
+            else
+            {
+                // The request was for a size too large for the pool.  Allocate an array of exactly the requested length.
+                // When it's returned to the pool, we'll simply throw it away.
+                buffer = new T[minimumLength];
+            }
+
+            if (log.IsEnabled())
+            {
+                int bufferId = buffer.GetHashCode(), bucketId = -1; // no bucket for an on-demand allocated buffer
+                log.BufferRented(bufferId, buffer.Length, Id, bucketId);
+                log.BufferAllocated(bufferId, buffer.Length, Id, bucketId, bucketIndex >= _buckets.Length ?
+                    ArrayPoolEventSource.BufferAllocatedReason.OverMaximumSize :
+                    ArrayPoolEventSource.BufferAllocatedReason.PoolExhausted);
+            }
+
+            return buffer;
+        }
+
+        public override void Return(T[] array, bool clearArray = false)
+        {
+            if (array == null)
+            {
+                throw new ArgumentNullException(nameof(array));
+            }
+
+            // Determine with what bucket this array length is associated
+            int bucketIndex = Utilities.SelectBucketIndex(array.Length);
+
+            // If we can tell that the buffer was allocated (or empty), drop it. Otherwise, check if we have space in the pool.
+            if (bucketIndex < _buckets.Length)
+            {
+                // Clear the array if the user requests.
+                if (clearArray)
+                {
+                    Array.Clear(array, 0, array.Length);
+                }
+
+                // Check to see if the buffer is the correct size for this bucket
+                if (array.Length != _bucketArraySizes[bucketIndex])
+                {
+                    throw new ArgumentException(SR.ArgumentException_BufferNotFromPool, nameof(array));
+                }
+
+                // Write through the TLS bucket.  If this thread has no TLS buckets yet, create
+                // them and store this array into its slot.  If it does, store this array into
+                // its slot, and if a previous array was there, push that one to the global
+                // per-core stacks.  This keeps access LIFO: the most recently returned array
+                // stays in TLS and is the first one handed out by the next Rent on this thread.
+                T[][] tlsBuckets = t_tlsBuckets;
+                if (tlsBuckets == null)
+                {
+                    t_tlsBuckets = tlsBuckets = new T[NumBuckets][];
+                    tlsBuckets[bucketIndex] = array;
+                }
+                else
+                {
+                    T[] prev = tlsBuckets[bucketIndex];
+                    tlsBuckets[bucketIndex] = array;
+                    if (prev != null)
+                    {
+                        PerCoreLockedStacks bucket = _buckets[bucketIndex] ?? CreatePerCoreLockedStacks(bucketIndex);
+                        bucket.TryPush(prev);
+                    }
+                }
+            }
+
+            // Log that the buffer was returned
+            ArrayPoolEventSource log = ArrayPoolEventSource.Log;
+            if (log.IsEnabled())
+            {
+                log.BufferReturned(array.GetHashCode(), array.Length, Id);
+            }
+        }
+
+        /// <summary>
+        /// Stores a set of stacks of arrays, with one stack per core.
+        /// </summary>
+        private sealed class PerCoreLockedStacks
+        {
+            /// <summary>The stacks.</summary>
+            private readonly LockedStack[] _perCoreStacks;
+
+            /// <summary>Initializes the stacks.</summary>
+            public PerCoreLockedStacks()
+            {
+                // Create the stacks.  We create as many as there are processors, limited by our max.
+                var stacks = new LockedStack[Math.Min(Environment.ProcessorCount, MaxPerCorePerArraySizeStacks)];
+                for (int i = 0; i < stacks.Length; i++)
+                {
+                    stacks[i] = new LockedStack();
+                }
+                _perCoreStacks = stacks;
+            }
+
+            /// <summary>Try to push the array into the stacks. If each is full when it's tested, the array will be dropped.</summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public void TryPush(T[] array)
+            {
+                // Try to push on to the associated stack first.  If that fails,
+                // round-robin through the other stacks.
+                LockedStack[] stacks = _perCoreStacks;
+                int index = ExecutionId % stacks.Length;
+                for (int i = 0; i < stacks.Length; i++)
+                {
+                    if (stacks[index].TryPush(array)) return;
+                    if (++index == stacks.Length) index = 0;
+                }
+            }
+
+            /// <summary>Try to get an array from the stacks.  If each is empty when it's tested, null will be returned.</summary>
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public T[] TryPop()
+            {
+                // Try to pop from the associated stack first.  If that fails,
+                // round-robin through the other stacks.
+                T[] arr;
+                LockedStack[] stacks = _perCoreStacks;
+                int index = ExecutionId % stacks.Length;
+                for (int i = 0; i < stacks.Length; i++)
+                {
+                    if ((arr = stacks[index].TryPop()) != null) return arr;
+                    if (++index == stacks.Length) index = 0;
+                }
+                return null;
+            }
+        }
+
+        /// <summary>Provides a simple stack of arrays, protected by a lock.</summary>
+        private sealed class LockedStack
+        {
+            private readonly T[][] _arrays = new T[MaxBuffersPerArraySizePerCore][];
+            private int _count;
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public bool TryPush(T[] array)
+            {
+                bool enqueued = false;
+                Monitor.Enter(this);
+                if (_count < MaxBuffersPerArraySizePerCore)
+                {
+                    _arrays[_count++] = array;
+                    enqueued = true;
+                }
+                Monitor.Exit(this);
+                return enqueued;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public T[] TryPop()
+            {
+                T[] arr = null;
+                Monitor.Enter(this);
+                if (_count > 0)
+                {
+                    arr = _arrays[--_count];
+                    _arrays[_count] = null;
+                }
+                Monitor.Exit(this);
+                return arr;
+            }
+        }
+    }
+}
index 63ae1a9..823299f 100644 (file)
@@ -12,8 +12,6 @@ namespace System.Buffers
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal static int SelectBucketIndex(int bufferSize)
         {
-            Debug.Assert(bufferSize > 0);
-
             uint bitsRemaining = ((uint)bufferSize - 1) >> 4;
 
             int poolIndex = 0;
index 015cc46..4bd229b 100644 (file)
   <ItemGroup>
     <BuffersSources Include="$(CoreFxSourcesRoot)\System\Buffers\ArrayPool.cs" />
     <BuffersSources Include="$(CoreFxSourcesRoot)\System\Buffers\ArrayPoolEventSource.cs" />
-    <BuffersSources Include="$(CoreFxSourcesRoot)\System\Buffers\DefaultArrayPool.cs" />
-    <BuffersSources Include="$(CoreFxSourcesRoot)\System\Buffers\DefaultArrayPoolBucket.cs" />
+    <BuffersSources Include="$(CoreFxSourcesRoot)\System\Buffers\ConfigurableArrayPool.cs" />
+    <BuffersSources Include="$(CoreFxSourcesRoot)\System\Buffers\TlsOverPerCoreLockedStacksArrayPool.cs" />
+    <BuffersSources Include="$(CoreFxSourcesRoot)\System\Buffers\TlsOverPerCoreLockedStacksArrayPool.Windows.cs" Condition="'$(TargetsUnix)' != 'true'" />
+    <BuffersSources Include="$(CoreFxSourcesRoot)\System\Buffers\TlsOverPerCoreLockedStacksArrayPool.Unix.cs" Condition="'$(TargetsUnix)' == 'true'" />
     <BuffersSources Include="$(CoreFxSourcesRoot)\System\Buffers\Utilities.cs" />
     <SecuritySources Include="$(CoreFxSourcesRoot)\System\Security\CryptographicException.cs" />
   </ItemGroup>
index 0931cba..cb15ee8 100644 (file)
@@ -725,6 +725,9 @@ namespace Microsoft.Win32 {
         [DllImport(KERNEL32, SetLastError=true)]
         internal static extern void GetSystemInfo(ref SYSTEM_INFO lpSystemInfo);
 
+        [DllImport(KERNEL32)]
+        internal static extern int GetCurrentProcessorNumber();
+
         [DllImport(KERNEL32, CharSet=CharSet.Auto, BestFitMapping=true)]
         internal static extern int FormatMessage(int dwFlags, IntPtr lpSource,
                     int dwMessageId, int dwLanguageId, [Out]StringBuilder lpBuffer,