Committing TBB 2019 Update 2 source code
author tbbdev <inteltbbdevelopers@intel.com>
Wed, 7 Nov 2018 11:38:02 +0000 (14:38 +0300)
committer tbbdev <inteltbbdevelopers@intel.com>
Wed, 7 Nov 2018 11:38:02 +0000 (14:38 +0300)
31 files changed:
CHANGES
README.md
doc/Release_Notes.txt
include/tbb/concurrent_hash_map.h
include/tbb/concurrent_priority_queue.h
include/tbb/concurrent_queue.h
include/tbb/concurrent_vector.h
include/tbb/parallel_reduce.h
include/tbb/tbb_config.h
include/tbb/tbb_stddef.h
src/rml/server/thread_monitor.h
src/tbb/concurrent_monitor.cpp
src/tbb/concurrent_monitor.h
src/tbb/market.cpp
src/tbb/market.h
src/tbb/private_server.cpp
src/tbb/semaphore.h
src/tbbmalloc/backend.cpp
src/tbbmalloc/frontend.cpp
src/tbbmalloc/tbbmalloc_internal.h
src/test/test_blocked_range.cpp
src/test/test_blocked_range2d.cpp
src/test/test_blocked_range3d.cpp
src/test/test_concurrent_hash_map.cpp
src/test/test_concurrent_priority_queue.cpp
src/test/test_concurrent_queue.cpp
src/test/test_concurrent_vector.cpp
src/test/test_malloc_whitebox.cpp
src/test/test_task_arena.cpp
src/test/test_tbb_header.cpp
src/test/test_tbb_version.cpp

diff --git a/CHANGES b/CHANGES
index 3cc1f5f..5105e3c 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -2,6 +2,30 @@
 The list of most significant changes made over time in
 Intel(R) Threading Building Blocks (Intel(R) TBB).
 
+Intel TBB 2019 Update 2
+TBB_INTERFACE_VERSION == 11002
+
+Changes (w.r.t. Intel TBB 2019 Update 1):
+
+- Added constructors with HashCompare argument to concurrent_hash_map
+    (https://github.com/01org/tbb/pull/63).
+- Added overloads for parallel_reduce with default partitioner and
+    user-supplied context.
+- Added deduction guides for tbb containers: concurrent_vector,
+    concurrent_queue, concurrent_bounded_queue,
+    concurrent_priority_queue.
+- Reallocation of memory objects >1MB now copies and frees memory if
+    the size is reduced by a factor of two or more, trading some
+    performance for reduced memory usage.
+- After a period of sleep, TBB worker threads now prefer returning to
+    their last used task arena.
+
+Bugs fixed:
+
+- Fixed compilation of task_group.h when targeting macOS* 10.11 or
+    earlier (https://github.com/conda-forge/tbb-feedstock/issues/42).
+
+------------------------------------------------------------------------
 Intel TBB 2019 Update 1
 TBB_INTERFACE_VERSION == 11001
 
@@ -27,6 +51,9 @@ Bugs fixed:
     observer.
 - Fixed compilation of task_group.h by Visual C++* 15.7 with
     /permissive- option (https://github.com/01org/tbb/issues/53).
+- Fixed tbb4py to avoid dependency on Intel(R) C++ Compiler shared
+    libraries.
+- Fixed compilation for Anaconda environment with GCC 7.3 and higher.
 
 ------------------------------------------------------------------------
 Intel TBB 2019
index a502785..4a9d1bd 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
-# Threading Building Blocks 2019 Update 1
-[![Stable release](https://img.shields.io/badge/version-2019_U1-green.svg)](https://github.com/01org/tbb/releases/tag/2019_U1)
+# Threading Building Blocks 2019 Update 2
+[![Stable release](https://img.shields.io/badge/version-2019_U2-green.svg)](https://github.com/01org/tbb/releases/tag/2019_U2)
 [![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](LICENSE)
 
 Threading Building Blocks (TBB) lets you easily write parallel C++ programs that take
index 045f031..aba73d9 100644 (file)
@@ -92,7 +92,7 @@ Software - Supported Compilers
             GNU Compilers (gcc) 4.1 - 7.1
             GNU C Library (glibc) version 2.4 - 2.19
     Xcode* 7.0 - 9.1
-    Android* NDK r10e - r16
+    Android* NDK r10e - r17b
 
 Software - Supported Performance Analysis Tools
 
index f84dfd3..c9030e7 100644 (file)
@@ -759,9 +759,19 @@ public:
         : internal::hash_map_base(), my_allocator(a)
     {}
 
+    explicit concurrent_hash_map( const HashCompare& compare, const allocator_type& a = allocator_type() )
+        : internal::hash_map_base(), my_allocator(a), my_hash_compare(compare)
+    {}
+
     //! Construct empty table with n preallocated buckets. This number serves also as initial concurrency level.
     concurrent_hash_map( size_type n, const allocator_type &a = allocator_type() )
-        : my_allocator(a)
+        : internal::hash_map_base(), my_allocator(a)
+    {
+        reserve( n );
+    }
+
+    concurrent_hash_map( size_type n, const HashCompare& compare, const allocator_type& a = allocator_type() )
+        : internal::hash_map_base(), my_allocator(a), my_hash_compare(compare)
     {
         reserve( n );
     }
@@ -800,7 +810,16 @@ public:
     //! Construction with copying iteration range and given allocator instance
     template<typename I>
     concurrent_hash_map( I first, I last, const allocator_type &a = allocator_type() )
-        : my_allocator(a)
+        : internal::hash_map_base(), my_allocator(a)
+    {
+        call_clear_on_leave scope_guard(this);
+        internal_copy(first, last, std::distance(first, last));
+        scope_guard.dismiss();
+    }
+
+    template<typename I>
+    concurrent_hash_map( I first, I last, const HashCompare& compare, const allocator_type& a = allocator_type() )
+        : internal::hash_map_base(), my_allocator(a), my_hash_compare(compare)
     {
         call_clear_on_leave scope_guard(this);
         internal_copy(first, last, std::distance(first, last));
@@ -810,7 +829,15 @@ public:
 #if __TBB_INITIALIZER_LISTS_PRESENT
     //! Construct empty table with n preallocated buckets. This number serves also as initial concurrency level.
     concurrent_hash_map( std::initializer_list<value_type> il, const allocator_type &a = allocator_type() )
-        : my_allocator(a)
+        : internal::hash_map_base(), my_allocator(a)
+    {
+        call_clear_on_leave scope_guard(this);
+        internal_copy(il.begin(), il.end(), il.size());
+        scope_guard.dismiss();
+    }
+
+    concurrent_hash_map( std::initializer_list<value_type> il, const HashCompare& compare, const allocator_type& a = allocator_type() )
+        : internal::hash_map_base(), my_allocator(a), my_hash_compare(compare)
     {
         call_clear_on_leave scope_guard(this);
         internal_copy(il.begin(), il.end(), il.size());
index caab8bb..ad4e85d 100644 (file)
@@ -121,7 +121,7 @@ class concurrent_priority_queue {
 
     //! Copy constructor
     /** This operation is unsafe if there are pending concurrent operations on the src queue. */
-    explicit concurrent_priority_queue(const concurrent_priority_queue& src) : mark(src.mark),
+    concurrent_priority_queue(const concurrent_priority_queue& src) : mark(src.mark),
         my_size(src.my_size), data(src.data.begin(), src.data.end(), src.data.get_allocator())
     {
         my_aggregator.initialize_handler(my_functor_t(this));
@@ -481,6 +481,14 @@ class concurrent_priority_queue {
     }
 };
 
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+// Deduction guide for the constructor from two iterators
+template<typename InputIterator,
+         typename T = typename std::iterator_traits<InputIterator>::value_type,
+         typename A = cache_aligned_allocator<T>
+> concurrent_priority_queue(InputIterator, InputIterator, const A& = A())
+-> concurrent_priority_queue<T, std::less<T>, A>;
+#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
 } // namespace interface5
 
 using interface5::concurrent_priority_queue;
index 0e9bc35..98b434b 100644 (file)
@@ -177,6 +177,15 @@ public:
     const_iterator unsafe_end() const {return const_iterator();}
 } ;
 
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+// Deduction guide for the constructor from two iterators
+template<typename InputIterator,
+         typename T = typename std::iterator_traits<InputIterator>::value_type,
+         typename A = cache_aligned_allocator<T>
+> concurrent_queue(InputIterator, InputIterator, const A& = A())
+-> concurrent_queue<T, A>;
+#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
+
 template<typename T, class A>
 concurrent_queue<T,A>::~concurrent_queue() {
     clear();
@@ -439,6 +448,15 @@ public:
 
 };
 
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+// guide for concurrent_bounded_queue(InputIterator, InputIterator, ...)
+template<typename InputIterator,
+         typename T = typename std::iterator_traits<InputIterator>::value_type,
+         typename A = cache_aligned_allocator<T>
+> concurrent_bounded_queue(InputIterator, InputIterator, const A& = A())
+-> concurrent_bounded_queue<T, A>;
+#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
+
 template<typename T, class A>
 concurrent_bounded_queue<T,A>::~concurrent_bounded_queue() {
     clear();
index 5d8f1d4..e4a9448 100644 (file)
@@ -1156,6 +1156,25 @@ private:
     };
 };
 
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+// Deduction guide for the constructor from two iterators
+template<typename I,
+         typename T = typename std::iterator_traits<I>::value_type,
+         typename A = cache_aligned_allocator<T>
+> concurrent_vector(I, I, const A& = A())
+-> concurrent_vector<T, A>;
+
+// Deduction guide for the constructor from a vector and allocator
+template<typename T, typename A1, typename A2>
+concurrent_vector(const concurrent_vector<T, A1> &, const A2 &)
+-> concurrent_vector<T, A2>;
+
+// Deduction guide for the constructor from an initializer_list
+template<typename T, typename A = cache_aligned_allocator<T>
+> concurrent_vector(std::initializer_list<T>, const A& = A())
+-> concurrent_vector<T, A>;
+#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
+
 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
 #pragma warning (push)
 #pragma warning (disable: 4701) // potentially uninitialized local variable "old"
index 52afc6a..9429d19 100644 (file)
@@ -393,6 +393,13 @@ void parallel_reduce( const Range& range, Body& body, affinity_partitioner& part
 }
 
 #if __TBB_TASK_GROUP_CONTEXT
+//! Parallel iteration with reduction, default partitioner and user-supplied context.
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_reduce( const Range& range, Body& body, task_group_context& context ) {
+    internal::start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
+}
+
 //! Parallel iteration with reduction, simple partitioner and user-supplied context.
 /** @ingroup algorithms **/
 template<typename Range, typename Body>
@@ -480,6 +487,17 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody
 }
 
 #if __TBB_TASK_GROUP_CONTEXT
+//! Parallel iteration with reduction, default partitioner and user-supplied context.
+/** @ingroup algorithms **/
+template<typename Range, typename Value, typename RealBody, typename Reduction>
+Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
+                       task_group_context& context ) {
+    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
+    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
+                          ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
+    return body.result();
+}
+
 //! Parallel iteration with reduction, simple partitioner and user-supplied context.
 /** @ingroup algorithms **/
 template<typename Range, typename Value, typename RealBody, typename Reduction>
index 74f59c1..dd585c7 100644 (file)
@@ -56,7 +56,7 @@
 #endif
 
 #if __clang__
-    /** according to clang documentation, version can be vendor specific **/
+    // according to clang documentation, version can be vendor specific
     #define __TBB_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
 #endif
 
     #define __TBB_IOS 1
 #endif
 
+#if __APPLE__
+    #if __INTEL_COMPILER && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1099 \
+                         && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101000
+        // ICC does not correctly set the macro if -mmacosx-version-min is not specified
+        #define __TBB_MACOS_TARGET_VERSION  (100000 + 10*(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ - 1000))
+    #else
+        #define __TBB_MACOS_TARGET_VERSION  __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__
+    #endif
+#endif
+
 /** Preprocessor symbols to determine HW architecture **/
 
 #if _WIN32||_WIN64
     #define __TBB_ALIGNAS_PRESENT                           (__INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1500)
     #define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT            (__INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1210)
     #define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT            (__cplusplus >= 201402L)
+    #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT            __INTEL_COMPILER > 1900
 #elif __clang__
 /** TODO: these options need to be rechecked **/
     #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT          __has_feature(__cxx_variadic_templates__)
     #define __TBB_ALIGNAS_PRESENT                           __has_feature(cxx_alignas)
     #define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT            __has_feature(cxx_alias_templates)
     #define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT            (__cplusplus >= 201402L)
+    #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT            (__has_feature(__cpp_deduction_guides))
 #elif __GNUC__
     #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT          __GXX_EXPERIMENTAL_CXX0X__
     #define __TBB_CPP11_VARIADIC_FIXED_LENGTH_EXP_PRESENT   (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40700)
     #define __TBB_ALIGNAS_PRESENT                           (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40800)
     #define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT            (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40700)
     #define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT            (__cplusplus >= 201402L     && __TBB_GCC_VERSION >= 50000)
+    #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT            (__cpp_deduction_guides >= 201606)
 #elif _MSC_VER
     // These definitions are also used with Intel C++ Compiler in "default" mode (__INTEL_CXX11_MODE__ == 0);
     // see a comment in "__INTEL_COMPILER" section above.
     #define __TBB_ALIGNAS_PRESENT                           (_MSC_VER >= 1900)
     #define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT            (_MSC_VER >= 1800)
     #define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT            (_MSC_VER >= 1900)
+    #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT            (_MSVC_LANG >= 201703L)
 #else
     #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT          0
     #define __TBB_CPP11_RVALUE_REF_PRESENT                  0
     #define __TBB_ALIGNAS_PRESENT                           0
     #define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT            0
     #define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT            (__cplusplus >= 201402L)
+    #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT            0
 #endif
 
 // C++11 standard library features
 
 #define __TBB_CPP11_GET_NEW_HANDLER_PRESENT                 (_MSC_VER >= 1900 || __TBB_GLIBCXX_VERSION >= 40900 && __GXX_EXPERIMENTAL_CXX0X__ || _LIBCPP_VERSION)
 
-#define __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT             (_MSC_VER >= 1900 || __GLIBCXX__ && __cpp_lib_uncaught_exceptions || _LIBCPP_VERSION >= 3700)
+#define __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT             (_MSC_VER >= 1900 || __GLIBCXX__ && __cpp_lib_uncaught_exceptions \
+                                                            || _LIBCPP_VERSION >= 3700 && (!__TBB_MACOS_TARGET_VERSION || __TBB_MACOS_TARGET_VERSION >= 101200))
 
 // std::swap is in <utility> only since C++11, though MSVC had it at least since VS2005
 #if _MSC_VER>=1400 || _LIBCPP_VERSION || __GXX_EXPERIMENTAL_CXX0X__
index 37961df..a177421 100644 (file)
@@ -26,7 +26,7 @@
 #define TBB_VERSION_MINOR 0
 
 // Engineering-focused interface version
-#define TBB_INTERFACE_VERSION 11001
+#define TBB_INTERFACE_VERSION 11002
 #define TBB_INTERFACE_VERSION_MAJOR TBB_INTERFACE_VERSION/1000
 
 // The oldest major interface version still supported
index 4ddd5bf..2b59bb7 100644 (file)
@@ -78,7 +78,7 @@ public:
         friend class thread_monitor;
         tbb::atomic<size_t> my_epoch;
     };
-    thread_monitor() : spurious(false), my_sema() {
+    thread_monitor() : skipped_wakeup(false), my_sema() {
         my_cookie.my_epoch = 0;
         ITT_SYNC_CREATE(&my_sema, SyncType_RML, SyncObj_ThreadMonitor);
         in_wait = false;
@@ -129,9 +129,9 @@ public:
     //! Detach thread
     static void detach_thread(handle_type handle);
 private:
-    cookie my_cookie;
-    tbb::atomic<bool>   in_wait;
-    bool   spurious;
+    cookie my_cookie; // epoch counter
+    tbb::atomic<bool> in_wait;
+    bool skipped_wakeup;
     tbb::internal::binary_semaphore my_sema;
 #if USE_PTHREAD
     static void check( int error_code, const char* routine );
@@ -244,24 +244,25 @@ inline void thread_monitor::notify() {
 }
 
 inline void thread_monitor::prepare_wait( cookie& c ) {
-    if( spurious ) {
-        spurious = false;
-        //  consumes a spurious posted signal. don't wait on my_sema.
-        my_sema.P();
+    if( skipped_wakeup ) {
+        // Lazily consume a signal that was skipped due to cancel_wait
+        skipped_wakeup = false;
+        my_sema.P(); // does not really wait on the semaphore
     }
     c = my_cookie;
-    in_wait = true;
-   __TBB_full_memory_fence();
+    in_wait.store<tbb::full_fence>( true );
 }
 
 inline void thread_monitor::commit_wait( cookie& c ) {
-    bool do_it = ( c.my_epoch == my_cookie.my_epoch);
+    bool do_it = ( c.my_epoch == my_cookie.my_epoch );
     if( do_it ) my_sema.P();
     else        cancel_wait();
 }
 
 inline void thread_monitor::cancel_wait() {
-    spurious = ! in_wait.fetch_and_store( false );
+    // if not in_wait, then some thread has sent us a signal;
+    // it will be consumed by the next prepare_wait call
+    skipped_wakeup = ! in_wait.fetch_and_store( false );
 }
 
 } // namespace internal
index 375e7b7..e8501b0 100644 (file)
@@ -36,9 +36,9 @@ concurrent_monitor::~concurrent_monitor() {
 void concurrent_monitor::prepare_wait( thread_context& thr, uintptr_t ctx ) {
     if( !thr.ready )
         thr.init();
-    // this is good place to pump previous spurious wakeup
-    else if( thr.spurious ) {
-        thr.spurious = false;
+    // this is a good place to pump a previously skipped wakeup
+    else if( thr.skipped_wakeup ) {
+        thr.skipped_wakeup = false;
         thr.semaphore().P();
     }
     thr.context = ctx;
@@ -52,18 +52,17 @@ void concurrent_monitor::prepare_wait( thread_context& thr, uintptr_t ctx ) {
 }
 
 void concurrent_monitor::cancel_wait( thread_context& thr ) {
-    // spurious wakeup will be pumped in the following prepare_wait()
-    thr.spurious = true;
+    // possible skipped wakeup will be pumped in the following prepare_wait()
+    thr.skipped_wakeup = true;
     // try to remove node from waitset
     bool th_in_waitset = thr.in_waitset;
     if( th_in_waitset ) {
         tbb::spin_mutex::scoped_lock l( mutex_ec );
         if (thr.in_waitset) {
-            // successfully removed from waitset,
-            // so there will be no spurious wakeup
-            thr.in_waitset = false;
-            thr.spurious = false;
             waitset_ec.remove( (waitset_t::node_t&)thr );
+            // node is removed from waitset, so there will be no wakeup
+            thr.in_waitset = false;
+            thr.skipped_wakeup = false;
         }
     }
 }
index e3aca27..1efa52e 100644 (file)
@@ -100,13 +100,13 @@ public:
     class thread_context : waitset_node_t, no_copy {
         friend class concurrent_monitor;
     public:
-        thread_context() : spurious(false), aborted(false), ready(false), context(0) {
+        thread_context() : skipped_wakeup(false), aborted(false), ready(false), context(0) {
             epoch = 0;
             in_waitset = false;
         }
         ~thread_context() {
             if (ready) {
-                if( spurious ) semaphore().P();
+                if( skipped_wakeup ) semaphore().P();
                 semaphore().~binary_semaphore();
             }
         }
@@ -119,7 +119,7 @@ public:
         tbb::aligned_space<binary_semaphore> sema;
         __TBB_atomic unsigned epoch;
         tbb::atomic<bool> in_waitset;
-        bool  spurious;
+        bool  skipped_wakeup;
         bool  aborted;
         bool  ready;
         uintptr_t context;
index 894cf4f..c922609 100644 (file)
@@ -360,10 +360,10 @@ void market::try_destroy_arena ( arena* a, uintptr_t aba_epoch ) {
 }
 
 /** This method must be invoked under my_arenas_list_mutex. **/
-arena* market::arena_in_need ( arena_list_type &arenas, arena *&next ) {
+arena* market::arena_in_need ( arena_list_type &arenas, arena *hint ) {
     if ( arenas.empty() )
         return NULL;
-    arena_list_type::iterator it = next;
+    arena_list_type::iterator it = hint;
     __TBB_ASSERT( it != arenas.end(), NULL );
     do {
         arena& a = *it;
@@ -375,11 +375,9 @@ arena* market::arena_in_need ( arena_list_type &arenas, arena *&next ) {
 #endif
             ) {
             a.my_references += arena::ref_worker;
-            as_atomic(next) = &*it; // a subject for innocent data race under the reader lock
-            // TODO: rework global round robin policy to local or random to avoid this write
             return &a;
         }
-    } while ( it != next );
+    } while ( it != hint );
     return NULL;
 }
 
@@ -415,6 +413,16 @@ int market::update_allotment ( arena_list_type& arenas, int workers_demand, int
     return assigned;
 }
 
+/** This method must be invoked under my_arenas_list_mutex. **/
+bool market::is_arena_in_list( arena_list_type &arenas, arena *a ) {
+    if ( a ) {
+        for ( arena_list_type::iterator it = arenas.begin(); it != arenas.end(); ++it )
+            if ( a == &*it )
+                return true;
+    }
+    return false;
+}
+
 #if __TBB_TASK_PRIORITY
 inline void market::update_global_top_priority ( intptr_t newPriority ) {
     GATHER_STATISTIC( ++governor::local_scheduler_if_initialized()->my_counters.market_prio_switches );
@@ -432,21 +440,29 @@ inline void market::reset_global_priority () {
     update_global_top_priority(normalized_normal_priority);
 }
 
-arena* market::arena_in_need ( arena* prev_arena )
-{
-    suppress_unused_warning(prev_arena);
+arena* market::arena_in_need ( arena* prev_arena ) {
     if( as_atomic(my_total_demand) <= 0 )
         return NULL;
     arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex, /*is_writer=*/false);
     assert_market_valid();
     int p = my_global_top_priority;
     arena *a = NULL;
-    do {
-        priority_level_info &pl = my_priority_levels[p];
+
+    // Checks if arena is alive or not
+    if ( is_arena_in_list( my_priority_levels[p].arenas, prev_arena ) ) {
+        a = arena_in_need( my_priority_levels[p].arenas, prev_arena );
+    }
+
+    while ( !a && p >= my_global_bottom_priority ) {
+        priority_level_info &pl = my_priority_levels[p--];
         a = arena_in_need( pl.arenas, pl.next_arena );
+        if ( a ) {
+            as_atomic(pl.next_arena) = a; // a subject for innocent data race under the reader lock
+            // TODO: rework global round robin policy to local or random to avoid this write
+        }
         // TODO: When refactoring task priority code, take into consideration the
         // __TBB_TRACK_PRIORITY_LEVEL_SATURATION sections from earlier versions of TBB
-    } while ( !a && --p >= my_global_bottom_priority );
+    }
     return a;
 }
 
@@ -681,7 +697,8 @@ void market::adjust_demand ( arena& a, int delta ) {
 
 void market::process( job& j ) {
     generic_scheduler& s = static_cast<generic_scheduler&>(j);
-    arena *a = NULL;
+    // s.my_arena can be dead. Don't access it until arena_in_need is called
+    arena *a = s.my_arena;
     __TBB_ASSERT( governor::is_set(&s), NULL );
     enum {
         query_interval = 1000,
@@ -691,6 +708,7 @@ void market::process( job& j ) {
         while ( (a = arena_in_need(a)) )
         {
             a->process(s);
+            a = NULL; // To avoid double checks in arena_in_need
             i = first_interval;
         }
         // Workers leave market because there is no arena in need. It can happen earlier than
index cc76805..d6bdf10 100644 (file)
@@ -217,6 +217,7 @@ private:
             update_allotment( my_arenas, my_total_demand, (int)my_num_workers_soft_limit );
     }
 
+    // TODO: consider rewriting the code to use the is_arena_in_list function
     //! Returns next arena that needs more workers, or NULL.
     arena* arena_in_need (arena*) {
         if(__TBB_load_with_acquire(my_total_demand) <= 0)
@@ -234,10 +235,12 @@ private:
 
     void remove_arena_from_list ( arena& a );
 
-    arena* arena_in_need ( arena_list_type &arenas, arena *&next );
+    arena* arena_in_need ( arena_list_type &arenas, arena *hint );
 
     static int update_allotment ( arena_list_type& arenas, int total_demand, int max_workers );
 
+    bool is_arena_in_list( arena_list_type &arenas, arena *a );
+
 
     ////////////////////////////////////////////////////////////////////////////////
     // Implementation of rml::tbb_client interface methods
index ae25e57..bd79453 100644 (file)
@@ -275,6 +275,7 @@ void private_worker::run() {
             // Check/set the invariant for sleeping
             if( my_state!=st_quit && my_server.try_insert_in_asleep_list(*this) ) {
                 my_thread_monitor.commit_wait(c);
+                __TBB_ASSERT( my_state==st_quit || !my_next, "Thread monitor missed a spurious wakeup?" );
                 my_server.propagate_chain_reaction();
             } else {
                 // Invariant broken
@@ -310,8 +311,10 @@ inline void private_worker::wake_or_launch() {
             release_handle(my_handle, governor::does_client_join_workers(my_client));
         }
     }
-    else
+    else {
+        __TBB_ASSERT( !my_next, "Should not wake a thread while it's still in the asleep list" );
         my_thread_monitor.notify();
+    }
 }
 
 //------------------------------------------------------------------------
@@ -390,8 +393,11 @@ void private_server::wake_some( int additional_slack ) {
         }
     }
 done:
-    while( w>wakee )
-        (*--w)->wake_or_launch();
+    while( w>wakee ) {
+        private_worker* ww = *--w;
+        ww->my_next = NULL;
+        ww->wake_or_launch();
+    }
 }
 
 void private_server::adjust_job_count_estimate( int delta ) {
index e80e931..ffcf680 100644 (file)
@@ -191,6 +191,8 @@ private:
 
 #if __TBB_USE_FUTEX
 class binary_semaphore : no_copy {
+// The implementation is equivalent to the "Mutex, Take 3" one
+// in the paper "Futexes Are Tricky" by Ulrich Drepper
 public:
     //! ctor
     binary_semaphore() { my_sem = 1; }
@@ -202,7 +204,7 @@ public:
         if( (s = my_sem.compare_and_swap( 1, 0 ))!=0 ) {
             if( s!=2 )
                 s = my_sem.fetch_and_store( 2 );
-            while( s!=0 ) {
+            while( s!=0 ) { // This loop deals with spurious wakeup
                 futex_wait( &my_sem, 2 );
                 s = my_sem.fetch_and_store( 2 );
             }
@@ -211,14 +213,11 @@ public:
     //! post/release
     void V() {
         __TBB_ASSERT( my_sem>=1, "multiple V()'s in a row?" );
-        if( my_sem--!=1 ) {
-            //if old value was 2
-            my_sem = 0;
+        if( my_sem.fetch_and_store( 0 )==2 )
             futex_wakeup_one( &my_sem );
-        }
     }
 private:
-    atomic<int> my_sem;
+    atomic<int> my_sem; // 0 - open; 1 - closed, no waits; 2 - closed, possible waits
 };
 #else
 typedef uint32_t sem_count_t;
index cd57b38..44d3bcc 100644 (file)
@@ -650,7 +650,7 @@ FreeBlock *Backend::splitAlignedBlock(FreeBlock *fBlock, int num, size_t size,
     return fBlock;
 }
 
-inline size_t Backend::getMaxBinnedSize() const
+size_t Backend::getMaxBinnedSize() const
 {
     return hugePages.isEnabled && !inUserPool() ?
         maxBinned_HugePage : maxBinned_SmallPage;
index c854a04..25930d2 100644 (file)
@@ -2340,7 +2340,7 @@ static void *allocateAligned(MemoryPool *memPool, size_t size, size_t alignment)
 }
 
 static void *reallocAligned(MemoryPool *memPool, void *ptr,
-                            size_t size, size_t alignment = 0)
+                            size_t newSize, size_t alignment = 0)
 {
     void *result;
     size_t copySize;
@@ -2348,32 +2348,46 @@ static void *reallocAligned(MemoryPool *memPool, void *ptr,
     if (isLargeObject<ourMem>(ptr)) {
         LargeMemoryBlock* lmb = ((LargeObjectHdr *)ptr - 1)->memoryBlock;
         copySize = lmb->unalignedSize-((uintptr_t)ptr-(uintptr_t)lmb);
-        if (size <= copySize && (0==alignment || isAligned(ptr, alignment))) {
-            lmb->objectSize = size;
-            return ptr;
-        } else {
-            copySize = lmb->objectSize;
+
+        // Apply different strategies if size decreases
+        if (newSize <= copySize && (0 == alignment || isAligned(ptr, alignment))) {
+
+            // For huge objects (that do not fit in the backend cache), keep the same space unless
+            // the new size is at most half of the current one
+            bool isMemoryBlockHuge = copySize > memPool->extMemPool.backend.getMaxBinnedSize();
+            size_t threshold = isMemoryBlockHuge ? copySize / 2 : 0;
+            if (newSize > threshold) {
+                lmb->objectSize = newSize;
+                return ptr;
+            }
+            // TODO: For large objects suitable for the backend cache,
+            // split off the excess part and return it to the backend.
+        }
+        // Reallocate for real
+        copySize = lmb->objectSize;
 #if BACKEND_HAS_MREMAP
-            if (void *r = memPool->extMemPool.remap(ptr, copySize, size,
-                              alignment<largeObjectAlignment?
-                              largeObjectAlignment : alignment))
-                return r;
+        if (void *r = memPool->extMemPool.remap(ptr, copySize, newSize,
+                          alignment < largeObjectAlignment ? largeObjectAlignment : alignment))
+            return r;
 #endif
-            result = alignment ? allocateAligned(memPool, size, alignment) :
-                internalPoolMalloc(memPool, size);
-        }
+        result = alignment ? allocateAligned(memPool, newSize, alignment) :
+            internalPoolMalloc(memPool, newSize);
+
     } else {
         Block* block = (Block *)alignDown(ptr, slabSize);
         copySize = block->findObjectSize(ptr);
-        if (size <= copySize && (0==alignment || isAligned(ptr, alignment))) {
+
+        // TODO: Move the object to another bin if the size decreases and the current bin is "empty enough".
+        // Currently, when the size decreases, the old pointer is returned.
+        if (newSize <= copySize && (0==alignment || isAligned(ptr, alignment))) {
             return ptr;
         } else {
-            result = alignment ? allocateAligned(memPool, size, alignment) :
-                internalPoolMalloc(memPool, size);
+            result = alignment ? allocateAligned(memPool, newSize, alignment) :
+                internalPoolMalloc(memPool, newSize);
         }
     }
     if (result) {
-        memcpy(result, ptr, copySize<size? copySize: size);
+        memcpy(result, ptr, copySize < newSize ? copySize : newSize);
         internalPoolFree(memPool, ptr, 0);
     }
     return result;
@@ -2602,6 +2616,7 @@ static size_t internalMsize(void* ptr)
     if (ptr) {
         MALLOC_ASSERT(isRecognized(ptr), "Invalid pointer in scalable_msize detected.");
         if (isLargeObject<ourMem>(ptr)) {
+            // TODO: return the maximum memory size that can be written to this object
             LargeMemoryBlock* lmb = ((LargeObjectHdr*)ptr - 1)->memoryBlock;
             return lmb->objectSize;
         } else
index d31e757..b173ca1 100644 (file)
@@ -847,7 +847,7 @@ public:
         memSoftLimit = softLimit;
         releaseCachesToLimit();
     }
-    inline size_t getMaxBinnedSize() const;
+    size_t getMaxBinnedSize() const;
 
     bool ptrCanBeValid(void *ptr) const { return usedAddrRange.inRange(ptr); }
 
index 4083aa7..6f67a3f 100644 (file)
@@ -165,6 +165,25 @@ void TestProportionalSplitOverflow()
     REMARK("OK\n");
 }
 #endif /* __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES */
+
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+void TestDeductionGuides() {
+    std::vector<const int *> v;
+
+    // check blocked_range(Value, Value, size_t)
+    tbb::blocked_range r1(v.begin(), v.end());
+    static_assert(std::is_same<decltype(r1), tbb::blocked_range<decltype(v)::iterator>>::value);
+
+    // check blocked_range(blocked_range &)
+    tbb::blocked_range r2(r1);
+    static_assert(std::is_same<decltype(r2), decltype(r1)>::value);
+
+    // check blocked_range(blocked_range &&)
+    tbb::blocked_range r3(std::move(r1));
+    static_assert(std::is_same<decltype(r3), decltype(r1)>::value);
+}
+#endif
+
 //------------------------------------------------------------------------
 // Test driver
 #include "tbb/task_scheduler_init.h"
@@ -184,5 +203,8 @@ int TestMain () {
         TestProportionalSplitOverflow();
     #endif
 
+    #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+        TestDeductionGuides();
+    #endif
     return Harness::Done;
 }
index d4985ea..b3a878f 100644 (file)
@@ -136,6 +136,26 @@ void ParallelTest() {
     }
 }
 
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+#include <vector>
+void TestDeductionGuides() {
+    std::vector<const unsigned long *> v;
+    std::vector<double> v2;
+
+    // check blocked_range2d(RowValue, RowValue, size_t, ColValue, ColValue, size_t)
+    tbb::blocked_range2d r1(v.begin(), v.end(), 2, v2.begin(), v2.end(), 2);
+    static_assert(std::is_same<decltype(r1), tbb::blocked_range2d<decltype(v)::iterator, decltype(v2)::iterator>>::value);
+
+    // check blocked_range2d(blocked_range2d &)
+    tbb::blocked_range2d r2(r1);
+    static_assert(std::is_same<decltype(r2), decltype(r1)>::value);
+
+    // check blocked_range2d(blocked_range2d &&)
+    tbb::blocked_range2d r3(std::move(r1));
+    static_assert(std::is_same<decltype(r3), decltype(r1)>::value);
+}
+#endif
+
 #include "tbb/task_scheduler_init.h"
 
 int TestMain () {
@@ -144,5 +164,9 @@ int TestMain () {
         tbb::task_scheduler_init init(p);
         ParallelTest();
     }
+
+    #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+        TestDeductionGuides();
+    #endif
     return Harness::Done;
 }
index b9a8c86..111d7e8 100644 (file)
@@ -167,6 +167,28 @@ void ParallelTest() {
     }
 }
 
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+#include <vector>
+void TestDeductionGuides() {
+    std::vector<const unsigned long *> v;
+    std::vector<double> v2;
+    std::vector<std::vector<int>> v3;
+
+    // check blocked_range3d(PageValue, PageValue, size_t, RowValue, RowValue, size_t, ColValue, ColValue, size_t)
+    tbb::blocked_range3d r1(v.begin(), v.end(), 2, v2.begin(), v2.end(), 2, v3.begin(), v3.end(), 6);
+    static_assert(std::is_same<decltype(r1),
+        tbb::blocked_range3d<decltype(v)::iterator, decltype(v2)::iterator, decltype(v3)::iterator>>::value);
+
+    // check blocked_range3d(blocked_range3d &)
+    tbb::blocked_range3d r2(r1);
+    static_assert(std::is_same<decltype(r2), decltype(r1)>::value);
+
+    // check blocked_range3d(blocked_range3d &&)
+    tbb::blocked_range3d r3(std::move(r1));
+    static_assert(std::is_same<decltype(r3), decltype(r1)>::value);
+}
+#endif
+
 #include "tbb/task_scheduler_init.h"
 
 int TestMain () {
@@ -175,5 +197,9 @@ int TestMain () {
         tbb::task_scheduler_init init(p);
         ParallelTest();
     }
+
+    #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+        TestDeductionGuides();
+    #endif
     return Harness::Done;
 }
index e36006c..1c83fd0 100644 (file)
@@ -1203,12 +1203,16 @@ void Examine( Table c, const std::list<typename Table::value_type> &lst) {
     a.deallocate( ptr, 1 );
 }
 
+template<typename T>
+struct debug_hash_compare : tbb::tbb_hash_compare<T> {};
+
 template <bool default_construction_present, typename Value>
 void TypeTester( const std::list<Value> &lst ) {
     __TBB_ASSERT( lst.size() >= 5, "Array should have at least 5 elements" );
     typedef typename Value::first_type first_type;
     typedef typename Value::second_type second_type;
     typedef tbb::concurrent_hash_map<first_type,second_type> ch_map;
+    debug_hash_compare<first_type> compare;
     // Construct an empty hash map.
     ch_map c1;
     c1.insert( lst.begin(), lst.end() );
@@ -1216,34 +1220,55 @@ void TypeTester( const std::list<Value> &lst ) {
 #if __TBB_INITIALIZER_LISTS_PRESENT && !__TBB_CPP11_INIT_LIST_TEMP_OBJS_LIFETIME_BROKEN
     // Constructor from initializer_list.
     typename std::list<Value>::const_iterator it = lst.begin();
-    ch_map c2( {*it++, *it++, *it++} );
+    std::initializer_list<Value> il = { *it++, *it++, *it++ };
+    ch_map c2( il );
     c2.insert( it, lst.end() );
     Examine<default_construction_present>( c2, lst );
+
+    // Constructor from initializer_list and compare object
+    ch_map c3( il, compare);
+    c3.insert( it, lst.end() );
+    Examine<default_construction_present>( c3, lst );
+
+    // Constructor from initializer_list, compare object and allocator
+    ch_map c4( il, compare, typename ch_map::allocator_type());
+    c4.insert( it, lst.end());
+    Examine<default_construction_present>( c4, lst );
 #endif
     // Copying constructor.
-    ch_map c3(c1);
-    Examine<default_construction_present>( c3, lst );
+    ch_map c5(c1);
+    Examine<default_construction_present>( c5, lst );
     // Construct with non-default allocator
     typedef tbb::concurrent_hash_map< first_type,second_type,tbb::tbb_hash_compare<first_type>,debug_allocator<Value> > ch_map_debug_alloc;
-    ch_map_debug_alloc c4;
-    c4.insert( lst.begin(), lst.end() );
-    Examine<default_construction_present>( c4, lst );
-    // Copying constructor for vector with different allocator type.
-    ch_map_debug_alloc c5(c4);
-    Examine<default_construction_present>( c5, lst );
-    // Construction empty table with n preallocated buckets.
-    ch_map c6( lst.size() );
+    ch_map_debug_alloc c6;
     c6.insert( lst.begin(), lst.end() );
     Examine<default_construction_present>( c6, lst );
-    ch_map_debug_alloc c7( lst.size() );
-    c7.insert( lst.begin(), lst.end() );
+    // Copying constructor
+    ch_map_debug_alloc c7(c6);
     Examine<default_construction_present>( c7, lst );
-    // Construction with copying iteration range and given allocator instance.
-    ch_map c8( c1.begin(), c1.end() );
+    // Construction empty table with n preallocated buckets.
+    ch_map c8( lst.size() );
+    c8.insert( lst.begin(), lst.end() );
     Examine<default_construction_present>( c8, lst );
-    debug_allocator<Value> allocator;
-    ch_map_debug_alloc c9( lst.begin(), lst.end(), allocator );
+    ch_map_debug_alloc c9( lst.size() );
+    c9.insert( lst.begin(), lst.end() );
     Examine<default_construction_present>( c9, lst );
+    // Construction with copying iteration range.
+    ch_map c10( c1.begin(), c1.end() );
+    Examine<default_construction_present>( c10, lst );
+    // Construction with copying iteration range and given allocator instance.
+    debug_allocator<Value> allocator;
+    ch_map_debug_alloc c11( lst.begin(), lst.end(), allocator );
+    Examine<default_construction_present>( c11, lst );
+
+    typedef tbb::concurrent_hash_map< first_type,second_type,debug_hash_compare<first_type>,typename ch_map::allocator_type> ch_map_debug_hash;
+
+    // Constructor with two iterators and hash_compare
+    ch_map_debug_hash c12(c1.begin(), c1.end(), compare);
+    Examine<default_construction_present>( c12, lst );
+
+    ch_map_debug_hash c13(c1.begin(), c1.end(), compare, typename ch_map::allocator_type());
+    Examine<default_construction_present>( c13, lst );
 }
 
 #if __TBB_CPP11_SMART_POINTERS_PRESENT
@@ -1361,6 +1386,39 @@ void TestMoveSupport(){
     REPORT("Known issue: tests for C++11 move semantics support are skipped.\n");
 }
 #endif //__TBB_CPP11_RVALUE_REF_PRESENT
+
+template<typename Key>
+struct non_default_constructible_hash_compare : tbb::tbb_hash_compare<Key> {
+    non_default_constructible_hash_compare() {
+        ASSERT(false, "The hash compare object must not be default constructed when a hash_map is constructed with a compare argument");
+    }
+
+    non_default_constructible_hash_compare(int) {}
+};
+
+void TestHashCompareConstructors() {
+    typedef int key_type;
+    typedef tbb::concurrent_hash_map<key_type, key_type, non_default_constructible_hash_compare<key_type> > map_type;
+
+    non_default_constructible_hash_compare<key_type> compare(0);
+    map_type::allocator_type allocator;
+
+    map_type map1(compare);
+    map_type map2(compare, allocator);
+
+    map_type map3(1, compare);
+    map_type map4(1, compare, allocator);
+
+    std::vector<map_type::value_type> reference_vector;
+    map_type map5(reference_vector.begin(), reference_vector.end(), compare);
+    map_type map6(reference_vector.begin(), reference_vector.end(), compare, allocator);
+
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    map_type map7({}, compare);
+    map_type map8({}, compare, allocator);
+#endif
+}
+
 //------------------------------------------------------------------------
 // Test driver
 //------------------------------------------------------------------------
@@ -1419,6 +1477,7 @@ int TestMain () {
     }
 
     TestCPP11Types();
+    TestHashCompareConstructors();
 
     return Harness::Done;
 }
index 9773a03..0836fee 100644 (file)
@@ -1063,6 +1063,50 @@ void TestTypes() {
 #endif /* __TBB_CPP11_SMART_POINTERS_PRESENT */
 }
 
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+template <template <typename...> typename TQueue>
+void TestDeductionGuides() {
+    using ComplexType = const std::string*;
+    std::string s("s");
+    std::vector<ComplexType> v;
+    auto l = {ComplexType(&s), ComplexType(&s) };
+
+    // check TQueue(InputIterator, InputIterator)
+    TQueue q1(v.begin(), v.end());
+    static_assert(std::is_same<decltype(q1), TQueue<ComplexType>>::value);
+
+    // check TQueue(InputIterator, InputIterator, Allocator)
+    TQueue q2(v.begin(), v.end(), std::allocator<ComplexType>());
+    static_assert(std::is_same<decltype(q2), TQueue<ComplexType, std::less<ComplexType>,
+        std::allocator<ComplexType>>>::value);
+
+    // check TQueue(std::initializer_list)
+    TQueue q3(l);
+    static_assert(std::is_same<decltype(q3), TQueue<ComplexType>>::value);
+
+    // check TQueue(std::initializer_list, Allocator)
+    TQueue q4(l, std::allocator<ComplexType>());
+    static_assert(std::is_same<decltype(q4), TQueue<ComplexType, std::less<ComplexType>,
+        std::allocator<ComplexType>>>::value);
+
+    // check TQueue(TQueue &)
+    TQueue q5(q1);
+    static_assert(std::is_same<decltype(q5), decltype(q1)>::value);
+
+    // check TQueue(TQueue &, Allocator)
+    TQueue q6(q4, std::allocator<ComplexType>());
+    static_assert(std::is_same<decltype(q6), decltype(q4)>::value);
+
+    // check TQueue(TQueue &&)
+    TQueue q7(std::move(q1));
+    static_assert(std::is_same<decltype(q7), decltype(q1)>::value);
+
+    // check TQueue(TQueue &&, Allocator)
+    TQueue q8(std::move(q4), std::allocator<ComplexType>());
+    static_assert(std::is_same<decltype(q8), decltype(q4)>::value);
+}
+#endif
+
 int TestMain() {
     if (MinThread < 1)
         MinThread = 1;
@@ -1076,6 +1120,10 @@ int TestMain() {
 
     TestTypes();
 
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+    TestDeductionGuides<tbb::concurrent_priority_queue>();
+#endif
+
 #if __TBB_CPP11_RVALUE_REF_PRESENT
     TestgMoveConstructor();
     TestgMoveAssignOperator();
index 4c5d13d..72bf7e7 100644 (file)
@@ -1686,6 +1686,38 @@ void TestTypes() {
 #endif /* __TBB_CPP11_SMART_POINTERS_PRESENT */
 }
 
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+template <template <typename...> typename TQueue>
+void TestDeductionGuides() {
+    using ComplexType = const std::string*;
+    std::vector<ComplexType> v;
+
+    // check TQueue(InputIterator, InputIterator)
+    TQueue q1(v.begin(), v.end());
+    static_assert(std::is_same<decltype(q1), TQueue<ComplexType>>::value);
+
+    // check TQueue(InputIterator, InputIterator, Allocator)
+    TQueue q2(v.begin(), v.end(), std::allocator<ComplexType>());
+    static_assert(std::is_same<decltype(q2), TQueue<ComplexType, std::allocator<ComplexType>>>::value);
+
+    // check TQueue(TQueue &)
+    TQueue q3(q1);
+    static_assert(std::is_same<decltype(q3), decltype(q1)>::value);
+
+    // check TQueue(TQueue &, Allocator)
+    TQueue q4(q2, std::allocator<ComplexType>());
+    static_assert(std::is_same<decltype(q4), decltype(q2)>::value);
+
+    // check TQueue(TQueue &&)
+    TQueue q5(std::move(q1));
+    static_assert(std::is_same<decltype(q5), decltype(q1)>::value);
+
+    // check TQueue(TQueue &&, Allocator)
+    TQueue q6(std::move(q4), std::allocator<ComplexType>());
+    static_assert(std::is_same<decltype(q6), decltype(q4)>::value);
+}
+#endif
+
 int TestMain () {
     TestEmptiness();
 
@@ -1720,5 +1752,10 @@ int TestMain () {
 
     TestTypes();
 
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+    TestDeductionGuides<tbb::concurrent_queue>();
+    TestDeductionGuides<tbb::concurrent_bounded_queue>();
+#endif
+
     return Harness::Done;
 }
index 480498c..d973619 100644 (file)
@@ -1701,6 +1701,55 @@ void TestTypes() {
 #endif /* __TBB_CPP11_SMART_POINTERS_PRESENT */
 }
 
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+template <template <typename...> typename TVector>
+void TestDeductionGuides() {
+    using ComplexType = const std::string*;
+    std::vector<ComplexType> v;
+    std::string s = "s";
+    auto l = {ComplexType(&s), ComplexType(&s)};
+
+    // check TVector(InputIterator, InputIterator)
+    TVector v1(v.begin(), v.end());
+    static_assert(std::is_same<decltype(v1), TVector<ComplexType>>::value);
+
+    // check TVector(InputIterator, InputIterator, Allocator)
+    TVector v2(v.begin(), v.end(), std::allocator<ComplexType>());
+    static_assert(std::is_same<decltype(v2),
+       TVector<ComplexType, std::allocator<ComplexType>>>::value);
+
+    // check TVector(std::initializer_list<T>)
+    TVector v3(l);
+    static_assert(std::is_same<decltype(v3),
+        TVector<ComplexType>>::value);
+
+    // check TVector(std::initializer_list, Allocator)
+    TVector v4(l, std::allocator<ComplexType>());
+    static_assert(std::is_same<decltype(v4), TVector<ComplexType, std::allocator<ComplexType>>>::value);
+
+    // check TVector(TVector&)
+    TVector v5(v1);
+    static_assert(std::is_same<decltype(v5), TVector<ComplexType>>::value);
+
+    // check TVector(TVector&, Allocator)
+    TVector v6(v5, std::allocator<ComplexType>());
+    static_assert(std::is_same<decltype(v6), TVector<ComplexType, std::allocator<ComplexType>>>::value);
+
+    // check TVector(TVector&&)
+    TVector v7(std::move(v1));
+    static_assert(std::is_same<decltype(v7), decltype(v1)>::value);
+
+    // check TVector(TVector&&, Allocator)
+    TVector v8(std::move(v5), std::allocator<ComplexType>());
+    static_assert(std::is_same<decltype(v8), TVector<ComplexType, std::allocator<ComplexType>>>::value);
+
+    // check TVector(TVector&, Allocator)
+    TVector v9(v1, std::allocator<ComplexType>());
+    static_assert(std::is_same<decltype(v9), TVector<ComplexType, std::allocator<ComplexType>>>::value);
+
+}
+#endif
+
 int TestMain () {
     if( MinThread<1 ) {
         REPORT("ERROR: MinThread=%d, but must be at least 1\n",MinThread); MinThread = 1;
@@ -1780,6 +1829,9 @@ int TestMain () {
 #endif /*__TBB_CPP11_RVALUE_REF_PRESENT */
 #endif /* TBB_USE_EXCEPTIONS */
     TestTypes();
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+    TestDeductionGuides<tbb::concurrent_vector>();
+#endif
     ASSERT( !FooCount, NULL );
     REMARK("sizeof(concurrent_vector<int>) == %d\n", (int)sizeof(tbb::concurrent_vector<int>));
     return Harness::Done;
index 481999e..5626ec7 100644 (file)
@@ -1146,7 +1146,6 @@ void TestSlabAlignment() {
     }
 }
 
-#include "harness.h"
 #include "harness_memory.h"
 
 // TODO: Consider adding Huge Pages support on macOS (special mmap flag).
@@ -1206,7 +1205,7 @@ void TestTHP() {
     if ((newSystemTHPCount - currentSystemTHPCount) < allocCount
             && (newSystemTHPAllocatedSize - currentSystemTHPAllocatedSize) / (2 * 1024) < allocCount) {
         REPORT( "Warning: the system didn't allocate needed amount of THPs.\n" );
-    } 
+    }
 
     // Test memory unmap
     for (int i = 0; i < allocCount; i++) {
@@ -1216,6 +1215,75 @@ void TestTHP() {
 }
 #endif // __linux__
 
+inline size_t getStabilizedMemUsage() {
+    for (int i = 0; i < 3; i++) GetMemoryUsage();
+    return GetMemoryUsage();
+}
+
+inline void* reallocAndRetrieve(void* origPtr, size_t reallocSize, size_t& origBlockSize, size_t& reallocBlockSize) {
+    rml::internal::LargeMemoryBlock* origLmb = ((rml::internal::LargeObjectHdr *)origPtr - 1)->memoryBlock;
+    origBlockSize = origLmb->unalignedSize;
+
+    void* reallocPtr = rml::internal::reallocAligned(defaultMemPool, origPtr, reallocSize, 0);
+
+    // Retrieved reallocated block information
+    rml::internal::LargeMemoryBlock* reallocLmb = ((rml::internal::LargeObjectHdr *)reallocPtr - 1)->memoryBlock;
+    reallocBlockSize = reallocLmb->unalignedSize;
+
+    return reallocPtr;
+}
+
+void TestReallocDecreasing() {
+
+    /* Testing that actual reallocation happens for large objects that do not fit the backend cache
+       but decrease in size by a factor of >= 2. */
+
+    size_t startSize = 100 * 1024 * 1024;
+    size_t maxBinnedSize = defaultMemPool->extMemPool.backend.getMaxBinnedSize();
+    void*  origPtr = scalable_malloc(startSize);
+    void*  reallocPtr = NULL;
+
+    // Realloc on 1MB less size
+    size_t origBlockSize = 42;
+    size_t reallocBlockSize = 43;
+    reallocPtr = reallocAndRetrieve(origPtr, startSize - 1 * 1024 * 1024, origBlockSize, reallocBlockSize);
+    MALLOC_ASSERT(origBlockSize == reallocBlockSize, "Reallocated block size shouldn't change");
+    MALLOC_ASSERT(reallocPtr == origPtr, "Original pointer shouldn't change");
+
+    // Repeated decreasing reallocation while max cache bin size reached
+    size_t reallocSize = (startSize / 2) - 1000; // exact realloc
+    while(reallocSize > maxBinnedSize) {
+
+        // Prevent huge/large object caching
+        defaultMemPool->extMemPool.loc.cleanAll();
+        // Prevent local large object caching
+        TLSData *tls = defaultMemPool->getTLS(/*create=*/false);
+        tls->lloc.externalCleanup(&defaultMemPool->extMemPool);
+
+        size_t sysMemUsageBefore = getStabilizedMemUsage();
+        size_t totalMemSizeBefore = defaultMemPool->extMemPool.backend.getTotalMemSize();
+
+        reallocPtr = reallocAndRetrieve(origPtr, reallocSize, origBlockSize, reallocBlockSize);
+
+        MALLOC_ASSERT(origBlockSize > reallocBlockSize, "Reallocated block size should decrease.");
+
+        size_t sysMemUsageAfter = getStabilizedMemUsage();
+        size_t totalMemSizeAfter = defaultMemPool->extMemPool.backend.getTotalMemSize();
+
+        // Avoid a false failure if backend caching occurred or system memory usage info could not be read
+        if (totalMemSizeBefore > totalMemSizeAfter && sysMemUsageAfter != 0 && sysMemUsageBefore != 0) {
+            MALLOC_ASSERT(sysMemUsageBefore > sysMemUsageAfter, "Memory was not released");
+        }
+
+        origPtr = reallocPtr;
+        reallocSize = (reallocSize / 2) - 1000; // exact realloc
+    }
+    scalable_free(reallocPtr);
+
+    /* TODO: Decreasing reallocation of large objects that fit backend cache */
+    /* TODO: Small objects decreasing reallocation test */
+}
+
 int TestMain () {
     scalable_allocation_mode(USE_HUGE_PAGES, 0);
 #if !__TBB_WIN8UI_SUPPORT
@@ -1246,6 +1314,7 @@ int TestMain () {
     TestHeapLimit();
     TestLOC();
     TestSlabAlignment();
+    TestReallocDecreasing();
 
 #if __linux__
     if (isTHPEnabledOnMachine()) {
index 6425c2c..24e78b7 100644 (file)
 
 #define TBB_PREVIEW_LOCAL_OBSERVER 1
 #define __TBB_EXTRA_DEBUG 1
+#define TBB_PREVIEW_GLOBAL_CONTROL 1
 
 #include <stdexcept>
 #include <cstdlib>
 #include <cstdio>
+#include <vector>
+#include <set>
 
 #include "harness_fp.h"
 
@@ -36,6 +39,7 @@
 #endif /* __TBB_TASK_ISOLATION */
 
 #include "tbb/task_arena.h"
+#include "tbb/atomic.h"
 #include "tbb/task_scheduler_observer.h"
 #include "tbb/task_scheduler_init.h"
 #include "tbb/parallel_for.h"
@@ -51,6 +55,7 @@
 #pragma comment(lib, __TBB_STRING(__TBB_LIB_NAME))
 #endif
 
+#include "tbb/global_control.h"
 //--------------------------------------------------//
 // Test that task_arena::initialize and task_arena::terminate work when doing nothing else.
 /* maxthread is treated as the biggest possible concurrency level. */
@@ -1457,7 +1462,140 @@ void TestDefaultWorkersLimit() {
 }
 //--------------------------------------------------//
 
-int TestMain () {
+// MyObserver checks whether worker threads join the same arena
+struct MyObserver: public tbb::task_scheduler_observer {
+    tbb::enumerable_thread_specific<tbb::task_arena*>& my_tls;
+    tbb::task_arena& my_arena;
+    tbb::atomic<int>& my_failure_counter;
+    tbb::atomic<int>& my_counter;
+
+    MyObserver(tbb::task_arena& a,
+        tbb::enumerable_thread_specific<tbb::task_arena*>& tls,
+        tbb::atomic<int>& failure_counter,
+        tbb::atomic<int>& counter)
+        : tbb::task_scheduler_observer(a), my_tls(tls), my_arena(a),
+        my_failure_counter(failure_counter), my_counter(counter) {
+        observe(true);
+    }
+    void on_scheduler_entry(bool worker) __TBB_override {
+        if (worker) {
+            ++my_counter;
+            tbb::task_arena*& cur_arena = my_tls.local();
+            if (cur_arena != 0 && cur_arena != &my_arena) {
+                ++my_failure_counter;
+            }
+            cur_arena = &my_arena;
+        }
+    }
+};
+
+struct MyLoopBody {
+    Harness::SpinBarrier& m_barrier;
+    MyLoopBody(Harness::SpinBarrier& b):m_barrier(b) { }
+    void operator()(int) const {
+        m_barrier.wait();
+    }
+};
+
+struct TaskForArenaExecute {
+    Harness::SpinBarrier& m_barrier;
+    TaskForArenaExecute(Harness::SpinBarrier& b):m_barrier(b) { }
+    void operator()() const {
+         tbb::parallel_for(0, tbb::this_task_arena::max_concurrency(),
+             MyLoopBody(m_barrier), tbb::simple_partitioner()
+         );
+    }
+};
+
+struct ExecuteParallelFor {
+    int n_per_thread;
+    int n_repetitions;
+    std::vector<tbb::task_arena>& arenas;
+    Harness::SpinBarrier& arena_barrier;
+    Harness::SpinBarrier& master_barrier;
+    ExecuteParallelFor(const int n_per_thread_, const int n_repetitions_,
+        std::vector<tbb::task_arena>& arenas_,
+        Harness::SpinBarrier& arena_barrier_, Harness::SpinBarrier& master_barrier_)
+            : n_per_thread(n_per_thread_), n_repetitions(n_repetitions_), arenas(arenas_),
+              arena_barrier(arena_barrier_), master_barrier(master_barrier_){ }
+    void operator()(int i) const {
+        for (int j = 0; j < n_repetitions; ++j) {
+            arenas[i].execute(TaskForArenaExecute(arena_barrier));
+            for(volatile int k = 0; k < n_per_thread; ++k){/* waiting until workers fall asleep */}
+            master_barrier.wait();
+        }
+    }
+};
+
+// if n_threads == 0, the default number of threads is used
+void TestArenaWorkersMigrationWithNumThreads(int n_threads = 0) {
+    if (n_threads == 0) {
+        n_threads = tbb::task_scheduler_init::default_num_threads();
+    }
+    const int max_n_arenas = 8;
+    int n_arenas = 2;
+    if(n_threads >= 16)
+        n_arenas = max_n_arenas;
+    else if (n_threads >= 8)
+        n_arenas = 4;
+    n_threads = n_arenas * (n_threads / n_arenas);
+    const int n_per_thread = 10000000;
+    const int n_repetitions = 100;
+    const int n_outer_repetitions = 20;
+    std::multiset<float> failure_ratio; // for median calculation
+    tbb::global_control control(tbb::global_control::max_allowed_parallelism, n_threads - (n_arenas - 1));
+    Harness::SpinBarrier master_barrier(n_arenas);
+    Harness::SpinBarrier arena_barrier(n_threads);
+    MyObserver* observer[max_n_arenas];
+    std::vector<tbb::task_arena> arenas(n_arenas);
+    tbb::atomic<int> failure_counter;
+    tbb::atomic<int> counter;
+    tbb::enumerable_thread_specific<tbb::task_arena*> tls;
+    for (int i = 0; i < n_arenas; ++i) {
+        arenas[i].initialize(n_threads / n_arenas);
+        observer[i] = new MyObserver(arenas[i], tls, failure_counter, counter);
+    }
+    int ii = 0;
+    for (; ii < n_outer_repetitions; ++ii) {
+        failure_counter = 0;
+        counter = 0;
+        // Main code
+        NativeParallelFor(n_arenas, ExecuteParallelFor(n_per_thread, n_repetitions,
+            arenas, arena_barrier, master_barrier));
+        // TODO: get rid of check below by setting ratio between n_threads and n_arenas
+        failure_ratio.insert((counter != 0 ? float(failure_counter) / counter : 1.0f));
+        tls.clear();
+        // collect 3 elements in failure_ratio before calculating median
+        if (ii > 1) {
+            std::multiset<float>::iterator it = failure_ratio.begin();
+            std::advance(it, failure_ratio.size() / 2);
+            if (*it < 0.02)
+                break;
+        }
+    }
+    for (int i = 0; i < n_arenas; ++i) {
+        delete observer[i];
+    }
+    // check if the median is too big
+    std::multiset<float>::iterator it = failure_ratio.begin();
+    std::advance(it, failure_ratio.size() / 2);
+    // TODO: decrease constants 0.05 and 0.3 by setting ratio between n_threads and n_arenas
+    if (*it > 0.05) {
+        REPORT("Warning: too many cases where threads join different arenas.\n");
+        ASSERT(*it <= 0.3, "Too many cases where threads join different arenas.\n");
+    }
+}
+
+void TestArenaWorkersMigration() {
+    TestArenaWorkersMigrationWithNumThreads(4);
+    if (tbb::task_scheduler_init::default_num_threads() != 4) {
+        TestArenaWorkersMigrationWithNumThreads();
+    }
+}
+
+//--------------------------------------------------//
+
+int TestMain() {
 #if __TBB_TASK_ISOLATION
     TestIsolatedExecute();
 #endif /* __TBB_TASK_ISOLATION */
@@ -1474,6 +1612,7 @@ int TestMain () {
     TestMultipleWaits();
     TestMoveSemantics();
     TestReturnValue();
+    TestArenaWorkersMigration();
     return Harness::Done;
 }
 
index fe2500d..a114106 100644 (file)
@@ -309,7 +309,11 @@ int TestMain ()
     TestFuncDefinitionPresence( parallel_do, (const intarray&, const Body1a&, tbb::task_group_context&), void );
     TestFuncDefinitionPresence( parallel_for_each, (const intarray&, const Body1&, tbb::task_group_context&), void );
     TestFuncDefinitionPresence( parallel_for, (int, int, const Body1&, const tbb::auto_partitioner&, tbb::task_group_context&), void );
+    TestFuncDefinitionPresence( parallel_for, (int, int, const Body1&, tbb::task_group_context&), void );
     TestFuncDefinitionPresence( parallel_reduce, (const tbb::blocked_range<int>&, Body2&, const tbb::auto_partitioner&, tbb::task_group_context&), void );
+    TestFuncDefinitionPresence( parallel_reduce, (const tbb::blocked_range<int>&, Body2&, tbb::task_group_context&), void );
+    TestFuncDefinitionPresence( parallel_deterministic_reduce, (const tbb::blocked_range<int>&, Body2&, const tbb::simple_partitioner&, tbb::task_group_context&), void );
+    TestFuncDefinitionPresence( parallel_deterministic_reduce, (const tbb::blocked_range<int>&, Body2&, tbb::task_group_context&), void );
 #endif /* __TBB_TASK_GROUP_CONTEXT */
     TestTypeDefinitionPresence( proportional_split );
 
index 3c595a7..da9efd7 100644 (file)
@@ -229,7 +229,7 @@ int main(int argc, char *argv[] ) {
 void initialize_strings_vector(std::vector <string_pair>* vector)
 {
     vector->push_back(string_pair("TBB: VERSION\t\t2019.0", required));       // check TBB_VERSION
-    vector->push_back(string_pair("TBB: INTERFACE VERSION\t11001", required)); // check TBB_INTERFACE_VERSION
+    vector->push_back(string_pair("TBB: INTERFACE VERSION\t11002", required)); // check TBB_INTERFACE_VERSION
     vector->push_back(string_pair("TBB: BUILD_DATE", required));
     vector->push_back(string_pair("TBB: BUILD_HOST", required));
     vector->push_back(string_pair("TBB: BUILD_OS", required));