From 8ff3697f544c5a8728146b70ae3a978025be1f3e Mon Sep 17 00:00:00 2001
From: tbbdev
Date: Wed, 7 Nov 2018 14:38:02 +0300
Subject: [PATCH] Committing TBB 2019 Update 2 source code

---
 CHANGES                                     |  27 ++++++
 README.md                                   |   4 +-
 doc/Release_Notes.txt                       |   2 +-
 include/tbb/concurrent_hash_map.h           |  33 ++++++-
 include/tbb/concurrent_priority_queue.h     |  10 +-
 include/tbb/concurrent_queue.h              |  18 ++++
 include/tbb/concurrent_vector.h             |  19 ++++
 include/tbb/parallel_reduce.h               |  18 ++++
 include/tbb/tbb_config.h                    |  20 +++-
 include/tbb/tbb_stddef.h                    |   2 +-
 src/rml/server/thread_monitor.h             |  25 ++---
 src/tbb/concurrent_monitor.cpp              |  17 ++--
 src/tbb/concurrent_monitor.h                |   6 +-
 src/tbb/market.cpp                          |  42 ++++++---
 src/tbb/market.h                            |   5 +-
 src/tbb/private_server.cpp                  |  12 ++-
 src/tbb/semaphore.h                         |  11 +--
 src/tbbmalloc/backend.cpp                   |   2 +-
 src/tbbmalloc/frontend.cpp                  |  49 ++++++----
 src/tbbmalloc/tbbmalloc_internal.h          |   2 +-
 src/test/test_blocked_range.cpp             |  22 +++++
 src/test/test_blocked_range2d.cpp           |  24 +++++
 src/test/test_blocked_range3d.cpp           |  26 +++++
 src/test/test_concurrent_hash_map.cpp       |  93 ++++++++++++++----
 src/test/test_concurrent_priority_queue.cpp |  48 ++++++++++
 src/test/test_concurrent_queue.cpp          |  37 ++++++++
 src/test/test_concurrent_vector.cpp         |  52 ++++++++++
 src/test/test_malloc_whitebox.cpp           |  73 +++++++++++++-
 src/test/test_task_arena.cpp                | 141 +++++++++++++++++++++++++++-
 src/test/test_tbb_header.cpp                |   4 +
 src/test/test_tbb_version.cpp               |   2 +-
 31 files changed, 750 insertions(+), 96 deletions(-)

diff --git a/CHANGES b/CHANGES
index 3cc1f5f..5105e3c 100644
--- a/CHANGES
+++ b/CHANGES
@@ -2,6 +2,30 @@
 The list of most significant changes made over time in
 Intel(R) Threading Building Blocks (Intel(R) TBB).
 
+Intel TBB 2019 Update 2
+TBB_INTERFACE_VERSION == 11002
+
+Changes (w.r.t. Intel TBB 2019 Update 1):
+
+- Added constructors with HashCompare argument to concurrent_hash_map
+    (https://github.com/01org/tbb/pull/63).
+- Added overloads for parallel_reduce with default partitioner and
+    user-supplied context.
+- Added deduction guides for TBB containers: concurrent_vector,
+    concurrent_queue, concurrent_bounded_queue,
+    concurrent_priority_queue.
+- Reallocation of memory objects >1MB now copies and frees memory if
+    the size shrinks to half of the original size or less, trading
+    some performance for reduced memory usage.
+- After a period of sleep, TBB worker threads now prefer returning to
+    their last used task arena.
+
+Bugs fixed:
+
+- Fixed compilation of task_group.h when targeting macOS* 10.11 or
+    earlier (https://github.com/conda-forge/tbb-feedstock/issues/42).
+
+------------------------------------------------------------------------
 Intel TBB 2019 Update 1
 TBB_INTERFACE_VERSION == 11001
 
@@ -27,6 +51,9 @@
   observer.
 - Fixed compilation of task_group.h by Visual C++* 15.7 with
   /permissive- option (https://github.com/01org/tbb/issues/53).
+- Fixed tbb4py to avoid dependency on Intel(R) C++ Compiler shared
+    libraries.
+- Fixed compilation for Anaconda environment with GCC 7.3 and higher.
 
 ------------------------------------------------------------------------
 Intel TBB 2019
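The two container-related entries above can be tried out with a short, self-contained sketch. The snippet below is illustrative only (it is not part of the patch) and assumes a C++17 compiler so that the new deduction guides are enabled:

    #include <utility>
    #include <vector>
    #include "tbb/concurrent_vector.h"
    #include "tbb/concurrent_hash_map.h"

    int main() {
        std::vector<int> src = {1, 2, 3};

        // Deduction guides: the element type comes from the iterator pair,
        // so no template arguments need to be spelled out.
        tbb::concurrent_vector cv(src.begin(), src.end()); // concurrent_vector<int>

        // New constructors: a HashCompare instance can now be supplied
        // together with the initial bucket count.
        tbb::tbb_hash_compare<int> hc;
        tbb::concurrent_hash_map<int, int> m(8, hc);
        m.insert(std::make_pair(1, 10));
        return (cv.size() == 3 && m.size() == 1) ? 0 : 1;
    }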
diff --git a/README.md b/README.md
index a502785..4a9d1bd 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
-# Threading Building Blocks 2019 Update 1
-[![Stable release](https://img.shields.io/badge/version-2019_U1-green.svg)](https://github.com/01org/tbb/releases/tag/2019_U1)
+# Threading Building Blocks 2019 Update 2
+[![Stable release](https://img.shields.io/badge/version-2019_U2-green.svg)](https://github.com/01org/tbb/releases/tag/2019_U2)
 [![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](LICENSE)
 
 Threading Building Blocks (TBB) lets you easily write parallel C++ programs that take
diff --git a/doc/Release_Notes.txt b/doc/Release_Notes.txt
index 045f031..aba73d9 100644
--- a/doc/Release_Notes.txt
+++ b/doc/Release_Notes.txt
@@ -92,7 +92,7 @@ Software - Supported Compilers
     GNU Compilers (gcc) 4.1 - 7.1
     GNU C Library (glibc) version 2.4 - 2.19
     Xcode* 7.0 - 9.1
-    Android* NDK r10e - r16
+    Android* NDK r10e - r17b
 
 Software - Supported Performance Analysis Tools
diff --git a/include/tbb/concurrent_hash_map.h b/include/tbb/concurrent_hash_map.h
index f84dfd3..c9030e7 100644
--- a/include/tbb/concurrent_hash_map.h
+++ b/include/tbb/concurrent_hash_map.h
@@ -759,9 +759,19 @@ public:
         : internal::hash_map_base(), my_allocator(a)
     {}
 
+    explicit concurrent_hash_map( const HashCompare& compare, const allocator_type& a = allocator_type() )
+        : internal::hash_map_base(), my_allocator(a), my_hash_compare(compare)
+    {}
+
     //! Construct empty table with n preallocated buckets. This number serves also as initial concurrency level.
     concurrent_hash_map( size_type n, const allocator_type &a = allocator_type() )
-        : my_allocator(a)
+        : internal::hash_map_base(), my_allocator(a)
     {
         reserve( n );
     }
 
+    concurrent_hash_map( size_type n, const HashCompare& compare, const allocator_type& a = allocator_type() )
+        : internal::hash_map_base(), my_allocator(a), my_hash_compare(compare)
+    {
+        reserve( n );
+    }
+
@@ -800,7 +810,16 @@ public:
     //! Construction with copying iteration range and given allocator instance
     template<typename I>
     concurrent_hash_map( I first, I last, const allocator_type &a = allocator_type() )
-        : my_allocator(a)
+        : internal::hash_map_base(), my_allocator(a)
     {
         call_clear_on_leave scope_guard(this);
         internal_copy(first, last, std::distance(first, last));
         scope_guard.dismiss();
     }
 
+    template<typename I>
+    concurrent_hash_map( I first, I last, const HashCompare& compare, const allocator_type& a = allocator_type() )
+        : internal::hash_map_base(), my_allocator(a), my_hash_compare(compare)
+    {
+        call_clear_on_leave scope_guard(this);
+        internal_copy(first, last, std::distance(first, last));
+        scope_guard.dismiss();
+    }
+
@@ -810,7 +829,15 @@ public:
 #if __TBB_INITIALIZER_LISTS_PRESENT
     //! Construct empty table with n preallocated buckets. This number serves also as initial concurrency level.
     concurrent_hash_map( std::initializer_list<value_type> il, const allocator_type &a = allocator_type() )
-        : my_allocator(a)
+        : internal::hash_map_base(), my_allocator(a)
     {
         call_clear_on_leave scope_guard(this);
         internal_copy(il.begin(), il.end(), il.size());
         scope_guard.dismiss();
     }
 
+    concurrent_hash_map( std::initializer_list<value_type> il, const HashCompare& compare, const allocator_type& a = allocator_type() )
+        : internal::hash_map_base(), my_allocator(a), my_hash_compare(compare)
+    {
+        call_clear_on_leave scope_guard(this);
+        internal_copy(il.begin(), il.end(), il.size());
+        scope_guard.dismiss();
+    }
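The point of the new overloads above is that the comparator no longer has to be default-constructible or stateless. A minimal sketch, assuming a hypothetical user-defined comparator (CaseInsensitiveCompare is illustrative, not part of TBB):

    #include <cctype>
    #include <string>
    #include "tbb/concurrent_hash_map.h"

    // Illustrative comparator: hashes and compares keys case-insensitively.
    struct CaseInsensitiveCompare {
        size_t hash(const std::string& s) const {
            size_t h = 0;
            for (unsigned char c : s) h = h*31 + std::tolower(c);
            return h;
        }
        bool equal(const std::string& a, const std::string& b) const {
            if (a.size() != b.size()) return false;
            for (size_t i = 0; i < a.size(); ++i)
                if (std::tolower((unsigned char)a[i]) != std::tolower((unsigned char)b[i]))
                    return false;
            return true;
        }
    };

    void sketch() {
        typedef tbb::concurrent_hash_map<std::string, int, CaseInsensitiveCompare> Map;
        CaseInsensitiveCompare cmp;
        Map m1(cmp);       // empty table, user-supplied comparator
        Map m2(128, cmp);  // likewise, with 128 preallocated buckets
    }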
diff --git a/include/tbb/concurrent_priority_queue.h b/include/tbb/concurrent_priority_queue.h
index caab8bb..ad4e85d 100644
--- a/include/tbb/concurrent_priority_queue.h
+++ b/include/tbb/concurrent_priority_queue.h
@@ -121,7 +121,7 @@ class concurrent_priority_queue {
     //! Copy constructor
     /** This operation is unsafe if there are pending concurrent operations on the src queue. */
-    explicit concurrent_priority_queue(const concurrent_priority_queue& src) : mark(src.mark),
+    concurrent_priority_queue(const concurrent_priority_queue& src) : mark(src.mark),
         my_size(src.my_size), data(src.data.begin(), src.data.end(), src.data.get_allocator())
     {
         my_aggregator.initialize_handler(my_functor_t(this));
@@ -481,6 +481,14 @@ class concurrent_priority_queue {
     }
 };
 
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+// Deduction guide for the constructor from two iterators
+template<typename InputIterator,
+         typename T = typename std::iterator_traits<InputIterator>::value_type,
+         typename A = cache_aligned_allocator<T>
+> concurrent_priority_queue(InputIterator, InputIterator, const A& = A())
+-> concurrent_priority_queue<T, std::less<T>, A>;
+#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
 } // namespace interface5
 
 using interface5::concurrent_priority_queue;
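With the guide above, a priority queue can be constructed from an iterator pair without naming the element type, the comparator, or the allocator. A small sketch (again requiring C++17); the analogous guides for concurrent_queue and concurrent_bounded_queue follow below:

    #include <vector>
    #include "tbb/concurrent_priority_queue.h"

    void sketch() {
        std::vector<int> v = {3, 1, 4, 1, 5};
        // Deduced as concurrent_priority_queue<int, std::less<int>,
        //                          tbb::cache_aligned_allocator<int>>
        tbb::concurrent_priority_queue q(v.begin(), v.end());
        int top;
        q.try_pop(top); // top == 5
    }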
diff --git a/include/tbb/concurrent_queue.h b/include/tbb/concurrent_queue.h
index 0e9bc35..98b434b 100644
--- a/include/tbb/concurrent_queue.h
+++ b/include/tbb/concurrent_queue.h
@@ -177,6 +177,15 @@ public:
     const_iterator unsafe_end() const {return const_iterator();}
 } ;
 
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+// Deduction guide for the constructor from two iterators
+template<typename InputIterator,
+         typename T = typename std::iterator_traits<InputIterator>::value_type,
+         typename A = cache_aligned_allocator<T>
+> concurrent_queue(InputIterator, InputIterator, const A& = A())
+-> concurrent_queue<T, A>;
+#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
+
 template<typename T, class A>
 concurrent_queue<T,A>::~concurrent_queue() {
     clear();
@@ -439,6 +448,15 @@ public:
 };
 
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+// Deduction guide for the constructor from two iterators
+template<typename InputIterator,
+         typename T = typename std::iterator_traits<InputIterator>::value_type,
+         typename A = cache_aligned_allocator<T>
+> concurrent_bounded_queue(InputIterator, InputIterator, const A& = A())
+-> concurrent_bounded_queue<T, A>;
+#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
+
 template<typename T, class A>
 concurrent_bounded_queue<T,A>::~concurrent_bounded_queue() {
     clear();
diff --git a/include/tbb/concurrent_vector.h b/include/tbb/concurrent_vector.h
index 5d8f1d4..e4a9448 100644
--- a/include/tbb/concurrent_vector.h
+++ b/include/tbb/concurrent_vector.h
@@ -1156,6 +1156,25 @@ private:
     };
 };
 
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+// Deduction guide for the constructor from two iterators
+template<typename I,
+         typename T = typename std::iterator_traits<I>::value_type,
+         typename A = cache_aligned_allocator<T>
+> concurrent_vector(I, I, const A& = A())
+-> concurrent_vector<T, A>;
+
+// Deduction guide for the constructor from a vector and allocator
+template<typename T, typename A1, typename A2>
+concurrent_vector(const concurrent_vector<T, A1> &, const A2 &)
+-> concurrent_vector<T, A2>;
+
+// Deduction guide for the constructor from an initializer_list
+template<typename T, typename A = cache_aligned_allocator<T>
+> concurrent_vector(std::initializer_list<T>, const A& = A())
+-> concurrent_vector<T, A>;
+#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
+
 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
     #pragma warning (push)
     #pragma warning (disable: 4701) // potentially uninitialized local variable "old"
diff --git a/include/tbb/parallel_reduce.h b/include/tbb/parallel_reduce.h
index 52afc6a..9429d19 100644
--- a/include/tbb/parallel_reduce.h
+++ b/include/tbb/parallel_reduce.h
@@ -393,6 +393,13 @@ void parallel_reduce( const Range& range, Body& body, affinity_partitioner& part
 }
 
 #if __TBB_TASK_GROUP_CONTEXT
+//! Parallel iteration with reduction, default partitioner and user-supplied context.
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_reduce( const Range& range, Body& body, task_group_context& context ) {
+    internal::start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
+}
+
 //! Parallel iteration with reduction, simple partitioner and user-supplied context.
 /** @ingroup algorithms **/
 template<typename Range, typename Body>
@@ -480,6 +487,17 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody
 }
 
 #if __TBB_TASK_GROUP_CONTEXT
+//! Parallel iteration with reduction, default partitioner and user-supplied context.
+/** @ingroup algorithms **/
+template<typename Range, typename Value, typename RealBody, typename Reduction>
+Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
+                       task_group_context& context ) {
+    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
+    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
+                          ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
+    return body.result();
+}
+
 //! Parallel iteration with reduction, simple partitioner and user-supplied context.
 /** @ingroup algorithms **/
 template<typename Range, typename Value, typename RealBody, typename Reduction>
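The new overloads fill a gap: previously a task_group_context could be combined only with an explicitly named partitioner. A hedged usage sketch of the functional form (the surrounding function is illustrative, not from the patch):

    #include <functional>
    #include "tbb/blocked_range.h"
    #include "tbb/parallel_reduce.h"
    #include "tbb/task.h" // task_group_context

    double sum(const double* a, size_t n) {
        // An isolated context lets a caller cancel just this reduction.
        tbb::task_group_context ctx(tbb::task_group_context::isolated);
        return tbb::parallel_reduce(
            tbb::blocked_range<size_t>(0, n), 0.0,
            [=](const tbb::blocked_range<size_t>& r, double acc) {
                for (size_t i = r.begin(); i != r.end(); ++i) acc += a[i];
                return acc;
            },
            std::plus<double>(),
            ctx); // the default partitioner is used; none is spelled out
    }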
diff --git a/include/tbb/tbb_config.h b/include/tbb/tbb_config.h
index 74f59c1..dd585c7 100644
--- a/include/tbb/tbb_config.h
+++ b/include/tbb/tbb_config.h
@@ -56,7 +56,7 @@
 #endif
 
 #if __clang__
-    /** according to clang documentation, version can be vendor specific **/
+    // according to clang documentation, version can be vendor specific
    #define __TBB_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
 #endif
 
@@ -65,6 +65,16 @@
     #define __TBB_IOS 1
 #endif
 
+#if __APPLE__
+    #if __INTEL_COMPILER && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1099 \
+                         && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101000
+        // ICC does not correctly set the macro if -mmacosx-version-min is not specified
+        #define __TBB_MACOS_TARGET_VERSION  (100000 + 10*(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ - 1000))
+    #else
+        #define __TBB_MACOS_TARGET_VERSION  __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__
+    #endif
+#endif
+
 /** Preprocessor symbols to determine HW architecture **/
 
 #if _WIN32||_WIN64
@@ -208,6 +218,7 @@
     #define __TBB_ALIGNAS_PRESENT                   (__INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1500)
     #define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT    (__INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1210)
     #define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT    (__cplusplus >= 201402L)
+    #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT    __INTEL_COMPILER > 1900
 #elif __clang__
 /** TODO: these options need to be rechecked **/
     #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT  __has_feature(__cxx_variadic_templates__)
@@ -237,6 +248,7 @@
     #define __TBB_ALIGNAS_PRESENT                   __has_feature(cxx_alignas)
     #define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT    __has_feature(cxx_alias_templates)
     #define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT    (__cplusplus >= 201402L)
+    #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT    (__has_feature(__cpp_deduction_guides))
 #elif __GNUC__
     #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT  __GXX_EXPERIMENTAL_CXX0X__
     #define __TBB_CPP11_VARIADIC_FIXED_LENGTH_EXP_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40700)
@@ -262,6 +274,7 @@
     #define __TBB_ALIGNAS_PRESENT                   (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40800)
     #define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT    (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40700)
     #define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT    (__cplusplus >= 201402L && __TBB_GCC_VERSION >= 50000)
+    #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT    (__cpp_deduction_guides >= 201606)
 #elif _MSC_VER
 // These definitions are also used with Intel C++ Compiler in "default" mode (__INTEL_CXX11_MODE__ == 0);
 // see a comment in "__INTEL_COMPILER" section above.
@@ -286,6 +299,7 @@
     #define __TBB_ALIGNAS_PRESENT                   (_MSC_VER >= 1900)
     #define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT    (_MSC_VER >= 1800)
     #define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT    (_MSC_VER >= 1900)
+    #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT    (_MSVC_LANG >= 201703L)
 #else
     #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT  0
     #define __TBB_CPP11_RVALUE_REF_PRESENT          0
@@ -306,6 +320,7 @@
     #define __TBB_ALIGNAS_PRESENT                   0
     #define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT    0
     #define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT    (__cplusplus >= 201402L)
+    #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT    0
 #endif
 
 // C++11 standard library features
@@ -337,7 +352,8 @@
 
 #define __TBB_CPP11_GET_NEW_HANDLER_PRESENT (_MSC_VER >= 1900 || __TBB_GLIBCXX_VERSION >= 40900 && __GXX_EXPERIMENTAL_CXX0X__ || _LIBCPP_VERSION)
 
-#define __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT (_MSC_VER >= 1900 || __GLIBCXX__ && __cpp_lib_uncaught_exceptions || _LIBCPP_VERSION >= 3700)
+#define __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT (_MSC_VER >= 1900 || __GLIBCXX__ && __cpp_lib_uncaught_exceptions \
+    || _LIBCPP_VERSION >= 3700 && (!__TBB_MACOS_TARGET_VERSION || __TBB_MACOS_TARGET_VERSION >= 101200))
 
 // std::swap is in <utility> only since C++11, though MSVC had it at least since VS2005
 #if _MSC_VER>=1400 || _LIBCPP_VERSION || __GXX_EXPERIMENTAL_CXX0X__
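User code can test the same macro to keep one source compatible with pre-C++17 toolchains. A small sketch:

    #include <vector>
    #include "tbb/concurrent_queue.h" // pulls in tbb/tbb_config.h

    void sketch(const std::vector<int>& v) {
    #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
        tbb::concurrent_queue q(v.begin(), v.end());      // CTAD spelling
    #else
        tbb::concurrent_queue<int> q(v.begin(), v.end()); // portable spelling
    #endif
        (void)q;
    }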
diff --git a/include/tbb/tbb_stddef.h b/include/tbb/tbb_stddef.h
index 37961df..a177421 100644
--- a/include/tbb/tbb_stddef.h
+++ b/include/tbb/tbb_stddef.h
@@ -26,7 +26,7 @@
 #define TBB_VERSION_MINOR 0
 
 // Engineering-focused interface version
-#define TBB_INTERFACE_VERSION 11001
+#define TBB_INTERFACE_VERSION 11002
 #define TBB_INTERFACE_VERSION_MAJOR TBB_INTERFACE_VERSION/1000
 
 // The oldest major interface version still supported
diff --git a/src/rml/server/thread_monitor.h b/src/rml/server/thread_monitor.h
index 4ddd5bf..2b59bb7 100644
--- a/src/rml/server/thread_monitor.h
+++ b/src/rml/server/thread_monitor.h
@@ -78,7 +78,7 @@ public:
         friend class thread_monitor;
         tbb::atomic<size_t> my_epoch;
     };
-    thread_monitor() : spurious(false), my_sema() {
+    thread_monitor() : skipped_wakeup(false), my_sema() {
         my_cookie.my_epoch = 0;
         ITT_SYNC_CREATE(&my_sema, SyncType_RML, SyncObj_ThreadMonitor);
         in_wait = false;
@@ -129,9 +129,9 @@ public:
     //! Detach thread
     static void detach_thread(handle_type handle);
 private:
-    cookie my_cookie;
-    tbb::atomic<bool> in_wait;
-    bool spurious;
+    cookie my_cookie; // epoch counter
+    tbb::atomic<bool> in_wait;
+    bool skipped_wakeup;
     tbb::internal::binary_semaphore my_sema;
 #if USE_PTHREAD
     static void check( int error_code, const char* routine );
@@ -244,24 +244,25 @@ inline void thread_monitor::notify() {
 }
 
 inline void thread_monitor::prepare_wait( cookie& c ) {
-    if( spurious ) {
-        spurious = false;
-        // consumes a spurious posted signal. don't wait on my_sema.
-        my_sema.P();
+    if( skipped_wakeup ) {
+        // Lazily consume a signal that was skipped due to cancel_wait
+        skipped_wakeup = false;
+        my_sema.P(); // does not really wait on the semaphore
     }
     c = my_cookie;
-    in_wait = true;
-    __TBB_full_memory_fence();
+    in_wait.store<tbb::full_fence>( true );
 }
 
 inline void thread_monitor::commit_wait( cookie& c ) {
-    bool do_it = ( c.my_epoch == my_cookie.my_epoch);
+    bool do_it = ( c.my_epoch == my_cookie.my_epoch );
     if( do_it ) my_sema.P();
     else        cancel_wait();
 }
 
 inline void thread_monitor::cancel_wait() {
-    spurious = ! in_wait.fetch_and_store( false );
+    // if not in_wait, then some thread has sent us a signal;
+    // it will be consumed by the next prepare_wait call
+    skipped_wakeup = ! in_wait.fetch_and_store( false );
 }
 
 } // namespace internal
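For orientation, the calling protocol of thread_monitor, paraphrased as comments (a reading aid, not code from the patch):

    // Sleeping side (RML worker):
    //     thread_monitor::cookie c;
    //     monitor.prepare_wait(c);    // records the epoch, sets in_wait
    //     if ( work_appeared() )
    //         monitor.cancel_wait();  // may record a skipped wakeup instead
    //     else
    //         monitor.commit_wait(c); // sleeps unless notify() already ran
    //
    // Waking side:
    //     monitor.notify();           // bumps the epoch, posts the semaphore
    //
    // If notify() lands between prepare_wait() and cancel_wait(), the posted
    // semaphore signal is not lost: skipped_wakeup stays true and the next
    // prepare_wait() consumes it with a non-blocking P().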
diff --git a/src/tbb/concurrent_monitor.cpp b/src/tbb/concurrent_monitor.cpp
index 375e7b7..e8501b0 100644
--- a/src/tbb/concurrent_monitor.cpp
+++ b/src/tbb/concurrent_monitor.cpp
@@ -36,9 +36,9 @@ concurrent_monitor::~concurrent_monitor() {
 void concurrent_monitor::prepare_wait( thread_context& thr, uintptr_t ctx ) {
     if( !thr.ready )
         thr.init();
-    // this is a good place to pump a previous spurious wakeup
-    else if( thr.spurious ) {
-        thr.spurious = false;
+    // this is a good place to pump a previous skipped wakeup
+    else if( thr.skipped_wakeup ) {
+        thr.skipped_wakeup = false;
         thr.semaphore().P();
     }
     thr.context = ctx;
@@ -52,18 +52,17 @@ void concurrent_monitor::prepare_wait( thread_context& thr, uintptr_t ctx ) {
 }
 
 void concurrent_monitor::cancel_wait( thread_context& thr ) {
-    // spurious wakeup will be pumped in the following prepare_wait()
-    thr.spurious = true;
+    // possible skipped wakeup will be pumped in the following prepare_wait()
+    thr.skipped_wakeup = true;
     // try to remove node from waitset
     bool th_in_waitset = thr.in_waitset;
     if( th_in_waitset ) {
         tbb::spin_mutex::scoped_lock l( mutex_ec );
         if (thr.in_waitset) {
-            // successfully removed from waitset,
-            // so there will be no spurious wakeup
-            thr.in_waitset = false;
-            thr.spurious = false;
             waitset_ec.remove( (waitset_t::node_t&)thr );
+            // node is removed from waitset, so there will be no wakeup
+            thr.in_waitset = false;
+            thr.skipped_wakeup = false;
         }
     }
 }
diff --git a/src/tbb/concurrent_monitor.h b/src/tbb/concurrent_monitor.h
index e3aca27..1efa52e 100644
--- a/src/tbb/concurrent_monitor.h
+++ b/src/tbb/concurrent_monitor.h
@@ -100,13 +100,13 @@ public:
     class thread_context : waitset_node_t, no_copy {
         friend class concurrent_monitor;
     public:
-        thread_context() : spurious(false), aborted(false), ready(false), context(0) {
+        thread_context() : skipped_wakeup(false), aborted(false), ready(false), context(0) {
            epoch = 0;
            in_waitset = false;
        }
        ~thread_context() {
            if (ready) {
-                if( spurious ) semaphore().P();
+                if( skipped_wakeup ) semaphore().P();
                semaphore().~binary_semaphore();
            }
        }
@@ -119,7 +119,7 @@ public:
        tbb::aligned_space<binary_semaphore> sema;
        __TBB_atomic unsigned epoch;
        tbb::atomic<bool> in_waitset;
-        bool spurious;
+        bool skipped_wakeup;
        bool aborted;
        bool ready;
        uintptr_t context;
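concurrent_monitor follows the same prepare/commit/cancel discipline, but with a per-thread context queued in a waitset. A typical wait loop, sketched as comments (illustrative, not from the patch):

    // concurrent_monitor::thread_context ctx;
    // for (;;) {
    //     monitor.prepare_wait( ctx, tag );  // enqueue into the waitset
    //     if ( predicate_became_true() ) {
    //         monitor.cancel_wait( ctx );    // dequeue; may mark skipped_wakeup
    //         break;
    //     }
    //     monitor.commit_wait( ctx );        // sleep until notify(...)/abort_all()
    // }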
diff --git a/src/tbb/market.cpp b/src/tbb/market.cpp
index 894cf4f..c922609 100644
--- a/src/tbb/market.cpp
+++ b/src/tbb/market.cpp
@@ -360,10 +360,10 @@ void market::try_destroy_arena ( arena* a, uintptr_t aba_epoch ) {
 }
 
 /** This method must be invoked under my_arenas_list_mutex. **/
-arena* market::arena_in_need ( arena_list_type &arenas, arena *&next ) {
+arena* market::arena_in_need ( arena_list_type &arenas, arena *hint ) {
     if ( arenas.empty() )
         return NULL;
-    arena_list_type::iterator it = next;
+    arena_list_type::iterator it = hint;
     __TBB_ASSERT( it != arenas.end(), NULL );
     do {
         arena& a = *it;
@@ -375,11 +375,9 @@
 #endif
             ) {
             a.my_references += arena::ref_worker;
-            as_atomic(next) = &*it; // a subject for innocent data race under the reader lock
-            // TODO: rework global round robin policy to local or random to avoid this write
             return &a;
         }
-    } while ( it != next );
+    } while ( it != hint );
     return NULL;
 }
 
@@ -415,6 +413,16 @@ int market::update_allotment ( arena_list_type& arenas, int workers_demand, int max_workers ) {
     return assigned;
 }
 
+/** This method must be invoked under my_arenas_list_mutex. **/
+bool market::is_arena_in_list( arena_list_type &arenas, arena *a ) {
+    if ( a ) {
+        for ( arena_list_type::iterator it = arenas.begin(); it != arenas.end(); ++it )
+            if ( a == &*it )
+                return true;
+    }
+    return false;
+}
+
 #if __TBB_TASK_PRIORITY
 inline void market::update_global_top_priority ( intptr_t newPriority ) {
     GATHER_STATISTIC( ++governor::local_scheduler_if_initialized()->my_counters.market_prio_switches );
@@ -432,21 +440,29 @@ inline void market::reset_global_priority () {
     update_global_top_priority(normalized_normal_priority);
 }
 
-arena* market::arena_in_need ( arena* prev_arena )
-{
-    suppress_unused_warning(prev_arena);
+arena* market::arena_in_need ( arena* prev_arena ) {
     if( as_atomic(my_total_demand) <= 0 )
         return NULL;
     arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex, /*is_writer=*/false);
     assert_market_valid();
     int p = my_global_top_priority;
     arena *a = NULL;
-    do {
-        priority_level_info &pl = my_priority_levels[p];
+
+    // Checks if arena is alive or not
+    if ( is_arena_in_list( my_priority_levels[p].arenas, prev_arena ) ) {
+        a = arena_in_need( my_priority_levels[p].arenas, prev_arena );
+    }
+
+    while ( !a && p >= my_global_bottom_priority ) {
+        priority_level_info &pl = my_priority_levels[p--];
         a = arena_in_need( pl.arenas, pl.next_arena );
+        if ( a ) {
+            as_atomic(pl.next_arena) = a; // a subject for innocent data race under the reader lock
+            // TODO: rework global round robin policy to local or random to avoid this write
+        }
         // TODO: When refactoring task priority code, take into consideration the
         // __TBB_TRACK_PRIORITY_LEVEL_SATURATION sections from earlier versions of TBB
-    } while ( !a && --p >= my_global_bottom_priority );
+    }
     return a;
 }
 
@@ -681,7 +697,8 @@ void market::adjust_demand ( arena& a, int delta ) {
 
 void market::process( job& j ) {
     generic_scheduler& s = static_cast<generic_scheduler&>(j);
-    arena *a = NULL;
+    // s.my_arena can be dead. Don't access it until arena_in_need is called
+    arena *a = s.my_arena;
     __TBB_ASSERT( governor::is_set(&s), NULL );
     enum {
         query_interval = 1000,
         first_interval = 1
     };
     while ( (a = arena_in_need(a)) ) {
         a->process(s);
+        a = NULL; // To avoid double checks in arena_in_need
         i = first_interval;
     }
     // Workers leave market because there is no arena in need. It can happen earlier than
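Net effect of the market.cpp changes, schematically (comments only; the names are those used in the hunks above):

    // Worker wake-up path after this patch:
    //     arena* a = s.my_arena;       // arena used before sleeping; may be dead
    //     a = arena_in_need(a);        // prev_arena is trusted only if
    //                                  // is_arena_in_list() still finds it
    //     while ( a ) { a->process(s); a = arena_in_need(NULL); }
    //
    // The round-robin hint write (pl.next_arena) moved from the per-list scan
    // into the priority-level loop, so a successful hit on prev_arena no
    // longer perturbs the global round-robin position.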
diff --git a/src/tbb/market.h b/src/tbb/market.h
index cc76805..d6bdf10 100644
--- a/src/tbb/market.h
+++ b/src/tbb/market.h
@@ -217,6 +217,7 @@ private:
         update_allotment( my_arenas, my_total_demand, (int)my_num_workers_soft_limit );
     }
 
+    // TODO: consider rewriting the code with the is_arena_in_list function
     //! Returns next arena that needs more workers, or NULL.
     arena* arena_in_need (arena*) {
         if(__TBB_load_with_acquire(my_total_demand) <= 0)
@@ -234,10 +235,12 @@
 
     void remove_arena_from_list ( arena& a );
 
-    arena* arena_in_need ( arena_list_type &arenas, arena *&next );
+    arena* arena_in_need ( arena_list_type &arenas, arena *hint );
 
     static int update_allotment ( arena_list_type& arenas, int total_demand, int max_workers );
 
+    bool is_arena_in_list( arena_list_type &arenas, arena *a );
+
     ////////////////////////////////////////////////////////////////////////////////
     // Implementation of rml::tbb_client interface methods
diff --git a/src/tbb/private_server.cpp b/src/tbb/private_server.cpp
index ae25e57..bd79453 100644
--- a/src/tbb/private_server.cpp
+++ b/src/tbb/private_server.cpp
@@ -275,6 +275,7 @@ void private_worker::run() {
             // Check/set the invariant for sleeping
             if( my_state!=st_quit && my_server.try_insert_in_asleep_list(*this) ) {
                 my_thread_monitor.commit_wait(c);
+                __TBB_ASSERT( my_state==st_quit || !my_next, "Thread monitor missed a spurious wakeup?" );
                 my_server.propagate_chain_reaction();
             } else {
                 // Invariant broken
@@ -310,8 +311,10 @@ inline void private_worker::wake_or_launch() {
             release_handle(my_handle, governor::does_client_join_workers(my_client));
         }
     }
-    else
+    else {
+        __TBB_ASSERT( !my_next, "Should not wake a thread while it's still in asleep list" );
         my_thread_monitor.notify();
+    }
 }
 
 //------------------------------------------------------------------------
@@ -390,8 +393,11 @@ void private_server::wake_some( int additional_slack ) {
         }
     }
 done:
-    while( w>wakee )
-        (*--w)->wake_or_launch();
+    while( w>wakee ) {
+        private_worker* ww = *--w;
+        ww->my_next = NULL;
+        ww->wake_or_launch();
+    }
 }
 
 void private_server::adjust_job_count_estimate( int delta ) {
diff --git a/src/tbb/semaphore.h b/src/tbb/semaphore.h
index e80e931..ffcf680 100644
--- a/src/tbb/semaphore.h
+++ b/src/tbb/semaphore.h
@@ -191,6 +191,8 @@ private:
 
 #if __TBB_USE_FUTEX
 class binary_semaphore : no_copy {
+// The implementation is equivalent to the "Mutex, Take 3" one
+// in the paper "Futexes Are Tricky" by Ulrich Drepper
 public:
     //! ctor
     binary_semaphore() { my_sem = 1; }
@@ -202,7 +204,7 @@ public:
         if( (s = my_sem.compare_and_swap( 1, 0 ))!=0 ) {
             if( s!=2 )
                 s = my_sem.fetch_and_store( 2 );
-            while( s!=0 ) {
+            while( s!=0 ) { // This loop deals with spurious wakeup
                 futex_wait( &my_sem, 2 );
                 s = my_sem.fetch_and_store( 2 );
             }
@@ -211,14 +213,11 @@
     //! post/release
     void V() {
         __TBB_ASSERT( my_sem>=1, "multiple V()'s in a row?" );
-        if( my_sem--!=1 ) {
-            //if old value was 2
-            my_sem = 0;
+        if( my_sem.fetch_and_store( 0 )==2 )
             futex_wakeup_one( &my_sem );
-        }
     }
 private:
-    atomic<int> my_sem;
+    atomic<int> my_sem; // 0 - open; 1 - closed, no waits; 2 - closed, possible waits
 };
 #else
 typedef uint32_t sem_count_t;
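A compact map of the futex semaphore's states, expanding the new comment above (reading aid only, not code from the patch):

    // my_sem == 0 : open (V() has been called; the next P() takes the fast path)
    // my_sem == 1 : closed, and no thread can be blocked in futex_wait()
    // my_sem == 2 : closed, and some thread may be blocked in futex_wait()
    //
    // P():  1 -> 0 via compare_and_swap on the fast path; otherwise
    //       fetch_and_store(2) marks "possible waiters" and the thread
    //       futex_wait()s while the value stays non-zero.
    // V():  fetch_and_store(0) opens the semaphore unconditionally; the
    //       futex_wakeup_one() syscall is issued only if the old value was 2,
    //       i.e. only when someone may actually be sleeping.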
diff --git a/src/tbbmalloc/backend.cpp b/src/tbbmalloc/backend.cpp
index cd57b38..44d3bcc 100644
--- a/src/tbbmalloc/backend.cpp
+++ b/src/tbbmalloc/backend.cpp
@@ -650,7 +650,7 @@ FreeBlock *Backend::splitAlignedBlock(FreeBlock *fBlock, int num, size_t size,
     return fBlock;
 }
 
-inline size_t Backend::getMaxBinnedSize() const
+size_t Backend::getMaxBinnedSize() const
 {
     return hugePages.isEnabled && !inUserPool() ?
         maxBinned_HugePage : maxBinned_SmallPage;
diff --git a/src/tbbmalloc/frontend.cpp b/src/tbbmalloc/frontend.cpp
index c854a04..25930d2 100644
--- a/src/tbbmalloc/frontend.cpp
+++ b/src/tbbmalloc/frontend.cpp
@@ -2340,7 +2340,7 @@ static void *allocateAligned(MemoryPool *memPool, size_t size, size_t alignment)
 }
 
 static void *reallocAligned(MemoryPool *memPool, void *ptr,
-                            size_t size, size_t alignment = 0)
+                            size_t newSize, size_t alignment = 0)
 {
     void *result;
     size_t copySize;
@@ -2348,32 +2348,46 @@ static void *reallocAligned(MemoryPool *memPool, void *ptr,
     if (isLargeObject<ourMem>(ptr)) {
         LargeMemoryBlock* lmb = ((LargeObjectHdr *)ptr - 1)->memoryBlock;
         copySize = lmb->unalignedSize-((uintptr_t)ptr-(uintptr_t)lmb);
-        if (size <= copySize && (0==alignment || isAligned(ptr, alignment))) {
-            lmb->objectSize = size;
-            return ptr;
-        } else {
-            copySize = lmb->objectSize;
+
+        // Apply different strategies if size decreases
+        if (newSize <= copySize && (0 == alignment || isAligned(ptr, alignment))) {
+
+            // For huge objects (that do not fit in backend cache), keep the same space unless
+            // the new size is at least twice smaller
+            bool isMemoryBlockHuge = copySize > memPool->extMemPool.backend.getMaxBinnedSize();
+            size_t threshold = isMemoryBlockHuge ? copySize / 2 : 0;
+            if (newSize > threshold) {
+                lmb->objectSize = newSize;
+                return ptr;
+            }
+            // TODO: For large objects suitable for the backend cache,
+            // split out the excessive part and put it to the backend.
+        }
+        // Reallocate for real
+        copySize = lmb->objectSize;
 #if BACKEND_HAS_MREMAP
-            if (void *r = memPool->extMemPool.remap(ptr, copySize, size,
-                              alignment < largeObjectAlignment ? largeObjectAlignment : alignment))
-                return r;
+        if (void *r = memPool->extMemPool.remap(ptr, copySize, newSize,
+                          alignment < largeObjectAlignment ? largeObjectAlignment : alignment))
+            return r;
 #endif
-            result = alignment ? allocateAligned(memPool, size, alignment) :
-                internalPoolMalloc(memPool, size);
-        }
+        result = alignment ? allocateAligned(memPool, newSize, alignment) :
+            internalPoolMalloc(memPool, newSize);
+
     } else {
         Block* block = (Block *)alignDown(ptr, slabSize);
         copySize = block->findObjectSize(ptr);
-        if (size <= copySize && (0==alignment || isAligned(ptr, alignment))) {
+
+        // TODO: Move object to another bin if size decreases and the current bin is "empty enough".
+        // Currently, in case of size decreasing, old pointer is returned
+        if (newSize <= copySize && (0==alignment || isAligned(ptr, alignment))) {
             return ptr;
         } else {
-            result = alignment ? allocateAligned(memPool, size, alignment) :
-                internalPoolMalloc(memPool, size);
+            result = alignment ? allocateAligned(memPool, newSize, alignment) :
+                internalPoolMalloc(memPool, newSize);
         }
     }
     if (result) {
-        memcpy(result, ptr, copySize<size? copySize: size);
+        memcpy(result, ptr, copySize<newSize? copySize: newSize);
         internalPoolFree(memPool, ptr, 0);
     }
     return result;
 }
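Observable effect of the new shrink strategy through the public malloc interface, as a hedged sketch (the 8MB figure assumes the object exceeds getMaxBinnedSize(), which is platform-dependent):

    #include "tbb/scalable_allocator.h"

    void sketch() {
        void* p = scalable_malloc(8u << 20);      // 8MB "huge" object
        void* q = scalable_realloc(p, 5u << 20);  // > half of 8MB: the block is
                                                  // kept, q is typically p
        void* r = scalable_realloc(q, 1u << 20);  // <= half: copies into a
                                                  // smaller block and frees the
                                                  // old one; r may differ
        scalable_free(r);
    }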
@@ ... @@
     if (isLargeObject<ourMem>(ptr)) {
+        // TODO: return the maximum memory size that can be written to this object
         LargeMemoryBlock* lmb = ((LargeObjectHdr*)ptr - 1)->memoryBlock;
         return lmb->objectSize;
     } else
diff --git a/src/tbbmalloc/tbbmalloc_internal.h b/src/tbbmalloc/tbbmalloc_internal.h
index d31e757..b173ca1 100644
--- a/src/tbbmalloc/tbbmalloc_internal.h
+++ b/src/tbbmalloc/tbbmalloc_internal.h
@@ -847,7 +847,7 @@ public:
         memSoftLimit = softLimit;
         releaseCachesToLimit();
     }
-    inline size_t getMaxBinnedSize() const;
+    size_t getMaxBinnedSize() const;
     bool ptrCanBeValid(void *ptr) const { return usedAddrRange.inRange(ptr); }
diff --git a/src/test/test_blocked_range.cpp b/src/test/test_blocked_range.cpp
index 4083aa7..6f67a3f 100644
--- a/src/test/test_blocked_range.cpp
+++ b/src/test/test_blocked_range.cpp
@@ -165,6 +165,25 @@ void TestProportionalSplitOverflow()
     REMARK("OK\n");
 }
 #endif /* __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES */
+
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+void TestDeductionGuides() {
+    std::vector<unsigned long> v;
+
+    // check blocked_range(Value, Value, size_t)
+    tbb::blocked_range r1(v.begin(), v.end());
+    static_assert(std::is_same<decltype(r1), tbb::blocked_range<decltype(v)::iterator>>::value);
+
+    // check blocked_range(blocked_range &)
+    tbb::blocked_range r2(r1);
+    static_assert(std::is_same<decltype(r2), decltype(r1)>::value);
+
+    // check blocked_range(blocked_range &&)
+    tbb::blocked_range r3(std::move(r1));
+    static_assert(std::is_same<decltype(r3), decltype(r1)>::value);
+}
+#endif
+
 //------------------------------------------------------------------------
 // Test driver
 #include "tbb/task_scheduler_init.h"
@@ -184,5 +203,8 @@ int TestMain () {
     TestProportionalSplitOverflow();
 #endif
 
+    #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+    TestDeductionGuides();
+    #endif
     return Harness::Done;
 }
diff --git a/src/test/test_blocked_range2d.cpp b/src/test/test_blocked_range2d.cpp
index d4985ea..b3a878f 100644
--- a/src/test/test_blocked_range2d.cpp
+++ b/src/test/test_blocked_range2d.cpp
@@ -136,6 +136,26 @@ void ParallelTest() {
     }
 }
 
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+#include <vector>
+void TestDeductionGuides() {
+    std::vector<unsigned long> v;
+    std::vector<double> v2;
+
+    // check blocked_range2d(RowValue, RowValue, size_t, ColValue, ColValue, size_t)
+    tbb::blocked_range2d r1(v.begin(), v.end(), 2, v2.begin(), v2.end(), 2);
+    static_assert(std::is_same<decltype(r1),
+                  tbb::blocked_range2d<decltype(v)::iterator, decltype(v2)::iterator>>::value);
+
+    // check blocked_range2d(blocked_range2d &)
+    tbb::blocked_range2d r2(r1);
+    static_assert(std::is_same<decltype(r2), decltype(r1)>::value);
+
+    // check blocked_range2d(blocked_range2d &&)
+    tbb::blocked_range2d r3(std::move(r1));
+    static_assert(std::is_same<decltype(r3), decltype(r1)>::value);
+}
+#endif
+
 #include "tbb/task_scheduler_init.h"
 
 int TestMain () {
@@ -144,5 +164,9 @@ int TestMain () {
         tbb::task_scheduler_init init(p);
         ParallelTest();
     }
+
+    #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+    TestDeductionGuides();
+    #endif
     return Harness::Done;
 }
diff --git a/src/test/test_blocked_range3d.cpp b/src/test/test_blocked_range3d.cpp
index b9a8c86..111d7e8 100644
--- a/src/test/test_blocked_range3d.cpp
+++ b/src/test/test_blocked_range3d.cpp
@@ -167,6 +167,28 @@ void ParallelTest() {
     }
 }
 
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+#include <vector>
+void TestDeductionGuides() {
+    std::vector<unsigned long> v;
+    std::vector<double> v2;
+    std::vector<std::vector<int>> v3;
+
+    // check blocked_range3d(PageValue, PageValue, size_t, RowValue, RowValue, size_t, ColValue, ColValue, size_t)
+    tbb::blocked_range3d r1(v.begin(), v.end(), 2, v2.begin(), v2.end(), 2, v3.begin(), v3.end(), 6);
+    static_assert(std::is_same<decltype(r1),
+                  tbb::blocked_range3d<decltype(v)::iterator, decltype(v2)::iterator, decltype(v3)::iterator>>::value);
+
+    // check blocked_range3d(blocked_range3d &)
+    tbb::blocked_range3d r2(r1);
+    static_assert(std::is_same<decltype(r2), decltype(r1)>::value);
+
+    // check blocked_range3d(blocked_range3d &&)
+    tbb::blocked_range3d r3(std::move(r1));
+    static_assert(std::is_same<decltype(r3), decltype(r1)>::value);
+}
+#endif
+
 #include "tbb/task_scheduler_init.h"
 
 int TestMain () {
@@ -175,5 +197,9 @@ int TestMain () {
         tbb::task_scheduler_init init(p);
         ParallelTest();
     }
+
+    #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+    TestDeductionGuides();
+    #endif
     return Harness::Done;
 }
diff --git a/src/test/test_concurrent_hash_map.cpp b/src/test/test_concurrent_hash_map.cpp
index e36006c..1c83fd0 100644
--- a/src/test/test_concurrent_hash_map.cpp
+++ b/src/test/test_concurrent_hash_map.cpp
@@ -1203,12 +1203,16 @@ void Examine( Table c, const std::list<typename Table::value_type> &lst) {
     a.deallocate( ptr, 1 );
 }
 
+template<typename T>
+struct debug_hash_compare : tbb::tbb_hash_compare<T> {};
+
 template <bool defCtorPresent, typename Value>
 void TypeTester( const std::list<Value> &lst ) {
     __TBB_ASSERT( lst.size() >= 5, "Array should have at least 5 elements" );
     typedef typename Value::first_type first_type;
     typedef typename Value::second_type second_type;
     typedef tbb::concurrent_hash_map<first_type, second_type> ch_map;
+    debug_hash_compare<first_type> compare;
     // Construct an empty hash map.
     ch_map c1;
     c1.insert( lst.begin(), lst.end() );
@@ -1216,34 +1220,55 @@ void TypeTester( const std::list<Value> &lst ) {
 #if __TBB_INITIALIZER_LISTS_PRESENT && !__TBB_CPP11_INIT_LIST_TEMP_OBJS_LIFETIME_BROKEN
     // Constructor from initializer_list.
     typename std::list<Value>::const_iterator it = lst.begin();
-    ch_map c2( {*it++, *it++, *it++} );
+    std::initializer_list<Value> il = { *it++, *it++, *it++ };
+    ch_map c2( il );
     c2.insert( it, lst.end() );
     Examine<defCtorPresent>( c2, lst );
+
+    // Constructor from initializer_list and compare object
+    ch_map c3( il, compare);
+    c3.insert( it, lst.end() );
+    Examine<defCtorPresent>( c3, lst );
+
+    // Constructor from initializer_list, compare object and allocator
+    ch_map c4( il, compare, typename ch_map::allocator_type());
+    c4.insert( it, lst.end());
+    Examine<defCtorPresent>( c4, lst );
 #endif
     // Copying constructor.
-    ch_map c3(c1);
-    Examine<defCtorPresent>( c3, lst );
+    ch_map c5(c1);
+    Examine<defCtorPresent>( c5, lst );
     // Construct with non-default allocator
     typedef tbb::concurrent_hash_map< first_type,second_type,tbb::tbb_hash_compare<first_type>,debug_allocator<std::pair<const first_type,second_type> > > ch_map_debug_alloc;
-    ch_map_debug_alloc c4;
-    c4.insert( lst.begin(), lst.end() );
-    Examine<defCtorPresent>( c4, lst );
-    // Copying constructor for vector with different allocator type.
-    ch_map_debug_alloc c5(c4);
-    Examine<defCtorPresent>( c5, lst );
-    // Construction empty table with n preallocated buckets.
-    ch_map c6( lst.size() );
+    ch_map_debug_alloc c6;
     c6.insert( lst.begin(), lst.end() );
     Examine<defCtorPresent>( c6, lst );
-    ch_map_debug_alloc c7( lst.size() );
-    c7.insert( lst.begin(), lst.end() );
+    // Copying constructor
+    ch_map_debug_alloc c7(c6);
     Examine<defCtorPresent>( c7, lst );
-    // Construction with copying iteration range and given allocator instance.
-    ch_map c8( c1.begin(), c1.end() );
+    // Construction empty table with n preallocated buckets.
+    ch_map c8( lst.size() );
+    c8.insert( lst.begin(), lst.end() );
     Examine<defCtorPresent>( c8, lst );
-    debug_allocator<std::pair<const first_type,second_type> > allocator;
-    ch_map_debug_alloc c9( lst.begin(), lst.end(), allocator );
+    ch_map_debug_alloc c9( lst.size() );
+    c9.insert( lst.begin(), lst.end() );
     Examine<defCtorPresent>( c9, lst );
+    // Construction with copying iteration range.
+    ch_map c10( c1.begin(), c1.end() );
+    Examine<defCtorPresent>( c10, lst );
+    // Construction with copying iteration range and given allocator instance.
+    debug_allocator<std::pair<const first_type,second_type> > allocator;
+    ch_map_debug_alloc c11( lst.begin(), lst.end(), allocator );
+    Examine<defCtorPresent>( c11, lst );
+
+    typedef tbb::concurrent_hash_map< first_type,second_type,debug_hash_compare<first_type>,typename ch_map::allocator_type> ch_map_debug_hash;
+
+    // Constructor with two iterators and hash_compare
+    ch_map_debug_hash c12(c1.begin(), c1.end(), compare);
+    Examine<defCtorPresent>( c12, lst );
+
+    ch_map_debug_hash c13(c1.begin(), c1.end(), compare, typename ch_map::allocator_type());
+    Examine<defCtorPresent>( c13, lst );
 }
 
 #if __TBB_CPP11_SMART_POINTERS_PRESENT
@@ -1361,6 +1386,39 @@
 void TestMoveSupport(){
     REPORT("Known issue: tests for C++11 move semantics support are skipped.\n");
 }
 #endif //__TBB_CPP11_RVALUE_REF_PRESENT
+
+template<typename Key>
+struct non_default_constructible_hash_compare : tbb::tbb_hash_compare<Key> {
+    non_default_constructible_hash_compare() {
+        ASSERT(false, "Hash compare object must not be default constructed during the construction of hash_map with compare argument");
+    }
+
+    non_default_constructible_hash_compare(int) {}
+};
+
+void TestHashCompareConstructors() {
+    typedef int key_type;
+    typedef tbb::concurrent_hash_map<key_type, key_type, non_default_constructible_hash_compare<key_type> > map_type;
+
+    non_default_constructible_hash_compare<key_type> compare(0);
+    map_type::allocator_type allocator;
+
+    map_type map1(compare);
+    map_type map2(compare, allocator);
+
+    map_type map3(1, compare);
+    map_type map4(1, compare, allocator);
+
+    std::vector<map_type::value_type> reference_vector;
+    map_type map5(reference_vector.begin(), reference_vector.end(), compare);
+    map_type map6(reference_vector.begin(), reference_vector.end(), compare, allocator);
+
+#if __TBB_INITIALIZER_LISTS_PRESENT
+    map_type map7({}, compare);
+    map_type map8({}, compare, allocator);
+#endif
+}
+
 //------------------------------------------------------------------------
 // Test driver
 //------------------------------------------------------------------------
@@ -1419,6 +1477,7 @@ int TestMain () {
     }
 
     TestCPP11Types();
+    TestHashCompareConstructors();
 
     return Harness::Done;
 }
diff --git a/src/test/test_concurrent_priority_queue.cpp b/src/test/test_concurrent_priority_queue.cpp
index 9773a03..0836fee 100644
--- a/src/test/test_concurrent_priority_queue.cpp
+++ b/src/test/test_concurrent_priority_queue.cpp
@@ -1063,6 +1063,50 @@ void TestTypes() {
 #endif /* __TBB_CPP11_SMART_POINTERS_PRESENT */
 }
 
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+template