The list of most significant changes made over time in
Intel(R) Threading Building Blocks (Intel(R) TBB).
+Intel TBB 2019 Update 2
+TBB_INTERFACE_VERSION == 11002
+
+Changes (w.r.t. Intel TBB 2019 Update 1):
+
+- Added constructors with HashCompare argument to concurrent_hash_map
+ (https://github.com/01org/tbb/pull/63).
+- Added overloads for parallel_reduce with default partitioner and
+ user-supplied context.
+- Added deduction guides for tbb containers: concurrent_vector,
+ concurrent_queue, concurrent_bounded_queue, and
+ concurrent_priority_queue (see the usage sketch below).
+- Reallocation of memory objects >1MB now copies and frees memory if
+ the size is decreased by a factor of two or more, trading off
+ performance for reduced memory usage.
+- After a period of sleep, TBB worker threads now prefer returning to
+ their last used task arena.
+
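+ For illustration, a minimal usage sketch of the new interfaces (the
+ variable names below are hypothetical):
+
+     std::vector<int> data( 100, 1 );
+     // C++17 deduction guide: deduces tbb::concurrent_vector<int>
+     tbb::concurrent_vector cv( data.begin(), data.end() );
+
+     // parallel_reduce with default partitioner and user-supplied context
+     tbb::task_group_context ctx;
+     int sum = tbb::parallel_reduce(
+         tbb::blocked_range<size_t>( 0, cv.size() ), 0,
+         [&cv]( const tbb::blocked_range<size_t>& r, int acc ) {
+             for ( size_t i = r.begin(); i != r.end(); ++i ) acc += cv[i];
+             return acc;
+         },
+         std::plus<int>(), ctx );
+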
+Bugs fixed:
+
+- Fixed compilation of task_group.h when targeting macOS* 10.11 or
+ earlier (https://github.com/conda-forge/tbb-feedstock/issues/42).
+
+------------------------------------------------------------------------
Intel TBB 2019 Update 1
TBB_INTERFACE_VERSION == 11001
observer.
- Fixed compilation of task_group.h by Visual C++* 15.7 with
/permissive- option (https://github.com/01org/tbb/issues/53).
+- Fixed tbb4py to avoid dependency on Intel(R) C++ Compiler shared
+ libraries.
+- Fixed compilation for Anaconda environment with GCC 7.3 and higher.
------------------------------------------------------------------------
Intel TBB 2019
-# Threading Building Blocks 2019 Update 1
-[![Stable release](https://img.shields.io/badge/version-2019_U1-green.svg)](https://github.com/01org/tbb/releases/tag/2019_U1)
+# Threading Building Blocks 2019 Update 2
+[![Stable release](https://img.shields.io/badge/version-2019_U2-green.svg)](https://github.com/01org/tbb/releases/tag/2019_U2)
[![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](LICENSE)
Threading Building Blocks (TBB) lets you easily write parallel C++ programs that take
GNU Compilers (gcc) 4.1 - 7.1
GNU C Library (glibc) version 2.4 - 2.19
Xcode* 7.0 - 9.1
- Android* NDK r10e - r16
+ Android* NDK r10e - r17b
Software - Supported Performance Analysis Tools
: internal::hash_map_base(), my_allocator(a)
{}
+ explicit concurrent_hash_map( const HashCompare& compare, const allocator_type& a = allocator_type() )
+ : internal::hash_map_base(), my_allocator(a), my_hash_compare(compare)
+ {}
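+ // Usage sketch (MyHashCompare is a hypothetical type providing hash() and
+ // equal() as required by the HashCompare concept):
+ //     MyHashCompare my_compare;
+ //     tbb::concurrent_hash_map<Key,T,MyHashCompare> table( my_compare );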
+
//! Construct empty table with n preallocated buckets. This number serves also as initial concurrency level.
concurrent_hash_map( size_type n, const allocator_type &a = allocator_type() )
- : my_allocator(a)
+ : internal::hash_map_base(), my_allocator(a)
+ {
+ reserve( n );
+ }
+
+ concurrent_hash_map( size_type n, const HashCompare& compare, const allocator_type& a = allocator_type() )
+ : internal::hash_map_base(), my_allocator(a), my_hash_compare(compare)
{
reserve( n );
}
//! Construction with copying iteration range and given allocator instance
template<typename I>
concurrent_hash_map( I first, I last, const allocator_type &a = allocator_type() )
- : my_allocator(a)
+ : internal::hash_map_base(), my_allocator(a)
+ {
+ call_clear_on_leave scope_guard(this);
+ internal_copy(first, last, std::distance(first, last));
+ scope_guard.dismiss();
+ }
+
+ template<typename I>
+ concurrent_hash_map( I first, I last, const HashCompare& compare, const allocator_type& a = allocator_type() )
+ : internal::hash_map_base(), my_allocator(a), my_hash_compare(compare)
{
call_clear_on_leave scope_guard(this);
internal_copy(first, last, std::distance(first, last));
#if __TBB_INITIALIZER_LISTS_PRESENT
//! Construct table from an initializer list
concurrent_hash_map( std::initializer_list<value_type> il, const allocator_type &a = allocator_type() )
- : my_allocator(a)
+ : internal::hash_map_base(), my_allocator(a)
+ {
+ call_clear_on_leave scope_guard(this);
+ internal_copy(il.begin(), il.end(), il.size());
+ scope_guard.dismiss();
+ }
+
+ concurrent_hash_map( std::initializer_list<value_type> il, const HashCompare& compare, const allocator_type& a = allocator_type() )
+ : internal::hash_map_base(), my_allocator(a), my_hash_compare(compare)
{
call_clear_on_leave scope_guard(this);
internal_copy(il.begin(), il.end(), il.size());
//! Copy constructor
/** This operation is unsafe if there are pending concurrent operations on the src queue. */
- explicit concurrent_priority_queue(const concurrent_priority_queue& src) : mark(src.mark),
+ concurrent_priority_queue(const concurrent_priority_queue& src) : mark(src.mark),
my_size(src.my_size), data(src.data.begin(), src.data.end(), src.data.get_allocator())
{
my_aggregator.initialize_handler(my_functor_t(this));
}
};
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+// Deduction guide for the constructor from two iterators
+template<typename InputIterator,
+ typename T = typename std::iterator_traits<InputIterator>::value_type,
+ typename A = cache_aligned_allocator<T>
+> concurrent_priority_queue(InputIterator, InputIterator, const A& = A())
+-> concurrent_priority_queue<T, std::less<T>, A>;
+#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
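+// Usage sketch: for a hypothetical std::vector<int> v, the guide above makes
+//     tbb::concurrent_priority_queue pq( v.begin(), v.end() );
+// deduce concurrent_priority_queue<int, std::less<int>, cache_aligned_allocator<int>>.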
} // namespace interface5
using interface5::concurrent_priority_queue;
const_iterator unsafe_end() const {return const_iterator();}
} ;
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+// Deduction guide for the constructor from two iterators
+template<typename InputIterator,
+ typename T = typename std::iterator_traits<InputIterator>::value_type,
+ typename A = cache_aligned_allocator<T>
+> concurrent_queue(InputIterator, InputIterator, const A& = A())
+-> concurrent_queue<T, A>;
+#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
+
template<typename T, class A>
concurrent_queue<T,A>::~concurrent_queue() {
clear();
};
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+// Deduction guide for the constructor from two iterators
+template<typename InputIterator,
+ typename T = typename std::iterator_traits<InputIterator>::value_type,
+ typename A = cache_aligned_allocator<T>
+> concurrent_bounded_queue(InputIterator, InputIterator, const A& = A())
+-> concurrent_bounded_queue<T, A>;
+#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
+
template<typename T, class A>
concurrent_bounded_queue<T,A>::~concurrent_bounded_queue() {
clear();
};
};
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+// Deduction guide for the constructor from two iterators
+template<typename I,
+ typename T = typename std::iterator_traits<I>::value_type,
+ typename A = cache_aligned_allocator<T>
+> concurrent_vector(I, I, const A& = A())
+-> concurrent_vector<T, A>;
+
+// Deduction guide for the constructor from a vector and allocator
+template<typename T, typename A1, typename A2>
+concurrent_vector(const concurrent_vector<T, A1> &, const A2 &)
+-> concurrent_vector<T, A2>;
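+// e.g. for a hypothetical concurrent_vector<int> v, the guide above makes
+//     tbb::concurrent_vector u( v, std::allocator<int>() );
+// deduce concurrent_vector<int, std::allocator<int>> (the allocator type is rebound).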
+
+// Deduction guide for the constructor from an initializer_list
+template<typename T, typename A = cache_aligned_allocator<T>
+> concurrent_vector(std::initializer_list<T>, const A& = A())
+-> concurrent_vector<T, A>;
+#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
+
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma warning (push)
#pragma warning (disable: 4701) // potentially uninitialized local variable "old"
}
#if __TBB_TASK_GROUP_CONTEXT
+//! Parallel iteration with reduction, default partitioner and user-supplied context.
+/** @ingroup algorithms **/
+template<typename Range, typename Body>
+void parallel_reduce( const Range& range, Body& body, task_group_context& context ) {
+ internal::start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
+}
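+// A minimal call sketch (MyBody and n are hypothetical):
+//     tbb::task_group_context ctx;
+//     MyBody body;
+//     tbb::parallel_reduce( tbb::blocked_range<int>(0, n), body, ctx );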
+
//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
}
#if __TBB_TASK_GROUP_CONTEXT
+//! Parallel iteration with reduction, default partitioner and user-supplied context.
+/** @ingroup algorithms **/
+template<typename Range, typename Value, typename RealBody, typename Reduction>
+Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
+ task_group_context& context ) {
+ internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
+ internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
+ ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
+ return body.result();
+}
+
//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
#endif
#if __clang__
- /** according to clang documentation, version can be vendor specific **/
+ // according to clang documentation, version can be vendor specific
#define __TBB_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
#endif
#define __TBB_IOS 1
#endif
+#if __APPLE__
+ #if __INTEL_COMPILER && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1099 \
+ && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101000
+ // ICC does not correctly set the macro if -mmacosx-version-min is not specified
+ #define __TBB_MACOS_TARGET_VERSION (100000 + 10*(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ - 1000))
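+ // e.g. a value of 1090 (macOS* 10.9) is converted to
+ // 100000 + 10*(1090 - 1000) == 100900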
+ #else
+ #define __TBB_MACOS_TARGET_VERSION __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__
+ #endif
+#endif
+
/** Preprocessor symbols to determine HW architecture **/
#if _WIN32||_WIN64
#define __TBB_ALIGNAS_PRESENT (__INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1500)
#define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT (__INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1210)
#define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT (__cplusplus >= 201402L)
+ #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__INTEL_COMPILER > 1900)
#elif __clang__
/** TODO: these options need to be rechecked **/
#define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT __has_feature(__cxx_variadic_templates__)
#define __TBB_ALIGNAS_PRESENT __has_feature(cxx_alignas)
#define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT __has_feature(cxx_alias_templates)
#define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT (__cplusplus >= 201402L)
+ #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__has_feature(__cpp_deduction_guides))
#elif __GNUC__
#define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT __GXX_EXPERIMENTAL_CXX0X__
#define __TBB_CPP11_VARIADIC_FIXED_LENGTH_EXP_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40700)
#define __TBB_ALIGNAS_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40800)
#define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40700)
#define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT (__cplusplus >= 201402L && __TBB_GCC_VERSION >= 50000)
+ #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__cpp_deduction_guides >= 201606)
#elif _MSC_VER
// These definitions are also used with Intel C++ Compiler in "default" mode (__INTEL_CXX11_MODE__ == 0);
// see a comment in "__INTEL_COMPILER" section above.
#define __TBB_ALIGNAS_PRESENT (_MSC_VER >= 1900)
#define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT (_MSC_VER >= 1800)
#define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT (_MSC_VER >= 1900)
+ #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (_MSVC_LANG >= 201703L)
#else
#define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT 0
#define __TBB_CPP11_RVALUE_REF_PRESENT 0
#define __TBB_ALIGNAS_PRESENT 0
#define __TBB_CPP11_TEMPLATE_ALIASES_PRESENT 0
#define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT (__cplusplus >= 201402L)
+ #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT 0
#endif
// C++11 standard library features
#define __TBB_CPP11_GET_NEW_HANDLER_PRESENT (_MSC_VER >= 1900 || __TBB_GLIBCXX_VERSION >= 40900 && __GXX_EXPERIMENTAL_CXX0X__ || _LIBCPP_VERSION)
-#define __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT (_MSC_VER >= 1900 || __GLIBCXX__ && __cpp_lib_uncaught_exceptions || _LIBCPP_VERSION >= 3700)
+#define __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT (_MSC_VER >= 1900 || __GLIBCXX__ && __cpp_lib_uncaught_exceptions \
+ || _LIBCPP_VERSION >= 3700 && (!__TBB_MACOS_TARGET_VERSION || __TBB_MACOS_TARGET_VERSION >= 101200))
// std::swap is in <utility> only since C++11, though MSVC had it at least since VS2005
#if _MSC_VER>=1400 || _LIBCPP_VERSION || __GXX_EXPERIMENTAL_CXX0X__
#define TBB_VERSION_MINOR 0
// Engineering-focused interface version
-#define TBB_INTERFACE_VERSION 11001
+#define TBB_INTERFACE_VERSION 11002
#define TBB_INTERFACE_VERSION_MAJOR TBB_INTERFACE_VERSION/1000
// The oldest major interface version still supported
friend class thread_monitor;
tbb::atomic<size_t> my_epoch;
};
- thread_monitor() : spurious(false), my_sema() {
+ thread_monitor() : skipped_wakeup(false), my_sema() {
my_cookie.my_epoch = 0;
ITT_SYNC_CREATE(&my_sema, SyncType_RML, SyncObj_ThreadMonitor);
in_wait = false;
//! Detach thread
static void detach_thread(handle_type handle);
private:
- cookie my_cookie;
- tbb::atomic<bool> in_wait;
- bool spurious;
+ cookie my_cookie; // epoch counter
+ tbb::atomic<bool> in_wait;
+ bool skipped_wakeup;
tbb::internal::binary_semaphore my_sema;
#if USE_PTHREAD
static void check( int error_code, const char* routine );
}
inline void thread_monitor::prepare_wait( cookie& c ) {
- if( spurious ) {
- spurious = false;
- // consumes a spurious posted signal. don't wait on my_sema.
- my_sema.P();
+ if( skipped_wakeup ) {
+ // Lazily consume a signal that was skipped due to cancel_wait
+ skipped_wakeup = false;
+ my_sema.P(); // does not really wait on the semaphore
}
c = my_cookie;
- in_wait = true;
- __TBB_full_memory_fence();
+ in_wait.store<tbb::full_fence>( true );
}
inline void thread_monitor::commit_wait( cookie& c ) {
- bool do_it = ( c.my_epoch == my_cookie.my_epoch);
+ bool do_it = ( c.my_epoch == my_cookie.my_epoch );
if( do_it ) my_sema.P();
else cancel_wait();
}
inline void thread_monitor::cancel_wait() {
- spurious = ! in_wait.fetch_and_store( false );
+ // if not in_wait, then some thread has sent us a signal;
+ // it will be consumed by the next prepare_wait call
+ skipped_wakeup = ! in_wait.fetch_and_store( false );
}
} // namespace internal
void concurrent_monitor::prepare_wait( thread_context& thr, uintptr_t ctx ) {
if( !thr.ready )
thr.init();
- // this is good place to pump previous spurious wakeup
- else if( thr.spurious ) {
- thr.spurious = false;
+ // this is a good place to pump a previously skipped wakeup
+ else if( thr.skipped_wakeup ) {
+ thr.skipped_wakeup = false;
thr.semaphore().P();
}
thr.context = ctx;
}
void concurrent_monitor::cancel_wait( thread_context& thr ) {
- // spurious wakeup will be pumped in the following prepare_wait()
- thr.spurious = true;
+ // a possibly skipped wakeup will be pumped in the following prepare_wait()
+ thr.skipped_wakeup = true;
// try to remove node from waitset
bool th_in_waitset = thr.in_waitset;
if( th_in_waitset ) {
tbb::spin_mutex::scoped_lock l( mutex_ec );
if (thr.in_waitset) {
- // successfully removed from waitset,
- // so there will be no spurious wakeup
- thr.in_waitset = false;
- thr.spurious = false;
waitset_ec.remove( (waitset_t::node_t&)thr );
+ // node is removed from waitset, so there will be no wakeup
+ thr.in_waitset = false;
+ thr.skipped_wakeup = false;
}
}
}
class thread_context : waitset_node_t, no_copy {
friend class concurrent_monitor;
public:
- thread_context() : spurious(false), aborted(false), ready(false), context(0) {
+ thread_context() : skipped_wakeup(false), aborted(false), ready(false), context(0) {
epoch = 0;
in_waitset = false;
}
~thread_context() {
if (ready) {
- if( spurious ) semaphore().P();
+ if( skipped_wakeup ) semaphore().P();
semaphore().~binary_semaphore();
}
}
tbb::aligned_space<binary_semaphore> sema;
__TBB_atomic unsigned epoch;
tbb::atomic<bool> in_waitset;
- bool spurious;
+ bool skipped_wakeup;
bool aborted;
bool ready;
uintptr_t context;
}
/** This method must be invoked under my_arenas_list_mutex. **/
-arena* market::arena_in_need ( arena_list_type &arenas, arena *&next ) {
+arena* market::arena_in_need ( arena_list_type &arenas, arena *hint ) {
if ( arenas.empty() )
return NULL;
- arena_list_type::iterator it = next;
+ arena_list_type::iterator it = hint;
__TBB_ASSERT( it != arenas.end(), NULL );
do {
arena& a = *it;
#endif
) {
a.my_references += arena::ref_worker;
- as_atomic(next) = &*it; // a subject for innocent data race under the reader lock
- // TODO: rework global round robin policy to local or random to avoid this write
return &a;
}
- } while ( it != next );
+ } while ( it != hint );
return NULL;
}
return assigned;
}
+/** This method must be invoked under my_arenas_list_mutex. **/
+bool market::is_arena_in_list( arena_list_type &arenas, arena *a ) {
+ if ( a ) {
+ for ( arena_list_type::iterator it = arenas.begin(); it != arenas.end(); ++it )
+ if ( a == &*it )
+ return true;
+ }
+ return false;
+}
+
#if __TBB_TASK_PRIORITY
inline void market::update_global_top_priority ( intptr_t newPriority ) {
GATHER_STATISTIC( ++governor::local_scheduler_if_initialized()->my_counters.market_prio_switches );
update_global_top_priority(normalized_normal_priority);
}
-arena* market::arena_in_need ( arena* prev_arena )
-{
- suppress_unused_warning(prev_arena);
+arena* market::arena_in_need ( arena* prev_arena ) {
if( as_atomic(my_total_demand) <= 0 )
return NULL;
arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex, /*is_writer=*/false);
assert_market_valid();
int p = my_global_top_priority;
arena *a = NULL;
- do {
- priority_level_info &pl = my_priority_levels[p];
+
+ // Check if the previously used arena is still in the list (i.e. alive)
+ if ( is_arena_in_list( my_priority_levels[p].arenas, prev_arena ) ) {
+ a = arena_in_need( my_priority_levels[p].arenas, prev_arena );
+ }
+
+ while ( !a && p >= my_global_bottom_priority ) {
+ priority_level_info &pl = my_priority_levels[p--];
a = arena_in_need( pl.arenas, pl.next_arena );
+ if ( a ) {
+ as_atomic(pl.next_arena) = a; // a subject for innocent data race under the reader lock
+ // TODO: rework global round robin policy to local or random to avoid this write
+ }
// TODO: When refactoring task priority code, take into consideration the
// __TBB_TRACK_PRIORITY_LEVEL_SATURATION sections from earlier versions of TBB
- } while ( !a && --p >= my_global_bottom_priority );
+ }
return a;
}
void market::process( job& j ) {
generic_scheduler& s = static_cast<generic_scheduler&>(j);
- arena *a = NULL;
+ // s.my_arena may already be destroyed; do not access it until arena_in_need validates it
+ arena *a = s.my_arena;
__TBB_ASSERT( governor::is_set(&s), NULL );
enum {
query_interval = 1000,
while ( (a = arena_in_need(a)) )
{
a->process(s);
+ a = NULL; // To avoid double checks in arena_in_need
i = first_interval;
}
// Workers leave market because there is no arena in need. It can happen earlier than
update_allotment( my_arenas, my_total_demand, (int)my_num_workers_soft_limit );
}
+ // TODO: consider rewriting this code using the is_arena_in_list function
//! Returns next arena that needs more workers, or NULL.
arena* arena_in_need (arena*) {
if(__TBB_load_with_acquire(my_total_demand) <= 0)
void remove_arena_from_list ( arena& a );
- arena* arena_in_need ( arena_list_type &arenas, arena *&next );
+ arena* arena_in_need ( arena_list_type &arenas, arena *hint );
static int update_allotment ( arena_list_type& arenas, int total_demand, int max_workers );
+ bool is_arena_in_list( arena_list_type &arenas, arena *a );
+
////////////////////////////////////////////////////////////////////////////////
// Implementation of rml::tbb_client interface methods
// Check/set the invariant for sleeping
if( my_state!=st_quit && my_server.try_insert_in_asleep_list(*this) ) {
my_thread_monitor.commit_wait(c);
+ __TBB_ASSERT( my_state==st_quit || !my_next, "Thread monitor missed a spurious wakeup?" );
my_server.propagate_chain_reaction();
} else {
// Invariant broken
release_handle(my_handle, governor::does_client_join_workers(my_client));
}
}
- else
+ else {
+ __TBB_ASSERT( !my_next, "Should not wake a thread while it's still in the asleep list" );
my_thread_monitor.notify();
+ }
}
//------------------------------------------------------------------------
}
}
done:
- while( w>wakee )
- (*--w)->wake_or_launch();
+ while( w>wakee ) {
+ private_worker* ww = *--w;
+ ww->my_next = NULL;
+ ww->wake_or_launch();
+ }
}
void private_server::adjust_job_count_estimate( int delta ) {
#if __TBB_USE_FUTEX
class binary_semaphore : no_copy {
+// The implementation is equivalent to the "Mutex, Take 3" one
+// in the paper "Futexes Are Tricky" by Ulrich Drepper
public:
//! ctor
binary_semaphore() { my_sem = 1; }
if( (s = my_sem.compare_and_swap( 1, 0 ))!=0 ) {
if( s!=2 )
s = my_sem.fetch_and_store( 2 );
- while( s!=0 ) {
+ while( s!=0 ) { // This loop deals with spurious wakeup
futex_wait( &my_sem, 2 );
s = my_sem.fetch_and_store( 2 );
}
//! post/release
void V() {
__TBB_ASSERT( my_sem>=1, "multiple V()'s in a row?" );
- if( my_sem--!=1 ) {
- //if old value was 2
- my_sem = 0;
+ if( my_sem.fetch_and_store( 0 )==2 )
futex_wakeup_one( &my_sem );
- }
}
private:
- atomic<int> my_sem;
+ atomic<int> my_sem; // 0 - open; 1 - closed, no waiters; 2 - closed, possible waiters
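+ // e.g. a P() that finds the semaphore closed announces itself by setting the
+ // state to 2 and futex-waits; V() stores 0 (open) and, seeing the old value 2,
+ // wakes one waiter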
};
#else
typedef uint32_t sem_count_t;
return fBlock;
}
-inline size_t Backend::getMaxBinnedSize() const
+size_t Backend::getMaxBinnedSize() const
{
return hugePages.isEnabled && !inUserPool() ?
maxBinned_HugePage : maxBinned_SmallPage;
}
static void *reallocAligned(MemoryPool *memPool, void *ptr,
- size_t size, size_t alignment = 0)
+ size_t newSize, size_t alignment = 0)
{
void *result;
size_t copySize;
if (isLargeObject<ourMem>(ptr)) {
LargeMemoryBlock* lmb = ((LargeObjectHdr *)ptr - 1)->memoryBlock;
copySize = lmb->unalignedSize-((uintptr_t)ptr-(uintptr_t)lmb);
- if (size <= copySize && (0==alignment || isAligned(ptr, alignment))) {
- lmb->objectSize = size;
- return ptr;
- } else {
- copySize = lmb->objectSize;
+
+ // Apply a different strategy if the size decreases
+ if (newSize <= copySize && (0 == alignment || isAligned(ptr, alignment))) {
+
+ // For huge objects (that do not fit in the backend cache), keep the same space
+ // unless the new size is at most half of the current one
+ bool isMemoryBlockHuge = copySize > memPool->extMemPool.backend.getMaxBinnedSize();
+ size_t threshold = isMemoryBlockHuge ? copySize / 2 : 0;
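+ // e.g. shrinking a 100MB huge block to 60MB keeps it in place
+ // (the threshold is 50MB), while shrinking it to 40MB reallocates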
+ if (newSize > threshold) {
+ lmb->objectSize = newSize;
+ return ptr;
+ }
+ // TODO: For large objects suitable for the backend cache,
+ // split out the excessive part and put it to the backend.
+ }
+ // Reallocate for real
+ copySize = lmb->objectSize;
#if BACKEND_HAS_MREMAP
- if (void *r = memPool->extMemPool.remap(ptr, copySize, size,
- alignment<largeObjectAlignment?
- largeObjectAlignment : alignment))
- return r;
+ if (void *r = memPool->extMemPool.remap(ptr, copySize, newSize,
+ alignment < largeObjectAlignment ? largeObjectAlignment : alignment))
+ return r;
#endif
- result = alignment ? allocateAligned(memPool, size, alignment) :
- internalPoolMalloc(memPool, size);
- }
+ result = alignment ? allocateAligned(memPool, newSize, alignment) :
+ internalPoolMalloc(memPool, newSize);
+
} else {
Block* block = (Block *)alignDown(ptr, slabSize);
copySize = block->findObjectSize(ptr);
- if (size <= copySize && (0==alignment || isAligned(ptr, alignment))) {
+
+ // TODO: Move the object to another bin if the size decreases and the current bin is "empty enough".
+ // Currently, if the size decreases, the old pointer is returned
+ if (newSize <= copySize && (0==alignment || isAligned(ptr, alignment))) {
return ptr;
} else {
- result = alignment ? allocateAligned(memPool, size, alignment) :
- internalPoolMalloc(memPool, size);
+ result = alignment ? allocateAligned(memPool, newSize, alignment) :
+ internalPoolMalloc(memPool, newSize);
}
}
if (result) {
- memcpy(result, ptr, copySize<size? copySize: size);
+ memcpy(result, ptr, copySize < newSize ? copySize : newSize);
internalPoolFree(memPool, ptr, 0);
}
return result;
if (ptr) {
MALLOC_ASSERT(isRecognized(ptr), "Invalid pointer in scalable_msize detected.");
if (isLargeObject<ourMem>(ptr)) {
+ // TODO: return the maximum memory size that can be written to this object
LargeMemoryBlock* lmb = ((LargeObjectHdr*)ptr - 1)->memoryBlock;
return lmb->objectSize;
} else
memSoftLimit = softLimit;
releaseCachesToLimit();
}
- inline size_t getMaxBinnedSize() const;
+ size_t getMaxBinnedSize() const;
bool ptrCanBeValid(void *ptr) const { return usedAddrRange.inRange(ptr); }
REMARK("OK\n");
}
#endif /* __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES */
+
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+void TestDeductionGuides() {
+ std::vector<const int *> v;
+
+ // check blocked_range(Value, Value, size_t)
+ tbb::blocked_range r1(v.begin(), v.end());
+ static_assert(std::is_same<decltype(r1), tbb::blocked_range<decltype(v)::iterator>>::value);
+
+ // check blocked_range(blocked_range &)
+ tbb::blocked_range r2(r1);
+ static_assert(std::is_same<decltype(r2), decltype(r1)>::value);
+
+ // check blocked_range(blocked_range &&)
+ tbb::blocked_range r3(std::move(r1));
+ static_assert(std::is_same<decltype(r3), decltype(r1)>::value);
+}
+#endif
+
//------------------------------------------------------------------------
// Test driver
#include "tbb/task_scheduler_init.h"
TestProportionalSplitOverflow();
#endif
+ #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+ TestDeductionGuides();
+ #endif
return Harness::Done;
}
}
}
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+#include <vector>
+void TestDeductionGuides() {
+ std::vector<const unsigned long *> v;
+ std::vector<double> v2;
+
+ // check blocked_range2d(RowValue, RowValue, size_t, ColValue, ColValue, size_t)
+ tbb::blocked_range2d r1(v.begin(), v.end(), 2, v2.begin(), v2.end(), 2);
+ static_assert(std::is_same<decltype(r1), tbb::blocked_range2d<decltype(v)::iterator, decltype(v2)::iterator>>::value);
+
+ // check blocked_range2d(blocked_range2d &)
+ tbb::blocked_range2d r2(r1);
+ static_assert(std::is_same<decltype(r2), decltype(r1)>::value);
+
+ // check blocked_range2d(blocked_range2d &&)
+ tbb::blocked_range2d r3(std::move(r1));
+ static_assert(std::is_same<decltype(r3), decltype(r1)>::value);
+}
+#endif
+
#include "tbb/task_scheduler_init.h"
int TestMain () {
tbb::task_scheduler_init init(p);
ParallelTest();
}
+
+ #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+ TestDeductionGuides();
+ #endif
return Harness::Done;
}
}
}
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+#include <vector>
+void TestDeductionGuides() {
+ std::vector<const unsigned long *> v;
+ std::vector<double> v2;
+ std::vector<std::vector<int>> v3;
+
+ // check blocked_range3d(PageValue, PageValue, size_t, RowValue, RowValue, size_t, ColValue, ColValue, size_t)
+ tbb::blocked_range3d r1(v.begin(), v.end(), 2, v2.begin(), v2.end(), 2, v3.begin(), v3.end(), 6);
+ static_assert(std::is_same<decltype(r1),
+ tbb::blocked_range3d<decltype(v)::iterator, decltype(v2)::iterator, decltype(v3)::iterator>>::value);
+
+ // check blocked_range3d(blocked_range3d &)
+ tbb::blocked_range3d r2(r1);
+ static_assert(std::is_same<decltype(r2), decltype(r1)>::value);
+
+ // check blocked_range3d(blocked_range3d &&)
+ tbb::blocked_range3d r3(std::move(r1));
+ static_assert(std::is_same<decltype(r3), decltype(r1)>::value);
+}
+#endif
+
#include "tbb/task_scheduler_init.h"
int TestMain () {
tbb::task_scheduler_init init(p);
ParallelTest();
}
+
+ #if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+ TestDeductionGuides();
+ #endif
return Harness::Done;
}
a.deallocate( ptr, 1 );
}
+template<typename T>
+struct debug_hash_compare : tbb::tbb_hash_compare<T> {};
+
template <bool default_construction_present, typename Value>
void TypeTester( const std::list<Value> &lst ) {
__TBB_ASSERT( lst.size() >= 5, "Array should have at least 5 elements" );
typedef typename Value::first_type first_type;
typedef typename Value::second_type second_type;
typedef tbb::concurrent_hash_map<first_type,second_type> ch_map;
+ debug_hash_compare<first_type> compare;
// Construct an empty hash map.
ch_map c1;
c1.insert( lst.begin(), lst.end() );
#if __TBB_INITIALIZER_LISTS_PRESENT && !__TBB_CPP11_INIT_LIST_TEMP_OBJS_LIFETIME_BROKEN
// Constructor from initializer_list.
typename std::list<Value>::const_iterator it = lst.begin();
- ch_map c2( {*it++, *it++, *it++} );
+ std::initializer_list<Value> il = { *it++, *it++, *it++ };
+ ch_map c2( il );
c2.insert( it, lst.end() );
Examine<default_construction_present>( c2, lst );
+
+ // Constructor from initializer_list and compare object
+ ch_map c3( il, compare );
+ c3.insert( it, lst.end() );
+ Examine<default_construction_present>( c3, lst );
+
+ // Constructor from initializer_list, compare object and allocator
+ ch_map c4( il, compare, typename ch_map::allocator_type() );
+ c4.insert( it, lst.end() );
+ Examine<default_construction_present>( c4, lst );
#endif
// Copying constructor.
- ch_map c3(c1);
- Examine<default_construction_present>( c3, lst );
+ ch_map c5(c1);
+ Examine<default_construction_present>( c5, lst );
// Construct with non-default allocator
typedef tbb::concurrent_hash_map< first_type,second_type,tbb::tbb_hash_compare<first_type>,debug_allocator<Value> > ch_map_debug_alloc;
- ch_map_debug_alloc c4;
- c4.insert( lst.begin(), lst.end() );
- Examine<default_construction_present>( c4, lst );
- // Copying constructor for vector with different allocator type.
- ch_map_debug_alloc c5(c4);
- Examine<default_construction_present>( c5, lst );
- // Construction empty table with n preallocated buckets.
- ch_map c6( lst.size() );
+ ch_map_debug_alloc c6;
c6.insert( lst.begin(), lst.end() );
Examine<default_construction_present>( c6, lst );
- ch_map_debug_alloc c7( lst.size() );
- c7.insert( lst.begin(), lst.end() );
+ // Copying constructor
+ ch_map_debug_alloc c7(c6);
Examine<default_construction_present>( c7, lst );
- // Construction with copying iteration range and given allocator instance.
- ch_map c8( c1.begin(), c1.end() );
+ // Construction empty table with n preallocated buckets.
+ ch_map c8( lst.size() );
+ c8.insert( lst.begin(), lst.end() );
Examine<default_construction_present>( c8, lst );
- debug_allocator<Value> allocator;
- ch_map_debug_alloc c9( lst.begin(), lst.end(), allocator );
+ ch_map_debug_alloc c9( lst.size() );
+ c9.insert( lst.begin(), lst.end() );
Examine<default_construction_present>( c9, lst );
+ // Construction with copying iteration range.
+ ch_map c10( c1.begin(), c1.end() );
+ Examine<default_construction_present>( c10, lst );
+ // Construction with copying iteration range and given allocator instance.
+ debug_allocator<Value> allocator;
+ ch_map_debug_alloc c11( lst.begin(), lst.end(), allocator );
+ Examine<default_construction_present>( c11, lst );
+
+ typedef tbb::concurrent_hash_map< first_type,second_type,debug_hash_compare<first_type>,typename ch_map::allocator_type> ch_map_debug_hash;
+
+ // Constructor with two iterators and hash_compare
+ ch_map_debug_hash c12(c1.begin(), c1.end(), compare);
+ Examine<default_construction_present>( c12, lst );
+
+ ch_map_debug_hash c13(c1.begin(), c1.end(), compare, typename ch_map::allocator_type());
+ Examine<default_construction_present>( c13, lst );
}
#if __TBB_CPP11_SMART_POINTERS_PRESENT
REPORT("Known issue: tests for C++11 move semantics support are skipped.\n");
}
#endif //__TBB_CPP11_RVALUE_REF_PRESENT
+
+template<typename Key>
+struct non_default_constructible_hash_compare : tbb::tbb_hash_compare<Key> {
+ non_default_constructible_hash_compare() {
+ ASSERT(false, "The hash compare object must not be default constructed when the hash_map is constructed with a compare argument");
+ }
+
+ non_default_constructible_hash_compare(int) {}
+};
+
+void TestHashCompareConstructors() {
+ typedef int key_type;
+ typedef tbb::concurrent_hash_map<key_type, key_type, non_default_constructible_hash_compare<key_type> > map_type;
+
+ non_default_constructible_hash_compare<key_type> compare(0);
+ map_type::allocator_type allocator;
+
+ map_type map1(compare);
+ map_type map2(compare, allocator);
+
+ map_type map3(1, compare);
+ map_type map4(1, compare, allocator);
+
+ std::vector<map_type::value_type> reference_vector;
+ map_type map5(reference_vector.begin(), reference_vector.end(), compare);
+ map_type map6(reference_vector.begin(), reference_vector.end(), compare, allocator);
+
+#if __TBB_INITIALIZER_LISTS_PRESENT
+ map_type map7({}, compare);
+ map_type map8({}, compare, allocator);
+#endif
+}
+
//------------------------------------------------------------------------
// Test driver
//------------------------------------------------------------------------
}
TestCPP11Types();
+ TestHashCompareConstructors();
return Harness::Done;
}
#endif /* __TBB_CPP11_SMART_POINTERS_PRESENT */
}
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+template <template <typename...> typename TQueue>
+void TestDeductionGuides() {
+ using ComplexType = const std::string*;
+ std::string s("s");
+ std::vector<ComplexType> v;
+ auto l = {ComplexType(&s), ComplexType(&s) };
+
+ // check TQueue(InputIterator, InputIterator)
+ TQueue q1(v.begin(), v.end());
+ static_assert(std::is_same<decltype(q1), TQueue<ComplexType>>::value);
+
+ // check TQueue(InputIterator, InputIterator, Allocator)
+ TQueue q2(v.begin(), v.end(), std::allocator<ComplexType>());
+ static_assert(std::is_same<decltype(q2), TQueue<ComplexType, std::less<ComplexType>,
+ std::allocator<ComplexType>>>::value);
+
+ // check TQueue(std::initializer_list)
+ TQueue q3(l);
+ static_assert(std::is_same<decltype(q3), TQueue<ComplexType>>::value);
+
+ // check TQueue(std::initializer_list, Allocator)
+ TQueue q4(l, std::allocator<ComplexType>());
+ static_assert(std::is_same<decltype(q4), TQueue<ComplexType, std::less<ComplexType>,
+ std::allocator<ComplexType>>>::value);
+
+ // check TQueue(TQueue &)
+ TQueue q5(q1);
+ static_assert(std::is_same<decltype(q5), decltype(q1)>::value);
+
+ // check TQueue(TQueue &, Allocator)
+ TQueue q6(q4, std::allocator<ComplexType>());
+ static_assert(std::is_same<decltype(q6), decltype(q4)>::value);
+
+ // check TQueue(TQueue &&)
+ TQueue q7(std::move(q1));
+ static_assert(std::is_same<decltype(q7), decltype(q1)>::value);
+
+ // check TQueue(TQueue &&, Allocator)
+ TQueue q8(std::move(q4), std::allocator<ComplexType>());
+ static_assert(std::is_same<decltype(q8), decltype(q4)>::value);
+}
+#endif
+
int TestMain() {
if (MinThread < 1)
MinThread = 1;
TestTypes();
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+ TestDeductionGuides<tbb::concurrent_priority_queue>();
+#endif
+
#if __TBB_CPP11_RVALUE_REF_PRESENT
TestgMoveConstructor();
TestgMoveAssignOperator();
#endif /* __TBB_CPP11_SMART_POINTERS_PRESENT */
}
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+template <template <typename...> typename TQueue>
+void TestDeductionGuides() {
+ using ComplexType = const std::string*;
+ std::vector<ComplexType> v;
+
+ // check TQueue(InputIterator, InputIterator)
+ TQueue q1(v.begin(), v.end());
+ static_assert(std::is_same<decltype(q1), TQueue<ComplexType>>::value);
+
+ // check TQueue(InputIterator, InputIterator, Allocator)
+ TQueue q2(v.begin(), v.end(), std::allocator<ComplexType>());
+ static_assert(std::is_same<decltype(q2), TQueue<ComplexType, std::allocator<ComplexType>>>::value);
+
+ // check TQueue(TQueue &)
+ TQueue q3(q1);
+ static_assert(std::is_same<decltype(q3), decltype(q1)>::value);
+
+ // check TQueue(TQueue &, Allocator)
+ TQueue q4(q2, std::allocator<ComplexType>());
+ static_assert(std::is_same<decltype(q4), decltype(q2)>::value);
+
+ // check TQueue(TQueue &&)
+ TQueue q5(std::move(q1));
+ static_assert(std::is_same<decltype(q5), decltype(q1)>::value);
+
+ // check TQueue(TQueue &&, Allocator)
+ TQueue q6(std::move(q4), std::allocator<ComplexType>());
+ static_assert(std::is_same<decltype(q6), decltype(q4)>::value);
+}
+#endif
+
int TestMain () {
TestEmptiness();
TestTypes();
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+ TestDeductionGuides<tbb::concurrent_queue>();
+ TestDeductionGuides<tbb::concurrent_bounded_queue>();
+#endif
+
return Harness::Done;
}
#endif /* __TBB_CPP11_SMART_POINTERS_PRESENT */
}
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+template <template <typename...> typename TVector>
+void TestDeductionGuides() {
+ using ComplexType = const std::string*;
+ std::vector<ComplexType> v;
+ std::string s = "s";
+ auto l = {ComplexType(&s), ComplexType(&s)};
+
+ // check TVector(InputIterator, InputIterator)
+ TVector v1(v.begin(), v.end());
+ static_assert(std::is_same<decltype(v1), TVector<ComplexType>>::value);
+
+ // check TVector(InputIterator, InputIterator, Allocator)
+ TVector v2(v.begin(), v.end(), std::allocator<ComplexType>());
+ static_assert(std::is_same<decltype(v2),
+ TVector<ComplexType, std::allocator<ComplexType>>>::value);
+
+ // check TVector(std::initializer_list<T>)
+ TVector v3(l);
+ static_assert(std::is_same<decltype(v3),
+ TVector<ComplexType>>::value);
+
+ // check TVector(std::initializer_list, Allocator)
+ TVector v4(l, std::allocator<ComplexType>());
+ static_assert(std::is_same<decltype(v4), TVector<ComplexType, std::allocator<ComplexType>>>::value);
+
+ // check TVector(TVector&)
+ TVector v5(v1);
+ static_assert(std::is_same<decltype(v5), TVector<ComplexType>>::value);
+
+ // check TVector(TVector&, Allocator)
+ TVector v6(v5, std::allocator<ComplexType>());
+ static_assert(std::is_same<decltype(v6), TVector<ComplexType, std::allocator<ComplexType>>>::value);
+
+ // check TVector(TVector&&)
+ TVector v7(std::move(v1));
+ static_assert(std::is_same<decltype(v7), decltype(v1)>::value);
+
+ // check TVector(TVector&&, Allocator)
+ TVector v8(std::move(v5), std::allocator<ComplexType>());
+ static_assert(std::is_same<decltype(v8), TVector<ComplexType, std::allocator<ComplexType>>>::value);
+
+ // check TVector(TVector&, Allocator)
+ TVector v9(v1, std::allocator<ComplexType>());
+ static_assert(std::is_same<decltype(v9), TVector<ComplexType, std::allocator<ComplexType>>>::value);
+}
+#endif
+
int TestMain () {
if( MinThread<1 ) {
REPORT("ERROR: MinThread=%d, but must be at least 1\n",MinThread); MinThread = 1;
#endif /*__TBB_CPP11_RVALUE_REF_PRESENT */
#endif /* TBB_USE_EXCEPTIONS */
TestTypes();
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+ TestDeductionGuides<tbb::concurrent_vector>();
+#endif
ASSERT( !FooCount, NULL );
REMARK("sizeof(concurrent_vector<int>) == %d\n", (int)sizeof(tbb::concurrent_vector<int>));
return Harness::Done;
}
}
-#include "harness.h"
#include "harness_memory.h"
// TODO: Consider adding Huge Pages support on macOS (special mmap flag).
if ((newSystemTHPCount - currentSystemTHPCount) < allocCount
&& (newSystemTHPAllocatedSize - currentSystemTHPAllocatedSize) / (2 * 1024) < allocCount) {
REPORT( "Warning: the system didn't allocate needed amount of THPs.\n" );
- }
+ }
// Test memory unmap
for (int i = 0; i < allocCount; i++) {
}
#endif // __linux__
+inline size_t getStabilizedMemUsage() {
+ for (int i = 0; i < 3; i++) GetMemoryUsage();
+ return GetMemoryUsage();
+}
+
+inline void* reallocAndRetrieve(void* origPtr, size_t reallocSize, size_t& origBlockSize, size_t& reallocBlockSize) {
+ rml::internal::LargeMemoryBlock* origLmb = ((rml::internal::LargeObjectHdr *)origPtr - 1)->memoryBlock;
+ origBlockSize = origLmb->unalignedSize;
+
+ void* reallocPtr = rml::internal::reallocAligned(defaultMemPool, origPtr, reallocSize, 0);
+
+ // Retrieve the reallocated block information
+ rml::internal::LargeMemoryBlock* reallocLmb = ((rml::internal::LargeObjectHdr *)reallocPtr - 1)->memoryBlock;
+ reallocBlockSize = reallocLmb->unalignedSize;
+
+ return reallocPtr;
+}
+
+void TestReallocDecreasing() {
+
+ /* Testing that actual reallocation happens for large objects that do not fit the backend cache
+ but decrease in size by a factor of >= 2. */
+
+ size_t startSize = 100 * 1024 * 1024;
+ size_t maxBinnedSize = defaultMemPool->extMemPool.backend.getMaxBinnedSize();
+ void* origPtr = scalable_malloc(startSize);
+ void* reallocPtr = NULL;
+
+ // Realloc to a size smaller by 1MB
+ size_t origBlockSize = 42;
+ size_t reallocBlockSize = 43;
+ reallocPtr = reallocAndRetrieve(origPtr, startSize - 1 * 1024 * 1024, origBlockSize, reallocBlockSize);
+ MALLOC_ASSERT(origBlockSize == reallocBlockSize, "Reallocated block size shouldn't change");
+ MALLOC_ASSERT(reallocPtr == origPtr, "Original pointer shouldn't change");
+
+ // Repeatedly reallocate to smaller sizes while the requested size exceeds the max binned size
+ size_t reallocSize = (startSize / 2) - 1000; // exact realloc
+ while(reallocSize > maxBinnedSize) {
+
+ // Prevent huge/large objects caching
+ defaultMemPool->extMemPool.loc.cleanAll();
+ // Prevent local large object caching
+ TLSData *tls = defaultMemPool->getTLS(/*create=*/false);
+ tls->lloc.externalCleanup(&defaultMemPool->extMemPool);
+
+ size_t sysMemUsageBefore = getStabilizedMemUsage();
+ size_t totalMemSizeBefore = defaultMemPool->extMemPool.backend.getTotalMemSize();
+
+ reallocPtr = reallocAndRetrieve(origPtr, reallocSize, origBlockSize, reallocBlockSize);
+
+ MALLOC_ASSERT(origBlockSize > reallocBlockSize, "Reallocated block size should decrease.");
+
+ size_t sysMemUsageAfter = getStabilizedMemUsage();
+ size_t totalMemSizeAfter = defaultMemPool->extMemPool.backend.getTotalMemSize();
+
+ // Skip the check if backend caching occurred or the system memory usage could not be read
+ if (totalMemSizeBefore > totalMemSizeAfter && sysMemUsageAfter != 0 && sysMemUsageBefore != 0) {
+ MALLOC_ASSERT(sysMemUsageBefore > sysMemUsageAfter, "Memory was not released");
+ }
+
+ origPtr = reallocPtr;
+ reallocSize = (reallocSize / 2) - 1000; // exact realloc
+ }
+ scalable_free(reallocPtr);
+
+ /* TODO: Decreasing reallocation of large objects that fit backend cache */
+ /* TODO: Small objects decreasing reallocation test */
+}
+
int TestMain () {
scalable_allocation_mode(USE_HUGE_PAGES, 0);
#if !__TBB_WIN8UI_SUPPORT
TestHeapLimit();
TestLOC();
TestSlabAlignment();
+ TestReallocDecreasing();
#if __linux__
if (isTHPEnabledOnMachine()) {
#define TBB_PREVIEW_LOCAL_OBSERVER 1
#define __TBB_EXTRA_DEBUG 1
+#define TBB_PREVIEW_GLOBAL_CONTROL 1
#include <stdexcept>
#include <cstdlib>
#include <cstdio>
+#include <vector>
+#include <set>
#include "harness_fp.h"
#endif /* __TBB_TASK_ISOLATION */
#include "tbb/task_arena.h"
+#include "tbb/atomic.h"
#include "tbb/task_scheduler_observer.h"
#include "tbb/task_scheduler_init.h"
#include "tbb/parallel_for.h"
#pragma comment(lib, __TBB_STRING(__TBB_LIB_NAME))
#endif
+#include "tbb/global_control.h"
//--------------------------------------------------//
// Test that task_arena::initialize and task_arena::terminate work when doing nothing else.
/* maxthread is treated as the biggest possible concurrency level. */
}
//--------------------------------------------------//
-int TestMain () {
+// MyObserver checks that a worker thread always joins the same arena
+struct MyObserver: public tbb::task_scheduler_observer {
+ tbb::enumerable_thread_specific<tbb::task_arena*>& my_tls;
+ tbb::task_arena& my_arena;
+ tbb::atomic<int>& my_failure_counter;
+ tbb::atomic<int>& my_counter;
+
+ MyObserver(tbb::task_arena& a,
+ tbb::enumerable_thread_specific<tbb::task_arena*>& tls,
+ tbb::atomic<int>& failure_counter,
+ tbb::atomic<int>& counter)
+ : tbb::task_scheduler_observer(a), my_tls(tls), my_arena(a),
+ my_failure_counter(failure_counter), my_counter(counter) {
+ observe(true);
+ }
+ void on_scheduler_entry(bool worker) __TBB_override {
+ if (worker) {
+ ++my_counter;
+ tbb::task_arena*& cur_arena = my_tls.local();
+ if (cur_arena != 0 && cur_arena != &my_arena) {
+ ++my_failure_counter;
+ }
+ cur_arena = &my_arena;
+ }
+ }
+};
+
+struct MyLoopBody {
+ Harness::SpinBarrier& m_barrier;
+ MyLoopBody(Harness::SpinBarrier& b):m_barrier(b) { }
+ void operator()(int) const {
+ m_barrier.wait();
+ }
+};
+
+struct TaskForArenaExecute {
+ Harness::SpinBarrier& m_barrier;
+ TaskForArenaExecute(Harness::SpinBarrier& b):m_barrier(b) { }
+ void operator()() const {
+ tbb::parallel_for(0, tbb::this_task_arena::max_concurrency(),
+ MyLoopBody(m_barrier), tbb::simple_partitioner()
+ );
+ }
+};
+
+struct ExecuteParallelFor {
+ int n_per_thread;
+ int n_repetitions;
+ std::vector<tbb::task_arena>& arenas;
+ Harness::SpinBarrier& arena_barrier;
+ Harness::SpinBarrier& master_barrier;
+ ExecuteParallelFor(const int n_per_thread_, const int n_repetitions_,
+ std::vector<tbb::task_arena>& arenas_,
+ Harness::SpinBarrier& arena_barrier_, Harness::SpinBarrier& master_barrier_)
+ : n_per_thread(n_per_thread_), n_repetitions(n_repetitions_), arenas(arenas_),
+ arena_barrier(arena_barrier_), master_barrier(master_barrier_){ }
+ void operator()(int i) const {
+ for (int j = 0; j < n_repetitions; ++j) {
+ arenas[i].execute(TaskForArenaExecute(arena_barrier));
+ for(volatile int k = 0; k < n_per_thread; ++k){/* waiting until workers fall asleep */}
+ master_barrier.wait();
+ }
+ }
+};
+
+// If n_threads == 0, the default number of threads is used
+void TestArenaWorkersMigrationWithNumThreads(int n_threads = 0) {
+ if (n_threads == 0) {
+ n_threads = tbb::task_scheduler_init::default_num_threads();
+ }
+ const int max_n_arenas = 8;
+ int n_arenas = 2;
+ if(n_threads >= 16)
+ n_arenas = max_n_arenas;
+ else if (n_threads >= 8)
+ n_arenas = 4;
+ n_threads = n_arenas * (n_threads / n_arenas);
+ const int n_per_thread = 10000000;
+ const int n_repetitions = 100;
+ const int n_outer_repetitions = 20;
+ std::multiset<float> failure_ratio; // for calculating the median
+ tbb::global_control control(tbb::global_control::max_allowed_parallelism, n_threads - (n_arenas - 1));
+ Harness::SpinBarrier master_barrier(n_arenas);
+ Harness::SpinBarrier arena_barrier(n_threads);
+ MyObserver* observer[max_n_arenas];
+ std::vector<tbb::task_arena> arenas(n_arenas);
+ tbb::atomic<int> failure_counter;
+ tbb::atomic<int> counter;
+ tbb::enumerable_thread_specific<tbb::task_arena*> tls;
+ for (int i = 0; i < n_arenas; ++i) {
+ arenas[i].initialize(n_threads / n_arenas);
+ observer[i] = new MyObserver(arenas[i], tls, failure_counter, counter);
+ }
+ int ii = 0;
+ for (; ii < n_outer_repetitions; ++ii) {
+ failure_counter = 0;
+ counter = 0;
+ // Main code
+ NativeParallelFor(n_arenas, ExecuteParallelFor(n_per_thread, n_repetitions,
+ arenas, arena_barrier, master_barrier));
+ // TODO: get rid of check below by setting ratio between n_threads and n_arenas
+ failure_ratio.insert((counter != 0 ? float(failure_counter) / counter : 1.0f));
+ tls.clear();
+ // collect at least 3 elements in failure_ratio before calculating the median
+ if (ii > 1) {
+ std::multiset<float>::iterator it = failure_ratio.begin();
+ std::advance(it, failure_ratio.size() / 2);
+ if (*it < 0.02)
+ break;
+ }
+ }
+ for (int i = 0; i < n_arenas; ++i) {
+ delete observer[i];
+ }
+ // check whether the median is too big
+ std::multiset<float>::iterator it = failure_ratio.begin();
+ std::advance(it, failure_ratio.size() / 2);
+ // TODO: decrease constants 0.05 and 0.3 by setting ratio between n_threads and n_arenas
+ if (*it > 0.05) {
+ REPORT("Warning: So many cases when threads join to different arenas.\n");
+ ASSERT(*it <= 0.3, "A lot of cases when threads join to different arenas.\n");
+ }
+}
+
+void TestArenaWorkersMigration() {
+ TestArenaWorkersMigrationWithNumThreads(4);
+ if (tbb::task_scheduler_init::default_num_threads() != 4) {
+ TestArenaWorkersMigrationWithNumThreads();
+ }
+}
+
+//--------------------------------------------------//
+
+int TestMain() {
#if __TBB_TASK_ISOLATION
TestIsolatedExecute();
#endif /* __TBB_TASK_ISOLATION */
TestMultipleWaits();
TestMoveSemantics();
TestReturnValue();
+ TestArenaWorkersMigration();
return Harness::Done;
}
TestFuncDefinitionPresence( parallel_do, (const intarray&, const Body1a&, tbb::task_group_context&), void );
TestFuncDefinitionPresence( parallel_for_each, (const intarray&, const Body1&, tbb::task_group_context&), void );
TestFuncDefinitionPresence( parallel_for, (int, int, const Body1&, const tbb::auto_partitioner&, tbb::task_group_context&), void );
+ TestFuncDefinitionPresence( parallel_for, (int, int, const Body1&, tbb::task_group_context&), void );
TestFuncDefinitionPresence( parallel_reduce, (const tbb::blocked_range<int>&, Body2&, const tbb::auto_partitioner&, tbb::task_group_context&), void );
+ TestFuncDefinitionPresence( parallel_reduce, (const tbb::blocked_range<int>&, Body2&, tbb::task_group_context&), void );
+ TestFuncDefinitionPresence( parallel_deterministic_reduce, (const tbb::blocked_range<int>&, Body2&, const tbb::simple_partitioner&, tbb::task_group_context&), void );
+ TestFuncDefinitionPresence( parallel_deterministic_reduce, (const tbb::blocked_range<int>&, Body2&, tbb::task_group_context&), void );
#endif /* __TBB_TASK_GROUP_CONTEXT */
TestTypeDefinitionPresence( proportional_split );
void initialize_strings_vector(std::vector <string_pair>* vector)
{
vector->push_back(string_pair("TBB: VERSION\t\t2019.0", required)); // check TBB_VERSION
- vector->push_back(string_pair("TBB: INTERFACE VERSION\t11001", required)); // check TBB_INTERFACE_VERSION
+ vector->push_back(string_pair("TBB: INTERFACE VERSION\t11002", required)); // check TBB_INTERFACE_VERSION
vector->push_back(string_pair("TBB: BUILD_DATE", required));
vector->push_back(string_pair("TBB: BUILD_HOST", required));
vector->push_back(string_pair("TBB: BUILD_OS", required));