/*
    Copyright (c) 2005-2019 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

/* Common part for the partitioner whitebox tests */
#include <typeinfo>
#include <vector>
#include <algorithm>
#include <numeric>
#include <iterator>
#include <cstring>

#include "tbb/tbb_thread.h"
#include "tbb/enumerable_thread_specific.h"

#include "harness.h"        // NativeParallelFor, REPORT
#include "harness_assert.h"
#include "test_partitioner.h"
#if TBB_USE_DEBUG
// reducing the number of simulations due to test timeout
const size_t max_simulated_threads = 256;
#else
const size_t max_simulated_threads = 640;
#endif
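
// Per-thread simulated concurrency level: the stubbed library calls below read this
// enumerable_thread_specific value instead of querying a real task arena, so a single
// native thread can impersonate an arbitrary number of workers.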
typedef tbb::enumerable_thread_specific<size_t> ThreadNumsType;
size_t g_threadNumInitialValue = 10;
ThreadNumsType g_threadNums(g_threadNumInitialValue);
namespace whitebox_simulation {
size_t whitebox_thread_index = 0;
test_partitioner_utils::BinaryTree reference_tree;
} //namespace whitebox_simulation
// simulate a subset of task.h
namespace tbb {
namespace internal {
typedef unsigned short affinity_id;
}
class fake_task {
public:
    typedef internal::affinity_id affinity_id;
    void set_affinity(affinity_id a) { my_affinity = a; }
    affinity_id affinity() const { return my_affinity; }
    void set_parent(fake_task* p) { my_parent = p; }
    fake_task *parent() const { return my_parent; }
    bool is_stolen_task() const { return false; }
    intptr_t ref_count() const { return 1; }
    bool is_cancelled() const { return false; }
    static void spawn(fake_task &) {} // for legacy in partitioner.h
    virtual fake_task* execute() = 0; // enables dynamic_cast
    fake_task() : my_parent(0), my_affinity(0) {}
    virtual ~fake_task() {}
private:
    fake_task *my_parent;
    affinity_id my_affinity;
};
namespace task_arena {
static const int not_initialized = -2; // should match the corresponding value in task_arena.h
} //namespace task_arena
namespace this_task_arena {
inline int current_thread_index() { return (int)whitebox_simulation::whitebox_thread_index; }
} //namespace this_task_arena
} //namespace tbb
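
// Compile tbb/partitioner.h against the simulation: pre-defining the include
// guards keeps the real task.h / task_arena.h out, while the macros below splice
// the fake task and the stubbed library entry points into the partitioner code.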
#define __TBB_task_H
#define __TBB_task_arena_H
#define get_initial_auto_partitioner_divisor my_get_initial_auto_partitioner_divisor
#define affinity_partitioner_base_v3 my_affinity_partitioner_base_v3
#define task fake_task
#define __TBB_STATIC_THRESHOLD 0
#include "tbb/partitioner.h"
#undef __TBB_STATIC_THRESHOLD
#undef task
#undef affinity_partitioner_base_v3
#undef get_initial_auto_partitioner_divisor
// replace library functions to simulate concurrency
namespace tbb {
namespace internal {

// mirrors the library's divisor heuristic, with the thread count taken from the simulation
size_t my_get_initial_auto_partitioner_divisor() {
    const size_t X_FACTOR = 4;
    return X_FACTOR * g_threadNums.local();
}

// real library entry points, resolved from the TBB binary
void* __TBB_EXPORTED_FUNC NFS_Allocate( size_t n_element, size_t element_size, void* hint );
void __TBB_EXPORTED_FUNC NFS_Free( void* );
void my_affinity_partitioner_base_v3::resize( unsigned factor ) {
    // Check factor to avoid asking for the number of workers while there might be no arena.
    size_t new_size = factor ? factor * g_threadNums.local() : 0;
    if (new_size != my_size) {
        if (my_array) {
            NFS_Free(my_array);
            // Following two assignments must be done here for the sake of exception safety.
            my_array = NULL;
            my_size = 0;
        }
        if (new_size) {
            my_array = static_cast<affinity_id*>(NFS_Allocate(new_size, sizeof(affinity_id), NULL));
            memset(my_array, 0, sizeof(affinity_id) * new_size);
            my_size = new_size;
        }
    }
}
} //namespace internal
// simulate a subset of parallel_for
namespace interface9 {
namespace internal {

// parallel_for algorithm that executes sequentially
template<typename Range, typename Body, typename Partitioner>
class start_for : public fake_task {
    Range my_range;
    const Body my_body;
    typename Partitioner::task_partition_type my_partition;
    size_t m_executedBegin, m_executedEnd;
    bool m_firstTimeRun;
    size_t m_joinedBegin, m_joinedEnd;
    test_partitioner_utils::BinaryTree* m_tree;
public:
    start_for( const Range& range, const Body& body, Partitioner& partitioner,
               test_partitioner_utils::BinaryTree* tree ) :
        my_range(range), my_body(body), my_partition(partitioner),
        m_executedBegin(0), m_executedEnd(0), m_firstTimeRun(true),
        m_joinedBegin(/* grows left */ range.end()), m_joinedEnd(range.end()), m_tree(tree)
    {
        if (m_tree)
            m_tree->push_node( test_partitioner_utils::make_node(my_range.begin(), my_range.end(), affinity()) );
    }
    //! Splitting constructor used to generate children.
    /** parent_ becomes the left child. The newly constructed object is the right child. */
    start_for( start_for& parent_, typename Partitioner::split_type& split_obj) :
        my_range(parent_.my_range, split_obj),
        my_body(parent_.my_body),
        my_partition(parent_.my_partition, split_obj),
        m_executedBegin(0), m_executedEnd(0), m_firstTimeRun(true),
        m_joinedBegin(/* grows left */ my_range.end()), m_joinedEnd(my_range.end()),
        m_tree(parent_.m_tree)
    {
        set_parent(parent_.parent());
        my_partition.set_affinity(*this);
        if (m_tree) {
            // collecting splitting statistics
            m_tree->push_node( test_partitioner_utils::make_node(my_range.begin(),
                                                                 my_range.end(),
                                                                 affinity()) );
            m_tree->push_node( test_partitioner_utils::make_node(parent_.my_range.begin(),
                                                                 parent_.my_range.end(),
                                                                 parent_.affinity()) );
        }
    }
    //! Construct the right child from the given range as a response to the demand.
    /** parent_ remains the left child. The newly constructed object is the right child. */
    start_for( start_for& parent_, const Range& r, depth_t d ) :
        my_range(r),
        my_body(parent_.my_body),
        my_partition(parent_.my_partition, tbb::split()),
        m_executedBegin(0), m_executedEnd(0), m_firstTimeRun(true),
        m_joinedBegin(/* grows left */ r.end()), m_joinedEnd(r.end()),
        m_tree(parent_.m_tree)
    {
        set_parent(parent_.parent());
        my_partition.set_affinity(*this);
        my_partition.align_depth( d );
    }
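
    // Depth-first "execution" of the task tree: my_partition.execute() splits the
    // range via the offer_work() callbacks below (each right sibling runs inline),
    // then the asserts verify that exactly the original [begin, end) was covered.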
    fake_task* execute() __TBB_override {
        my_partition.check_being_stolen( *this );
        size_t origBegin = my_range.begin();
        size_t origEnd = my_range.end();

        my_partition.execute(*this, my_range);

        ASSERT(m_executedEnd == m_joinedBegin, "Non-continuous execution");
        m_executedEnd = m_joinedEnd;

        ASSERT(origBegin == m_executedBegin && origEnd == m_executedEnd,
               "Not all iterations were processed");
        return NULL;
    }
    //! Run the body for a range; serves as a callback for the partitioner
    void run_body( Range &r ) {
        if( r.is_ensure_non_emptiness() )
            ASSERT( !r.empty(), "Empty ranges are not allowed" );
        my_body( r );
        if (m_firstTimeRun) {
            m_firstTimeRun = false;
            m_executedBegin = m_executedEnd = r.begin();
        }
        ASSERT(m_executedBegin <= r.begin() && m_executedEnd <= r.end(),
               "Non-continuous execution");
        m_executedEnd = r.end();
    }
    //! "Spawn" the right task; serves as a callback for the partitioner
    void offer_work(typename Partitioner::split_type& split_obj) {
        start_for sibling(*this, split_obj);
        sibling.execute();
        join(sibling.m_executedBegin, sibling.m_executedEnd);
    }
    //! "Spawn" the right task; serves as a callback for the partitioner
    void offer_work(const Range& r, depth_t d = 0) {
        start_for sibling(*this, r, d);
        sibling.execute();
        join(sibling.m_executedBegin, sibling.m_executedEnd);
    }
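
    // Siblings complete in depth-first order, so the joined interval grows leftwards:
    // each sibling's executed range must abut the region joined so far, which keeps
    // the union of executed iterations contiguous.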
    void join(size_t siblingExecutedBegin, size_t siblingExecutedEnd) {
        ASSERT(siblingExecutedEnd == m_joinedBegin, "Sibling executed ranges are not contiguous");
        m_joinedBegin = siblingExecutedBegin;
    }
};

} //namespace internal
} //namespace interface9
} //namespace tbb
namespace whitebox_simulation {
using namespace tbb::interface9::internal;
template<typename Range, typename Body, typename Partitioner>
void parallel_for( const Range& range, const Body& body, Partitioner& partitioner,
                   test_partitioner_utils::BinaryTree* tree = NULL) {
    if (!range.empty()) {
        // dummy root task: a concrete start_for stands in for the abstract fake_task
        start_for<Range, Body, Partitioner> parent(range, body, partitioner, /*tree=*/NULL);
        start_for<Range, Body, Partitioner> start(range, body, partitioner, tree);
        start.set_parent(&parent);
        start.execute();
    }
}
} //namespace whitebox_simulation
template <typename Range, typename Body, typename Partitioner>
void test_case(Range& range, const Body& body, Partitioner& partitioner,
               test_partitioner_utils::BinaryTree* tree = NULL) {
    whitebox_simulation::parallel_for(range, body, partitioner, tree);
}

// Functions that generate sizes for the range objects used in the tests
template <typename T>
size_t default_range_size_generator(T* factor, unsigned index, size_t thread_num) {
    return size_t(factor[index] * thread_num);
}

size_t shifted_left_range_size_generator(size_t* factor, unsigned index, size_t thread_num) {
    return factor[index] * thread_num - 1;
}

size_t shifted_right_range_size_generator(size_t* factor, unsigned index, size_t thread_num) {
    return factor[index] * thread_num + 1;
}
size_t max_range_size_generator(size_t*, unsigned, size_t) {
    return size_t(-1); // the largest representable range size
}
size_t simple_size_generator(size_t* factor, unsigned index, size_t) {
    return factor[index]; // raw factor, independent of the thread count
}
namespace uniform_iterations_distribution {

/*
 * The tests check for a uniform distribution of a range's iterations among all tasks
 * just after the work distribution phase has been completed and just before the work
 * balancing phase starts.
 */

using namespace test_partitioner_utils;
class ParallelTestBody {
public:
    struct use_case_settings_t;

    typedef void (*CheckerFuncType)(const char*, size_t, const use_case_settings_t*, const RangeStatisticData&);
    struct use_case_settings_t {
        size_t thread_num;           // number of threads used during the current use case
        unsigned factors_array_len;  // size of the 'factors' array
        size_t range_begin;          // beginning of the range iterations
        bool provide_feedback;       // 'true' if the range should give feedback
        bool ensure_non_empty_size;  // don't allow empty-size ranges

        size_t above_threads_size_tolerance; // allowed deviation of the number of created ranges
                                             // when the initial size of the range was greater than
                                             // or equal to the number of threads

        size_t below_threads_size_tolerance; // allowed deviation of the number of created ranges
                                             // when the initial size of the range was less than
                                             // the number of threads

        size_t between_min_max_ranges_tolerance; // allowed difference in iterations
                                                 // between the biggest and smallest ranges

        CheckerFuncType checker;     // checker function for a particular test case
    };
    ParallelTestBody(size_t parallel_group_thread_starting_index)
        : m_parallel_group_thread_starting_index(parallel_group_thread_starting_index) { }

    void operator()(size_t) const { ASSERT( false, "Empty ParallelTestBody called" ); }
    static void uniform_distribution_checker(const char* rangeName, size_t rangeSize, const use_case_settings_t* settings,
                                             const RangeStatisticData& stat)
    {
        // Check that all threads were given a task
        if (rangeSize >= settings->thread_num) {
            uint64_t disparity = uint64_t(
                max(stat.m_rangeNum, settings->thread_num) - min(stat.m_rangeNum, settings->thread_num));
            if (disparity > settings->above_threads_size_tolerance) {
                REPORT("ERROR: '%s (f=%d|e=%d)': |#ranges(%llu)-#threads(%llu)|=%llu > %llu=tolerance\n",
                       rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
                       uint64_t(stat.m_rangeNum), uint64_t(settings->thread_num), disparity,
                       uint64_t(settings->above_threads_size_tolerance));
                ASSERT(disparity <= settings->above_threads_size_tolerance, "Incorrect number of range "
                       "objects was created before the work balancing phase started");
            }
        } else if (settings->ensure_non_empty_size && rangeSize != 0) {
            uint64_t disparity = uint64_t(max(stat.m_rangeNum, rangeSize) - min(stat.m_rangeNum, rangeSize));
            if (disparity > settings->below_threads_size_tolerance) {
                REPORT("ERROR: '%s (f=%d|e=%d)': |#ranges-range size|=%llu > %llu=tolerance\n",
                       rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
                       disparity, uint64_t(settings->below_threads_size_tolerance));
                ASSERT(disparity <= settings->below_threads_size_tolerance, "Incorrect number of range objects"
                       " was created before the work balancing phase started");
            }
        }
        // Check the difference between the min and max number of range iterations
        size_t diff = stat.m_maxRangeSize - stat.m_minRangeSize;
        if (diff > settings->between_min_max_ranges_tolerance) {
            REPORT("ERROR: '%s (f=%d|e=%d)': range size difference=%llu > %llu=tolerance\n",
                   rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
                   uint64_t(diff), uint64_t(settings->between_min_max_ranges_tolerance));
            ASSERT(diff <= settings->between_min_max_ranges_tolerance, "Uniform iteration distribution error");
        }
    }
    // Checker for test cases where ranges don't provide feedback to the partitioner during
    // a proportional split and differ from the tbb::blocked_range implementation in their
    // splitting algorithm
    static void nonuniform_distribution_checker(const char* rangeName, size_t rangeSize, const use_case_settings_t* settings,
                                                const RangeStatisticData& stat)
    {
        if (stat.m_rangeNum > settings->thread_num) {
            REPORT("ERROR: '%s (f=%d|e=%d)': %llu=#ranges > #threads=%llu\n",
                   rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
                   uint64_t(stat.m_rangeNum), uint64_t(settings->thread_num));
            ASSERT(stat.m_rangeNum <= settings->thread_num,
                   "Incorrect number of range objects was created before the work balancing phase started");
        }
        // Check the difference between the min and max number of range iterations
        size_t diff = stat.m_maxRangeSize - stat.m_minRangeSize;
        if (diff > rangeSize) {
            REPORT("ERROR: '%s (f=%d|e=%d)': range size difference=%llu > %llu=initial range size\n",
                   rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
                   uint64_t(diff), uint64_t(rangeSize));
            ASSERT(diff <= rangeSize, "Iteration distribution error");
        }
    }

protected:
    size_t m_parallel_group_thread_starting_index; // starting index of the simulated thread group
    template <typename Range, typename Partitioner, typename T>
    void test(use_case_settings_t& settings, T factors[], size_t (*rsgFunc)(T*, unsigned, size_t)
              = &default_range_size_generator<T>) const
    {
        for (unsigned i = 0; i < settings.factors_array_len; ++i) {
            size_t range_end = rsgFunc(factors, i, settings.thread_num);
            RangeStatisticData stat = { /*range num=*/ 0, /*minimal size of range=*/ 0,
                /*maximal size of range=*/ 0, /*minimal size of range was not rewritten yet=*/ false };
            Range range = Range(settings.range_begin, range_end, &stat, settings.provide_feedback,
                                settings.ensure_non_empty_size);
            Partitioner my_partitioner;
            test_case(range, SimpleBody(), my_partitioner, NULL);
            size_t range_size = range_end - settings.range_begin;
            const char* rangeName = typeid(range).name();
            settings.checker(rangeName, range_size, &settings, stat);
        }
    }
};
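
// Drive a ParallelTestBody over every simulated thread index: the indices are
// processed in groups of at most hardware_concurrency native threads, each of
// which impersonates one simulated thread count via g_threadNums.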
template <typename ParallelTestBody>
void test() {
    size_t hw_threads_num = tbb::tbb_thread::hardware_concurrency();
    size_t threadsToRunOn = std::min<size_t>(max_simulated_threads, hw_threads_num);

    size_t parallel_group_thread_starting_index = 1;
    while( parallel_group_thread_starting_index <= max_simulated_threads - threadsToRunOn ) {
        NativeParallelFor(threadsToRunOn, ParallelTestBody(parallel_group_thread_starting_index));
        parallel_group_thread_starting_index += threadsToRunOn;
    }
    NativeParallelFor(max_simulated_threads - parallel_group_thread_starting_index,
                      ParallelTestBody(parallel_group_thread_starting_index));
}
namespace task_affinity_whitebox {
size_t range_begin = 0;
size_t range_end = 20;
} //namespace task_affinity_whitebox
template<typename Partitioner>
void check_tree(const test_partitioner_utils::BinaryTree&);

template<>
void check_tree<tbb::affinity_partitioner>(const test_partitioner_utils::BinaryTree& tree) {
    ASSERT(tree == whitebox_simulation::reference_tree,
           "affinity_partitioner distributes tasks differently from run to run");
}
template<>
void check_tree<tbb::static_partitioner>(const test_partitioner_utils::BinaryTree& tree) {
    std::vector<test_partitioner_utils::TreeNode*> tree_leafs;
    tree.fill_leafs(tree_leafs);
    typedef std::vector<size_t> Slots;
    Slots affinity_slots(tree_leafs.size() + 1, 0);
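
    // Slot i counts the leaf ranges pinned to affinity id i. Affinity ids are
    // 1-based; slot 0 collects the leaf that the executing thread keeps for itself.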
    for (std::vector<test_partitioner_utils::TreeNode*>::iterator i = tree_leafs.begin(); i != tree_leafs.end(); ++i) {
        affinity_slots[(*i)->m_affinity]++;
        if ((*i)->m_affinity == 0)
            ASSERT((*i)->m_range_begin == task_affinity_whitebox::range_begin,
                   "Task with affinity 0 was executed with a wrong range");
    }

    typedef std::iterator_traits<Slots::iterator>::difference_type slots_difference_type;
    ASSERT(std::count(affinity_slots.begin(), affinity_slots.end(), size_t(0)) == slots_difference_type(1),
           "static_partitioner incorrectly distributed tasks across threads");
    ASSERT(std::count(affinity_slots.begin(), affinity_slots.end(), size_t(1)) == slots_difference_type(g_threadNums.local()),
           "static_partitioner incorrectly distributed tasks across threads");
    ASSERT(affinity_slots[tbb::this_task_arena::current_thread_index() + 1] == 0,
           "static_partitioner incorrectly assigns the task with affinity 0");
    ASSERT(std::accumulate(affinity_slots.begin(), affinity_slots.end(), size_t(0)) == g_threadNums.local(),
           "static_partitioner has created more tasks than the number of threads");
}
template<typename Partitioner>
void test_task_affinity() {
    using namespace task_affinity_whitebox;
    test_partitioner_utils::SimpleBody body;
    for (size_t p = 1; p <= 50; ++p) {
        g_threadNums.local() = p;
        whitebox_simulation::whitebox_thread_index = 0;
        test_partitioner_utils::TestRanges::BlockedRange range(range_begin, range_end, /*statData*/NULL,
            /*provide_feedback*/false, /*ensure_non_empty_size*/false);
        Partitioner partitioner;
        whitebox_simulation::reference_tree = test_partitioner_utils::BinaryTree();
        whitebox_simulation::parallel_for(range, body, partitioner, &(whitebox_simulation::reference_tree));
        while (whitebox_simulation::whitebox_thread_index < p) {
            test_partitioner_utils::BinaryTree tree;
            whitebox_simulation::parallel_for(range, body, partitioner, &tree);
            check_tree<Partitioner>(tree);
            whitebox_simulation::whitebox_thread_index++;
        }
    }
}

} /* namespace uniform_iterations_distribution */