src/test/test_partitioner_whitebox.h

   1 /*
   2     Copyright (c) 2005-2019 Intel Corporation
   3
   4     Licensed under the Apache License, Version 2.0 (the "License");
   5     you may not use this file except in compliance with the License.
   6     You may obtain a copy of the License at
   7
   8         http://www.apache.org/licenses/LICENSE-2.0
   9
  10     Unless required by applicable law or agreed to in writing, software
  11     distributed under the License is distributed on an "AS IS" BASIS,
  12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13     See the License for the specific language governing permissions and
  14     limitations under the License.
  15 */
  16
  17 /* Common part for the partitioner whitebox tests */
  18
  19 #include <typeinfo>
  20
  21 #include "tbb/tbb_thread.h"
  22 #include "tbb/enumerable_thread_specific.h"
  23
  24 #include "string.h"
  25 #include "harness_assert.h"
  26 #include "test_partitioner.h"
  27 #include <numeric>
  28
  29 #if TBB_USE_DEBUG
  30 // reducing number of simulations due to test timeout
  31 const size_t max_simulated_threads = 256;
  32 #else
  33 const size_t max_simulated_threads = 640;
  34 #endif
  35
  36 typedef tbb::enumerable_thread_specific<size_t> ThreadNumsType;
  37 size_t g_threadNumInitialValue = 10;
  38 ThreadNumsType g_threadNums(g_threadNumInitialValue);
  39
  40 namespace whitebox_simulation {
  41 size_t whitebox_thread_index = 0;
  42 test_partitioner_utils::BinaryTree reference_tree;
  43 }
  44
  45 // simulate a subset of task.h
  46 namespace tbb {
  47 namespace internal {
  48 typedef unsigned short affinity_id;
  49 }
  50 class fake_task {
  51 public:
  52     typedef internal::affinity_id affinity_id;
  53     void set_affinity(affinity_id a) { my_affinity = a; }
  54     affinity_id affinity() const { return my_affinity; }
  55     void set_parent(fake_task* p) { my_parent = p; }
  56     fake_task *parent() const { return my_parent; }
  57     bool is_stolen_task() const { return false; }
  58     intptr_t ref_count() const { return 1; }
  59     bool is_cancelled() const { return false; }
  60     static void spawn(fake_task &) {} // for legacy in partitioner.h
  61     virtual fake_task* execute() = 0; // enables dynamic_cast
  62
  63     fake_task() : my_parent(0), my_affinity(0) {}
  64     virtual ~fake_task() {}
  65 private:
  66     fake_task *my_parent;
  67     affinity_id my_affinity;
  68 };
  69 namespace task_arena {
  70 static const int not_initialized = -2;//should match corresponding value in task_arena.h
  71 }//namespace task_arena
  72 namespace this_task_arena {
  73 inline int current_thread_index() { return (int)whitebox_simulation::whitebox_thread_index; }
  74 }
  75 }//namespace tbb
  76
  77 #define __TBB_task_H
  78 #define __TBB_task_arena_H
  79 #define get_initial_auto_partitioner_divisor my_get_initial_auto_partitioner_divisor
  80 #define affinity_partitioner_base_v3 my_affinity_partitioner_base_v3
  81 #define task fake_task
  82 #define __TBB_STATIC_THRESHOLD 0
  83 #include "tbb/partitioner.h"
  84 #undef __TBB_STATIC_THRESHOLD
  85 #undef task
  86 #undef affinity_partitioner_base_v3
  87 #undef get_initial_auto_partitioner_divisor
  88
  89 // replace library functions to simulate concurrency
  90 namespace tbb {
  91 namespace internal {
  92 size_t my_get_initial_auto_partitioner_divisor() {
  93     const size_t X_FACTOR = 4;
  94     return X_FACTOR * g_threadNums.local();
  95 }
  96
  97 void* __TBB_EXPORTED_FUNC NFS_Allocate( size_t n_element, size_t element_size, void* hint );
  98 void __TBB_EXPORTED_FUNC NFS_Free( void* );
  99
 100 void my_affinity_partitioner_base_v3::resize( unsigned factor ) {
 101     // Check factor to avoid asking for number of workers while there might be no arena.
 102     size_t new_size = factor ? factor * g_threadNums.local() : 0;
 103     if (new_size != my_size) {
 104         if (my_array) {
 105             NFS_Free(my_array);
 106             // Following two assignments must be done here for sake of exception safety.
 107             my_array = NULL;
 108             my_size = 0;
 109         }
 110         if (new_size) {
 111             my_array = static_cast<affinity_id*>(NFS_Allocate(new_size, sizeof(affinity_id), NULL ));
 112             memset(my_array, 0, sizeof(affinity_id) * new_size);
 113             my_size = new_size;
 114         }
 115     }
 116 }
 117
 118 } //namespace internal
 119 // simulate a subset of parallel_for
 120 namespace interface9 {
 121 namespace internal {
 122
 123 // parallel_for algorithm that executes sequentially
 124 template<typename Range, typename Body, typename Partitioner>
 125 class start_for : public fake_task {
 126     Range my_range;
 127     Body my_body;
 128     typename Partitioner::task_partition_type my_partition;
 129     size_t m_executedBegin, m_executedEnd;
 130     bool m_firstTimeRun;
 131     size_t m_joinedBegin, m_joinedEnd;
 132     test_partitioner_utils::BinaryTree* m_tree;
 133 public:
 134     start_for( const Range& range, const Body& body, Partitioner& partitioner,
 135                test_partitioner_utils::BinaryTree* tree ) :
 136         my_range(range), my_body(body), my_partition(partitioner),
 137         m_executedBegin(0), m_executedEnd(0), m_firstTimeRun(true),
 138         m_joinedBegin(/* grows left */ range.end()), m_joinedEnd(range.end()), m_tree(tree)
 139     {
 140         if (m_tree) {
 141             m_tree->push_node( test_partitioner_utils::make_node(my_range.begin(), my_range.end(), affinity()) );
 142         }
 143     }
 144     //! Splitting constructor used to generate children.
 145     /** parent_ becomes left child.  Newly constructed object is right child. */
 146     start_for( start_for& parent_, typename Partitioner::split_type& split_obj) :
 147         my_range(parent_.my_range, split_obj),
 148         my_body(parent_.my_body),
 149         my_partition(parent_.my_partition, split_obj),
 150         m_executedBegin(0), m_executedEnd(0), m_firstTimeRun(true),
 151         m_joinedBegin(/* grows left */ my_range.end()), m_joinedEnd(my_range.end()),
 152         m_tree(parent_.m_tree)
 153     {
 154         set_parent(parent_.parent());
 155         my_partition.set_affinity(*this);
 156
 157         if (m_tree) {
 158             // collecting splitting statistics
 159             m_tree->push_node( test_partitioner_utils::make_node(my_range.begin(),
 160                                                                  my_range.end(),
 161                                                                  affinity()) );
 162             m_tree->push_node( test_partitioner_utils::make_node(parent_.my_range.begin(),
 163                                                                  parent_.my_range.end(),
 164                                                                  parent_.affinity()) );
 165         }
 166     }
 167     //! Construct right child from the given range as response to the demand.
 168     /** parent_ remains left child.  Newly constructed object is right child. */
 169     start_for( start_for& parent_, const Range& r, depth_t d ) :
 170         my_range(r),
 171         my_body(parent_.my_body),
 172         my_partition(parent_.my_partition, tbb::split()),
 173         m_executedBegin(0), m_executedEnd(0), m_firstTimeRun(true),
 174         m_joinedBegin(/* grows left */ r.end()), m_joinedEnd(r.end()),
 175         m_tree(parent_.m_tree)
 176     {
 177         set_parent(parent_.parent());
 178         my_partition.set_affinity(*this);
 179         my_partition.align_depth( d );
 180     }
 181     fake_task* execute() __TBB_override {
 182         my_partition.check_being_stolen( *this );
 183         size_t origBegin = my_range.begin();
 184         size_t origEnd = my_range.end();
 185
 186         my_partition.execute(*this, my_range);
 187
 188         ASSERT(m_executedEnd == m_joinedBegin, "Non-continuous execution");
 189         m_executedEnd = m_joinedEnd;
 190
 191         ASSERT(origBegin == m_executedBegin && origEnd == m_executedEnd,
 192                "Not all iterations were processed");
 193         return NULL;
 194     }
 195     //! Run body for range, serves as callback for partitioner
 196     void run_body( Range &r ) {
 197         if( r.is_ensure_non_emptiness() )
 198             ASSERT( !r.empty(), "Empty ranges are not allowed" );
 199         my_body(r);
 200         if (m_firstTimeRun) {
 201             m_firstTimeRun = false;
 202             m_executedBegin = m_executedEnd = r.begin();
 203         }
 204         ASSERT(m_executedBegin <= r.begin() && m_executedEnd <= r.end(),
 205                "Non-continuous execution");
 206         m_executedEnd = r.end();
 207     }
 208     //! spawn right task, serves as callback for partitioner
 209     void offer_work(typename Partitioner::split_type& split_obj) {
 210         start_for sibling(*this, split_obj);
 211         sibling.execute();
 212         join(sibling.m_executedBegin, sibling.m_executedEnd);
 213     }
 214     //! spawn right task, serves as callback for partitioner
 215     void offer_work(const Range& r, depth_t d = 0) {
 216         start_for sibling(*this, r, d);
 217         sibling.execute();
 218         join(sibling.m_executedBegin, sibling.m_executedEnd);
 219     }
 220     void join(size_t siblingExecutedBegin, size_t siblingExecutedEnd) {
 221         ASSERT(siblingExecutedEnd == m_joinedBegin, "?");
 222         m_joinedBegin = siblingExecutedBegin;
 223     }
 224 };
 225
 226 } //namespace internal
 227 } //namespace interfaceX
 228 } //namespace tbb
 229
 230 namespace whitebox_simulation {
 231 using namespace tbb::interface9::internal;
 232 template<typename Range, typename Body, typename Partitioner>
 233 void parallel_for( const Range& range, const Body& body, Partitioner& partitioner,
 234                    test_partitioner_utils::BinaryTree* tree = NULL) {
 235     if (!range.empty()) {
 236         flag_task parent;
 237         start_for<Range, Body, Partitioner> start(range, body, partitioner, tree);
 238         start.set_parent(&parent);
 239         start.execute();
 240     }
 241 }
 242
 243 } //namespace whitebox_simulation
 244
 245 template <typename Range, typename Body, typename Partitioner>
 246 void test_case(Range& range, const Body& body, Partitioner& partitioner,
 247                test_partitioner_utils::BinaryTree* tree = NULL) {
 248     whitebox_simulation::parallel_for(range, body, partitioner, tree);
 249 }
 250
 251 // Functions generate size for range objects used in tests
 252 template <typename T>
 253 size_t default_range_size_generator(T* factor, unsigned index, size_t thread_num) {
 254     return size_t(factor[index] * thread_num);
 255 }
 256
 257 size_t shifted_left_range_size_generator(size_t* factor, unsigned index, size_t thread_num) {
 258     return factor[index] * thread_num - 1;
 259 }
 260
 261 size_t shifted_right_range_size_generator(size_t* factor, unsigned index, size_t thread_num) {
 262     return factor[index] * thread_num + 1;
 263 }
 264
 265 size_t max_range_size_generator(size_t*, unsigned, size_t) {
 266     return size_t(-1);
 267 }
 268
 269 size_t simple_size_generator(size_t*, unsigned index, size_t) {
 270     return index;
 271 }
 272
 273 namespace uniform_iterations_distribution {
 274
 275 /*
 276  * Test checks uniform distribution of range's iterations among all tasks just after
 277  * work distribution phase has been completed and just before work balancing phase has been started
 278  */
 279
 280 using namespace test_partitioner_utils;
 281
 282 class ParallelTestBody {
 283 public:
 284     struct use_case_settings_t;
 285
 286     typedef void (*CheckerFuncType)(const char*, size_t, const use_case_settings_t*, const RangeStatisticData&);
 287
 288     struct use_case_settings_t {
 289         size_t thread_num;                         // number of threads used during current use case
 290         unsigned factors_array_len;                // size of 'factors' array
 291         size_t range_begin;                        // beginning of range iterations
 292         bool provide_feedback;                     // 'true' if range should give feedback
 293         bool ensure_non_empty_size;                // don't allow empty size ranges
 294
 295         size_t above_threads_size_tolerance;       // allowed value for number of created ranges
 296                                                    // when initial size of the range was greater or
 297                                                    // equal to number of threads
 298
 299         size_t below_threads_size_tolerance;       // allowed value for number of created ranges
 300                                                    // when initial size of the range was less than
 301                                                    // number of threads
 302
 303         size_t between_min_max_ranges_tolerance;   // allowed value for difference of iterations
 304                                                    // between bigger and lesser ranges
 305
 306         CheckerFuncType checker;                   // checker function for a particular test case
 307     };
 308
 309     ParallelTestBody(size_t parallel_group_thread_starting_index)
 310         : m_parallel_group_thread_starting_index(parallel_group_thread_starting_index) { }
 311
 312     void operator()(size_t) const { ASSERT( false, "Empty ParallelTestBody called" ); }
 313
 314     static void uniform_distribution_checker(const char* rangeName, size_t rangeSize, const use_case_settings_t* settings,
 315         const RangeStatisticData& stat)
 316     {
 317         // Checking that all threads were given a task
 318         if (rangeSize >= settings->thread_num) {
 319             uint64_t disparity =
 320                 max(stat.m_rangeNum, settings->thread_num) - min(stat.m_rangeNum, settings->thread_num);
 321             if (disparity > settings->above_threads_size_tolerance) {
 322                 REPORT("ERROR: '%s (f=%d|e=%d)': |#ranges(%llu)-#threads(%llu)|=%llu > %llu=tolerance\n",
 323                     rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size), stat.m_rangeNum,
 324                     settings->thread_num, disparity, uint64_t(settings->above_threads_size_tolerance));
 325                 ASSERT(disparity <= settings->above_threads_size_tolerance, "Incorrect number of range "
 326                     "objects was created before work balancing phase started");
 327             }
 328         } else if (settings->ensure_non_empty_size && rangeSize != 0) {
 329             uint64_t disparity = max(stat.m_rangeNum, rangeSize) - min(stat.m_rangeNum, rangeSize);
 330             if (disparity > settings->below_threads_size_tolerance ) {
 331                 REPORT("ERROR: '%s (f=%d|e=%d)': |#ranges-range size|=%llu > %llu=tolerance\n",
 332                     rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
 333                     disparity, uint64_t(settings->below_threads_size_tolerance));
 334                 ASSERT(disparity <= settings->below_threads_size_tolerance, "Incorrect number of range objects"
 335                     " was created before work balancing phase started");
 336             }
 337         }
 338         // Checking difference between min and max number of range iterations
 339         size_t diff = stat.m_maxRangeSize - stat.m_minRangeSize;
 340         if (diff > settings->between_min_max_ranges_tolerance) {
 341             REPORT("ERROR: '%s (f=%d|e=%d)': range size difference=%llu > %llu=tolerance\n",
 342                 rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
 343                 uint64_t(diff), uint64_t(settings->between_min_max_ranges_tolerance));
 344             ASSERT(diff <= settings->between_min_max_ranges_tolerance, "Uniform iteration distribution error");
 345         }
 346     }
 347     // Checker for test cases where ranges don't provide feedback during proportional split to
 348     // partitioner and differ from tbb::blocked_range implementation in their splitting algorithm
 349     static void nonuniform_distribution_checker(const char* rangeName, size_t rangeSize, const use_case_settings_t* settings,
 350         const RangeStatisticData& stat)
 351     {
 352         if (stat.m_rangeNum > settings->thread_num) {
 353             REPORT("ERROR: '%s (f=%d|e=%d)': %llu=#ranges > #threads=%llu\n",
 354                 rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
 355                 uint64_t(stat.m_rangeNum), uint64_t(settings->thread_num));
 356             ASSERT(stat.m_rangeNum <= settings->thread_num,
 357                 "Incorrect number of range objects was created before work balancing phase started");
 358         }
 359         // Checking difference between min and max number of range iterations
 360         size_t diff = stat.m_maxRangeSize - stat.m_minRangeSize;
 361         if (diff > rangeSize) {
 362             REPORT("ERROR: '%s (f=%d|e=%d)': range size difference=%llu > %llu=initial range size\n",
 363                 rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
 364                 uint64_t(diff), uint64_t(rangeSize));
 365             ASSERT(diff <= rangeSize, "Iteration distribution error");
 366         }
 367     }
 368
 369 protected:
 370     size_t m_parallel_group_thread_starting_index; // starting index of thread
 371
 372     template <typename Range, typename Partitioner, typename T>
 373     void test(use_case_settings_t& settings, T factors[], size_t (*rsgFunc)(T*, unsigned, size_t)
 374         = &default_range_size_generator<T>) const
 375     {
 376         for (unsigned i = 0; i < settings.factors_array_len; ++i) {
 377             size_t range_end = rsgFunc(factors, i, settings.thread_num);
 378             RangeStatisticData stat = { /*range num=*/ 0, /*minimal size of range=*/ 0,
 379                 /*maximal size of range=*/ 0, /*minimal size of range was not rewritten yet=*/ false };
 380             Range range = Range(settings.range_begin, range_end, &stat, settings.provide_feedback,
 381                                 settings.ensure_non_empty_size);
 382             Partitioner my_partitioner;
 383             test_case(range, SimpleBody(), my_partitioner, NULL);
 384             size_t range_size = range_end - settings.range_begin;
 385             const char* rangeName = typeid(range).name();
 386             settings.checker(rangeName, range_size, &settings, stat);
 387         }
 388     }
 389 };
 390
 391 template <typename ParallelTestBody>
 392 void test() {
 393     size_t hw_threads_num = tbb::tbb_thread::hardware_concurrency();
 394     size_t threadsToRunOn = std::min<size_t>(max_simulated_threads, hw_threads_num);
 395
 396     size_t parallel_group_thread_starting_index = 1;
 397     while( parallel_group_thread_starting_index <= max_simulated_threads - threadsToRunOn ) {
 398         NativeParallelFor(threadsToRunOn, ParallelTestBody(parallel_group_thread_starting_index));
 399         parallel_group_thread_starting_index += threadsToRunOn;
 400     }
 401     NativeParallelFor(max_simulated_threads - parallel_group_thread_starting_index,
 402         ParallelTestBody(parallel_group_thread_starting_index));
 403 }
 404
 405 namespace task_affinity_whitebox {
 406 size_t range_begin = 0;
 407 size_t range_end = 20;
 408 }
 409
 410 template<typename Partitioner>
 411 void check_tree(const test_partitioner_utils::BinaryTree&);
 412
 413 template<>
 414 void check_tree<tbb::affinity_partitioner>(const test_partitioner_utils::BinaryTree& tree) {
 415     ASSERT(tree == whitebox_simulation::reference_tree,
 416         "affinity_partitioner distributes tasks differently from run to run");
 417 }
 418
 419 template<>
 420 void check_tree<tbb::static_partitioner>(const test_partitioner_utils::BinaryTree& tree) {
 421     std::vector<test_partitioner_utils::TreeNode* > tree_leafs;
 422     tree.fill_leafs(tree_leafs);
 423     typedef std::vector<size_t> Slots;
 424     Slots affinity_slots(tree_leafs.size() + 1, 0);
 425
 426     for (std::vector<test_partitioner_utils::TreeNode*>::iterator i = tree_leafs.begin(); i != tree_leafs.end(); ++i) {
 427         affinity_slots[(*i)->m_affinity]++;
 428         if ((*i)->m_affinity == 0)
 429             ASSERT((*i)->m_range_begin == task_affinity_whitebox::range_begin,
 430                 "Task with affinity 0 was executed with wrong range");
 431     }
 432
 433     typedef std::iterator_traits<Slots::iterator>::difference_type slots_difference_type;
 434     ASSERT(std::count(affinity_slots.begin(), affinity_slots.end(), size_t(0)) == slots_difference_type(1),
 435         "static_partitioner incorrectly distributed tasks by threads");
 436     ASSERT(std::count(affinity_slots.begin(), affinity_slots.end(), size_t(1)) == slots_difference_type(g_threadNums.local()),
 437         "static_partitioner incorrectly distributed tasks by threads");
 438     ASSERT(affinity_slots[tbb::this_task_arena::current_thread_index() + 1] == 0,
 439         "static_partitioner incorrectly assigns task with 0 affinity");
 440     ASSERT(std::accumulate(affinity_slots.begin(), affinity_slots.end(), size_t(0)) == g_threadNums.local(),
 441         "static_partitioner has created more tasks than the number of threads");
 442 }
 443
 444 template<typename Partitioner>
 445 void test_task_affinity() {
 446     using namespace task_affinity_whitebox;
 447     test_partitioner_utils::SimpleBody body;
 448     for (size_t p = 1; p <= 50; ++p) {
 449         g_threadNums.local() = p;
 450         whitebox_simulation::whitebox_thread_index = 0;
 451         test_partitioner_utils::TestRanges::BlockedRange range(range_begin, range_end, /*statData*/NULL,
 452                                             /*provide_feedback*/false, /*ensure_non_empty_size*/false);
 453         Partitioner partitioner;
 454         whitebox_simulation::reference_tree = test_partitioner_utils::BinaryTree();
 455         whitebox_simulation::parallel_for(range, body, partitioner, &(whitebox_simulation::reference_tree));
 456         while (whitebox_simulation::whitebox_thread_index < p) {
 457             test_partitioner_utils::BinaryTree tree;
 458             whitebox_simulation::parallel_for(range, body, partitioner, &tree);
 459             check_tree<Partitioner>(tree);
 460             whitebox_simulation::whitebox_thread_index++;
 461         }
 462         range_begin++;
 463         range_end += 2;
 464     }
 465 }
 466
 467 } /* namespace uniform_iterations_distribution */