From e484c1759d410ab6229d17251fcd157b76ff840e Mon Sep 17 00:00:00 2001 From: "Dvorskiy, Mikhail" Date: Wed, 4 Mar 2020 13:41:20 +0300 Subject: [PATCH] [pstl] A cleanup fix for sort parallel algorithm. When one of sub-ranges has not been move constructed into a raw buffer, we should not call clean up for that sub-range. Instead of store detailed info about raw buffer history, the fix does the cleanup a sub-range after each moving the sub-range back. https://reviews.llvm.org/D73779 --- pstl/include/pstl/internal/parallel_backend_tbb.h | 176 ++++++++-------------- 1 file changed, 62 insertions(+), 114 deletions(-) diff --git a/pstl/include/pstl/internal/parallel_backend_tbb.h b/pstl/include/pstl/internal/parallel_backend_tbb.h index 244ab4c..1b09ed3 100644 --- a/pstl/include/pstl/internal/parallel_backend_tbb.h +++ b/pstl/include/pstl/internal/parallel_backend_tbb.h @@ -431,7 +431,6 @@ class __merge_task : public tbb::task _SizeType _M_ys, _M_ye; _SizeType _M_zs; _Compare _M_comp; - _Cleanup _M_cleanup; _LeafMerge _M_leaf_merge; _SizeType _M_nsort; //number of elements to be sorted for partial_sort alforithm @@ -440,8 +439,6 @@ class __merge_task : public tbb::task bool _root; //means a task is merging root task bool _x_orig; //"true" means X(or left ) subrange is in the original container; false - in the buffer bool _y_orig; //"true" means Y(or right) subrange is in the original container; false - in the buffer - bool _x_first_move, _y_first_move; //"true" means X and Y subranges are merging into the buffer and move constructor - //should be called instead of just moving. bool _split; //"true" means a merge task is a split task for parallel merging, the execution logic differs bool @@ -512,13 +509,32 @@ class __merge_task : public tbb::task } }; + struct __cleanup_range + { + template + void + operator()(Iterator __first, Iterator __last) + { + if (__last - __first < __merge_cut_off) + _Cleanup()(__first, __last); + else + { + auto __n = __last - __first; + tbb::parallel_for(tbb::blocked_range<_SizeType>(0, __n, __merge_cut_off), + [__first](const tbb::blocked_range<_SizeType>& __range) { + _Cleanup()(__first + __range.begin(), __first + __range.end()); + }); + } + } + }; + public: __merge_task(_SizeType __xs, _SizeType __xe, _SizeType __ys, _SizeType __ye, _SizeType __zs, _Compare __comp, - _Cleanup __cleanup, _LeafMerge __leaf_merge, _SizeType __nsort, _RandomAccessIterator1 __x_beg, + _Cleanup, _LeafMerge __leaf_merge, _SizeType __nsort, _RandomAccessIterator1 __x_beg, _RandomAccessIterator2 __z_beg, bool __x_orig, bool __y_orig, bool __root) : _M_xs(__xs), _M_xe(__xe), _M_ys(__ys), _M_ye(__ye), _M_zs(__zs), _M_x_beg(__x_beg), _M_z_beg(__z_beg), - _M_comp(__comp), _M_cleanup(__cleanup), _M_leaf_merge(__leaf_merge), _M_nsort(__nsort), _root(__root), - _x_orig(__x_orig), _y_orig(__y_orig), _x_first_move(false), _y_first_move(false), _split(false) + _M_comp(__comp), _M_leaf_merge(__leaf_merge), _M_nsort(__nsort), _root(__root), + _x_orig(__x_orig), _y_orig(__y_orig), _split(false) { } @@ -530,16 +546,6 @@ class __merge_task : public tbb::task template void - set_first_move(IndexType __idx, bool __on_off) - { - if (is_left(__idx)) - _x_first_move = __on_off; - else - _y_first_move = __on_off; - } - - template - void set_odd(IndexType __idx, bool __on_off) { if (is_left(__idx)) @@ -584,19 +590,11 @@ class __merge_task : public tbb::task assert(__nx > 0 && __ny > 0); if (_x_orig) - { - if (_x_first_move) - { - move_range_construct()(_M_x_beg + _M_xs, _M_x_beg + _M_xe, _M_z_beg + _M_zs); - _x_first_move = false; - } - else - move_range()(_M_x_beg + _M_xs, _M_x_beg + _M_xe, _M_z_beg + _M_zs); - } + __move_range_construct()(_M_x_beg + _M_xs, _M_x_beg + _M_xe, _M_z_beg + _M_zs); else { - assert(!_x_first_move); - move_range()(_M_z_beg + _M_zs, _M_z_beg + _M_zs + __nx, _M_x_beg + _M_xs); + __move_range()(_M_z_beg + _M_zs, _M_z_beg + _M_zs + __nx, _M_x_beg + _M_xs); + __cleanup_range()(_M_z_beg + _M_zs, _M_z_beg + _M_zs + __nx); } _x_orig = !_x_orig; @@ -608,19 +606,11 @@ class __merge_task : public tbb::task const auto __ny = (_M_ye - _M_ys); if (_y_orig) - { - if (_y_first_move) - { - move_range_construct()(_M_x_beg + _M_ys, _M_x_beg + _M_ye, _M_z_beg + _M_zs + __nx); - _y_first_move = false; - } - else - move_range()(_M_x_beg + _M_ys, _M_x_beg + _M_ye, _M_z_beg + _M_zs + __nx); - } + __move_range_construct()(_M_x_beg + _M_ys, _M_x_beg + _M_ye, _M_z_beg + _M_zs + __nx); else { - assert(!_y_first_move); - move_range()(_M_z_beg + _M_zs + __nx, _M_z_beg + _M_zs + __nx + __ny, _M_x_beg + _M_ys); + __move_range()(_M_z_beg + _M_zs + __nx, _M_z_beg + _M_zs + __nx + __ny, _M_x_beg + _M_ys); + __cleanup_range()(_M_z_beg + _M_zs + __nx, _M_z_beg + _M_zs + __nx + __ny); } _y_orig = !_y_orig; @@ -641,41 +631,15 @@ class __merge_task : public tbb::task //merge to buffer if (_x_orig) { - assert(is_partial() || std::is_sorted(_M_x_beg + _M_xs, _M_x_beg + _M_xe, _M_comp)); - assert(is_partial() || std::is_sorted(_M_x_beg + _M_ys, _M_x_beg + _M_ye, _M_comp)); - - if (_x_first_move && _y_first_move) - { - _M_leaf_merge(_M_x_beg + _M_xs, _M_x_beg + _M_xe, _M_x_beg + _M_ys, _M_x_beg + _M_ye, _M_z_beg + _M_zs, - _M_comp, move_value_construct(), move_value_construct(), move_range_construct(), - move_range_construct()); - _x_first_move = false, _y_first_move = false; - } - else if (_x_first_move && !_y_first_move) - { - _M_leaf_merge(_M_x_beg + _M_xs, _M_x_beg + _M_xe, _M_x_beg + _M_ys, _M_x_beg + _M_ye, _M_z_beg + _M_zs, - _M_comp, move_value_construct(), move_value(), move_range_construct(), move_range()); - _x_first_move = false; - } - else if (!_x_first_move && _y_first_move) - { - _M_leaf_merge(_M_x_beg + _M_xs, _M_x_beg + _M_xe, _M_x_beg + _M_ys, _M_x_beg + _M_ye, _M_z_beg + _M_zs, - _M_comp, move_value(), move_value_construct(), move_range(), move_range_construct()); - _y_first_move = false; - } - else - _M_leaf_merge(_M_x_beg + _M_xs, _M_x_beg + _M_xe, _M_x_beg + _M_ys, _M_x_beg + _M_ye, _M_z_beg + _M_zs, - _M_comp, move_value(), move_value(), move_range(), move_range()); - - assert(is_partial() || std::is_sorted(_M_z_beg + _M_zs, _M_z_beg + _M_zs + __nx + __ny, _M_comp)); + _M_leaf_merge(_M_x_beg + _M_xs, _M_x_beg + _M_xe, _M_x_beg + _M_ys, _M_x_beg + _M_ye, _M_z_beg + _M_zs, + _M_comp, __move_value_construct(), __move_value_construct(), __move_range_construct(), + __move_range_construct()); assert(parent_merge()); //not root merging task } //merge to "origin" else { assert(_x_orig == _y_orig); - assert(!_x_first_move); - assert(!_y_first_move); assert(is_partial() || std::is_sorted(_M_z_beg + _M_xs, _M_z_beg + _M_xe, _M_comp)); assert(is_partial() || std::is_sorted(_M_z_beg + _M_ys, _M_z_beg + _M_ye, _M_comp)); @@ -686,17 +650,12 @@ class __merge_task : public tbb::task _M_leaf_merge(_M_z_beg + _M_xs, _M_z_beg + _M_xe, _M_z_beg + _M_ys, _M_z_beg + _M_ye, _M_x_beg + _M_zs, _M_comp, move_value(), move_value(), move_range(), move_range()); - assert(is_partial() || std::is_sorted(_M_x_beg + _M_zs, _M_x_beg + _M_zs + __nx + __ny, _M_comp)); - - //in case of the root merge task - clean the buffer - if (!parent_merge()) - { - _M_cleanup(_M_z_beg + _M_xs, _M_z_beg + _M_xe); - _M_cleanup(_M_z_beg + _M_ys, _M_z_beg + _M_ye); - } + __cleanup_range()(_M_z_beg + _M_xs, _M_z_beg + _M_xe); + __cleanup_range()(_M_z_beg + _M_ys, _M_z_beg + _M_ye); } return nullptr; } + tbb::task* process_ranges() { @@ -705,49 +664,41 @@ class __merge_task : public tbb::task auto p = parent_merge(); - //optimization, just for sort algorithm, not for partial_sort - //{x} <= {y} - if (!is_partial() && x_less_y()) - { - if (p) + if (!p) + { //root merging task + + //optimization, just for sort algorithm, //{x} <= {y} + if (!is_partial() && x_less_y()) //we have a solution { - const auto id_range = _M_zs; - p->set_odd(id_range, _x_orig); - p->set_first_move(id_range, _x_first_move); + if (!_x_orig) + { //we have to move the solution to the origin + move_x_range(); //parallel moving + move_y_range(); //parallel moving + } + return nullptr; } - else - { //root task - - //clean the buffer - if (!_x_first_move) - _M_cleanup(_M_z_beg + _M_xs, _M_z_beg + _M_xe); - - if (!_y_first_move) - _M_cleanup(_M_z_beg + _M_ys, _M_z_beg + _M_ye); + //else: if we have data in the origin, + //we have to move data to the buffer for final merging into the origin. + if (_x_orig) + { + move_x_range(); //parallel moving + move_y_range(); //parallel moving } - return nullptr; - } - - //in case of the root merge task - move to the buffer firstly - //the root merging task - if (!p && _x_orig) - { - assert(_y_orig); - - move_x_range(); - move_y_range(); + // need to merge {x} and {y}. + return merge_ranges(); } - - //we have to revert "_x(y)_orig" flag of the parent merging task - if (p) + //else: not root merging task (parent_merge() == NULL) + //optimization, just for sort algorithm, //{x} <= {y} + if (!is_partial() && x_less_y()) { const auto id_range = _M_zs; - p->set_odd(id_range, !_x_orig); + p->set_odd(id_range, _x_orig); + return nullptr; } + //else: we have to revert "_x(y)_orig" flag of the parent merging task + const auto id_range = _M_zs; + p->set_odd(id_range, !_x_orig); - const _SizeType __n = (_M_xe - _M_xs) + (_M_ye - _M_ys); - - // need to merge {x} and {y} return merge_ranges(); } @@ -783,11 +734,9 @@ class __merge_task : public tbb::task auto __zm = _M_zs + ((__xm - _M_xs) + (__ym - _M_ys)); __merge_task* __right = new (tbb::task::allocate_additional_child_of(*parent())) - __merge_task(__xm, _M_xe, __ym, _M_ye, __zm, _M_comp, _M_cleanup, _M_leaf_merge, _M_nsort, _M_x_beg, + __merge_task(__xm, _M_xe, __ym, _M_ye, __zm, _M_comp, _Cleanup(), _M_leaf_merge, _M_nsort, _M_x_beg, _M_z_beg, _x_orig, _y_orig, _root); - __right->_x_first_move = _x_first_move; - __right->_y_first_move = _y_first_move; __right->_split = true; tbb::task::spawn(*__right); @@ -891,7 +840,6 @@ __stable_sort_task<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, _Le tbb::task* p = parent(); const auto id_range = _M_xs - _M_x_beg; - static_cast<_MergeTaskType*>(p)->set_first_move(id_range, true); return nullptr; } -- 2.7.4