From 1bf7a84d9d07d5c3dbc40fd202c25fedae926451 Mon Sep 17 00:00:00 2001 From: paolo Date: Thu, 5 Nov 2009 14:06:13 +0000 Subject: [PATCH] 2009-11-05 Paolo Carlini * include/parallel/multiway_merge.h: Simple formatting and uglification fixes. * include/parallel/losertree.h: Likewise. * include/parallel/base.h: Likewise. * include/parallel/par_loop.h: Likewise. * include/parallel/omp_loop_static.h: Likewise. * include/parallel/multiway_mergesort.h: Likewise. * include/parallel/partial_sum.h: Likewise. * include/parallel/omp_loop.h: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@153939 138bc75d-0d04-0410-961f-82ee72b054a4 --- libstdc++-v3/ChangeLog | 12 + libstdc++-v3/include/parallel/base.h | 650 +++++++++--------- libstdc++-v3/include/parallel/losertree.h | 23 +- libstdc++-v3/include/parallel/multiway_merge.h | 366 +++++------ libstdc++-v3/include/parallel/multiway_mergesort.h | 731 ++++++++++----------- libstdc++-v3/include/parallel/omp_loop.h | 98 +-- libstdc++-v3/include/parallel/omp_loop_static.h | 74 +-- libstdc++-v3/include/parallel/par_loop.h | 161 +++-- libstdc++-v3/include/parallel/partial_sum.h | 274 ++++---- 9 files changed, 1169 insertions(+), 1220 deletions(-) diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index d30a728..58bd89f 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,15 @@ +2009-11-05 Paolo Carlini + + * include/parallel/multiway_merge.h: Simple formatting and + uglification fixes. + * include/parallel/losertree.h: Likewise. + * include/parallel/base.h: Likewise. + * include/parallel/par_loop.h: Likewise. + * include/parallel/omp_loop_static.h: Likewise. + * include/parallel/multiway_mergesort.h: Likewise. + * include/parallel/partial_sum.h: Likewise. + * include/parallel/omp_loop.h: Likewise. + 2009-11-04 Benjamin Kosnik * testsuite/25_algorithms/fill/5.cc: Move... diff --git a/libstdc++-v3/include/parallel/base.h b/libstdc++-v3/include/parallel/base.h index 6bdcedc..eee88bd 100644 --- a/libstdc++-v3/include/parallel/base.h +++ b/libstdc++-v3/include/parallel/base.h @@ -93,13 +93,13 @@ namespace __gnu_parallel __is_parallel(const _Parallelism __p) { return __p != sequential; } -/** @brief Calculates the rounded-down logarithm of @__c __n for base 2. - * @param __n Argument. - * @return Returns 0 for any argument <1. - */ -template - inline _Size - __rd_log2(_Size __n) + /** @brief Calculates the rounded-down logarithm of @__c __n for base 2. + * @param __n Argument. + * @return Returns 0 for any argument <1. + */ + template + inline _Size + __rd_log2(_Size __n) { _Size __k; for (__k = 0; __n > 1; __n >>= 1) @@ -107,356 +107,352 @@ template return __k; } -/** @brief Encode two integers into one gnu_parallel::_CASable. - * @param __a First integer, to be encoded in the most-significant @__c - * _CASable_bits/2 bits. - * @param __b Second integer, to be encoded in the least-significant - * @__c _CASable_bits/2 bits. - * @return value encoding @__c __a and @__c __b. - * @see decode2 - */ -inline _CASable -__encode2(int __a, int __b) //must all be non-negative, actually -{ - return (((_CASable)__a) << (_CASable_bits / 2)) | (((_CASable)__b) << 0); -} - -/** @brief Decode two integers from one gnu_parallel::_CASable. - * @param __x __gnu_parallel::_CASable to decode integers from. - * @param __a First integer, to be decoded from the most-significant - * @__c _CASable_bits/2 bits of @__c __x. - * @param __b Second integer, to be encoded in the least-significant - * @__c _CASable_bits/2 bits of @__c __x. - * @see __encode2 - */ -inline void -decode2(_CASable __x, int& __a, int& __b) -{ - __a = (int)((__x >> (_CASable_bits / 2)) & _CASable_mask); - __b = (int)((__x >> 0 ) & _CASable_mask); -} - -//needed for parallel "numeric", even if "algorithm" not included - -/** @brief Equivalent to std::min. */ -template - const _Tp& - min(const _Tp& __a, const _Tp& __b) - { return (__a < __b) ? __a : __b; } - -/** @brief Equivalent to std::max. */ -template - const _Tp& - max(const _Tp& __a, const _Tp& __b) - { return (__a > __b) ? __a : __b; } - -/** @brief Constructs predicate for equality from strict weak - * ordering predicate - */ -template - class _EqualFromLess : public std::binary_function<_T1, _T2, bool> + /** @brief Encode two integers into one gnu_parallel::_CASable. + * @param __a First integer, to be encoded in the most-significant @__c + * _CASable_bits/2 bits. + * @param __b Second integer, to be encoded in the least-significant + * @__c _CASable_bits/2 bits. + * @return value encoding @__c __a and @__c __b. + * @see decode2 + */ + inline _CASable + __encode2(int __a, int __b) //must all be non-negative, actually { - private: - _Compare& _M_comp; + return (((_CASable)__a) << (_CASable_bits / 2)) | (((_CASable)__b) << 0); + } - public: - _EqualFromLess(_Compare& __comp) : _M_comp(__comp) { } + /** @brief Decode two integers from one gnu_parallel::_CASable. + * @param __x __gnu_parallel::_CASable to decode integers from. + * @param __a First integer, to be decoded from the most-significant + * @__c _CASable_bits/2 bits of @__c __x. + * @param __b Second integer, to be encoded in the least-significant + * @__c _CASable_bits/2 bits of @__c __x. + * @see __encode2 + */ + inline void + decode2(_CASable __x, int& __a, int& __b) + { + __a = (int)((__x >> (_CASable_bits / 2)) & _CASable_mask); + __b = (int)((__x >> 0 ) & _CASable_mask); + } - bool operator()(const _T1& __a, const _T2& __b) - { - return !_M_comp(__a, __b) && !_M_comp(__b, __a); - } - }; + //needed for parallel "numeric", even if "algorithm" not included + /** @brief Equivalent to std::min. */ + template + const _Tp& + min(const _Tp& __a, const _Tp& __b) + { return (__a < __b) ? __a : __b; } -/** @brief Similar to std::binder1st, - * but giving the argument types explicitly. */ -template - class __unary_negate - : public std::unary_function - { - protected: - _Predicate _M_pred; - - public: - explicit - __unary_negate(const _Predicate& __x) : _M_pred(__x) { } - - bool - operator()(const argument_type& __x) - { return !_M_pred(__x); } - }; - -/** @brief Similar to std::binder1st, - * but giving the argument types explicitly. */ -template - class __binder1st - : public std::unary_function<_SecondArgumentType, _ResultType> - { - protected: - _Operation _M_op; - _FirstArgumentType _M_value; - - public: - __binder1st(const _Operation& __x, - const _FirstArgumentType& __y) - : _M_op(__x), _M_value(__y) { } - - _ResultType - operator()(const _SecondArgumentType& __x) - { return _M_op(_M_value, __x); } - - // _GLIBCXX_RESOLVE_LIB_DEFECTS - // 109. Missing binders for non-const sequence elements - _ResultType - operator()(_SecondArgumentType& __x) const - { return _M_op(_M_value, __x); } - }; + /** @brief Equivalent to std::max. */ + template + const _Tp& + max(const _Tp& __a, const _Tp& __b) + { return (__a > __b) ? __a : __b; } + + /** @brief Constructs predicate for equality from strict weak + * ordering predicate + */ + template + class _EqualFromLess : public std::binary_function<_T1, _T2, bool> + { + private: + _Compare& _M_comp; -/** - * @brief Similar to std::binder2nd, but giving the argument types - * explicitly. - */ -template - class binder2nd - : public std::unary_function<_FirstArgumentType, _ResultType> - { - protected: - _Operation _M_op; - _SecondArgumentType _M_value; - - public: - binder2nd(const _Operation& __x, - const _SecondArgumentType& __y) - : _M_op(__x), _M_value(__y) { } - - _ResultType - operator()(const _FirstArgumentType& __x) const - { return _M_op(__x, _M_value); } - - // _GLIBCXX_RESOLVE_LIB_DEFECTS - // 109. Missing binders for non-const sequence elements - _ResultType - operator()(_FirstArgumentType& __x) - { return _M_op(__x, _M_value); } - }; - -/** @brief Similar to std::equal_to, but allows two different types. */ -template - struct _EqualTo : std::binary_function<_T1, _T2, bool> - { - bool operator()(const _T1& __t1, const _T2& __t2) const - { return __t1 == __t2; } - }; + public: + _EqualFromLess(_Compare& __comp) : _M_comp(__comp) { } -/** @brief Similar to std::less, but allows two different types. */ -template - struct _Less : std::binary_function<_T1, _T2, bool> - { - bool - operator()(const _T1& __t1, const _T2& __t2) const - { return __t1 < __t2; } - - bool - operator()(const _T2& __t2, const _T1& __t1) const - { return __t2 < __t1; } - }; - -// Partial specialization for one type. Same as std::less. -template -struct _Less<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, bool> - { - bool - operator()(const _Tp& __x, const _Tp& __y) const - { return __x < __y; } - }; + bool operator()(const _T1& __a, const _T2& __b) + { return !_M_comp(__a, __b) && !_M_comp(__b, __a); } + }; - /** @brief Similar to std::plus, but allows two different types. */ -template - struct _Plus : public std::binary_function<_Tp1, _Tp2, _Tp1> - { - typedef __typeof__(*static_cast<_Tp1*>(NULL) - + *static_cast<_Tp2*>(NULL)) __result; + /** @brief Similar to std::binder1st, + * but giving the argument types explicitly. */ + template + class __unary_negate + : public std::unary_function + { + protected: + _Predicate _M_pred; + + public: + explicit + __unary_negate(const _Predicate& __x) : _M_pred(__x) { } + + bool + operator()(const argument_type& __x) + { return !_M_pred(__x); } + }; + + /** @brief Similar to std::binder1st, + * but giving the argument types explicitly. */ + template + class __binder1st + : public std::unary_function<_SecondArgumentType, _ResultType> + { + protected: + _Operation _M_op; + _FirstArgumentType _M_value; + + public: + __binder1st(const _Operation& __x, const _FirstArgumentType& __y) + : _M_op(__x), _M_value(__y) { } + + _ResultType + operator()(const _SecondArgumentType& __x) + { return _M_op(_M_value, __x); } + + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 109. Missing binders for non-const sequence elements + _ResultType + operator()(_SecondArgumentType& __x) const + { return _M_op(_M_value, __x); } + }; + + /** + * @brief Similar to std::binder2nd, but giving the argument types + * explicitly. + */ + template + class binder2nd + : public std::unary_function<_FirstArgumentType, _ResultType> + { + protected: + _Operation _M_op; + _SecondArgumentType _M_value; + + public: + binder2nd(const _Operation& __x, const _SecondArgumentType& __y) + : _M_op(__x), _M_value(__y) { } + + _ResultType + operator()(const _FirstArgumentType& __x) const + { return _M_op(__x, _M_value); } + + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 109. Missing binders for non-const sequence elements + _ResultType + operator()(_FirstArgumentType& __x) + { return _M_op(__x, _M_value); } + }; + + /** @brief Similar to std::equal_to, but allows two different types. */ + template + struct _EqualTo : std::binary_function<_T1, _T2, bool> + { + bool operator()(const _T1& __t1, const _T2& __t2) const + { return __t1 == __t2; } + }; - __result - operator()(const _Tp1& __x, const _Tp2& __y) const - { return __x + __y; } - }; + /** @brief Similar to std::less, but allows two different types. */ + template + struct _Less : std::binary_function<_T1, _T2, bool> + { + bool + operator()(const _T1& __t1, const _T2& __t2) const + { return __t1 < __t2; } + + bool + operator()(const _T2& __t2, const _T1& __t1) const + { return __t2 < __t1; } + }; + + // Partial specialization for one type. Same as std::less. + template + struct _Less<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, bool> + { + bool + operator()(const _Tp& __x, const _Tp& __y) const + { return __x < __y; } + }; -// Partial specialization for one type. Same as std::plus. -template - struct _Plus<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp> - { - typedef __typeof__(*static_cast<_Tp*>(NULL) - + *static_cast<_Tp*>(NULL)) __result; - __result - operator()(const _Tp& __x, const _Tp& __y) const - { return __x + __y; } - }; + /** @brief Similar to std::plus, but allows two different types. */ + template + struct _Plus : public std::binary_function<_Tp1, _Tp2, _Tp1> + { + typedef __typeof__(*static_cast<_Tp1*>(NULL) + + *static_cast<_Tp2*>(NULL)) __result; + __result + operator()(const _Tp1& __x, const _Tp2& __y) const + { return __x + __y; } + }; -/** @brief Similar to std::multiplies, but allows two different types. */ -template - struct _Multiplies : public std::binary_function<_Tp1, _Tp2, _Tp1> - { - typedef __typeof__(*static_cast<_Tp1*>(NULL) - * *static_cast<_Tp2*>(NULL)) __result; + // Partial specialization for one type. Same as std::plus. + template + struct _Plus<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp> + { + typedef __typeof__(*static_cast<_Tp*>(NULL) + + *static_cast<_Tp*>(NULL)) __result; - __result - operator()(const _Tp1& __x, const _Tp2& __y) const - { return __x * __y; } - }; + __result + operator()(const _Tp& __x, const _Tp& __y) const + { return __x + __y; } + }; -// Partial specialization for one type. Same as std::multiplies. -template - struct _Multiplies<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp> - { - typedef __typeof__(*static_cast<_Tp*>(NULL) - * *static_cast<_Tp*>(NULL)) __result; - __result - operator()(const _Tp& __x, const _Tp& __y) const - { return __x * __y; } - }; + /** @brief Similar to std::multiplies, but allows two different types. */ + template + struct _Multiplies : public std::binary_function<_Tp1, _Tp2, _Tp1> + { + typedef __typeof__(*static_cast<_Tp1*>(NULL) + * *static_cast<_Tp2*>(NULL)) __result; + __result + operator()(const _Tp1& __x, const _Tp2& __y) const + { return __x * __y; } + }; -template - class _PseudoSequence; + // Partial specialization for one type. Same as std::multiplies. + template + struct _Multiplies<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp> + { + typedef __typeof__(*static_cast<_Tp*>(NULL) + * *static_cast<_Tp*>(NULL)) __result; -/** @brief _Iterator associated with __gnu_parallel::_PseudoSequence. - * If features the usual random-access iterator functionality. - * @param _Tp Sequence _M_value type. - * @param _DifferenceType Sequence difference type. - */ -template - class _PseudoSequenceIterator - { - public: - typedef _DifferenceTp _DifferenceType; + __result + operator()(const _Tp& __x, const _Tp& __y) const + { return __x * __y; } + }; - private: - const _Tp& _M_val; - _DifferenceType _M_pos; - public: - _PseudoSequenceIterator(const _Tp& _M_val, _DifferenceType _M_pos) - : _M_val(_M_val), _M_pos(_M_pos) { } + template + class _PseudoSequence; - // Pre-increment operator. - _PseudoSequenceIterator& - operator++() + /** @brief _Iterator associated with __gnu_parallel::_PseudoSequence. + * If features the usual random-access iterator functionality. + * @param _Tp Sequence _M_value type. + * @param _DifferenceType Sequence difference type. + */ + template + class _PseudoSequenceIterator { - ++_M_pos; - return *this; - } + public: + typedef _DifferenceTp _DifferenceType; - // Post-increment operator. - const _PseudoSequenceIterator - operator++(int) - { return _PseudoSequenceIterator(_M_pos++); } + private: + const _Tp& _M_val; + _DifferenceType _M_pos; - const _Tp& - operator*() const - { return _M_val; } + public: + _PseudoSequenceIterator(const _Tp& __val, _DifferenceType __pos) + : _M_val(__val), _M_pos(__pos) { } - const _Tp& - operator[](_DifferenceType) const - { return _M_val; } - - bool - operator==(const _PseudoSequenceIterator& __i2) - { return _M_pos == __i2._M_pos; } - - _DifferenceType - operator!=(const _PseudoSequenceIterator& __i2) - { return _M_pos != __i2._M_pos; } - - _DifferenceType - operator-(const _PseudoSequenceIterator& __i2) - { return _M_pos - __i2._M_pos; } - }; - -/** @brief Sequence that conceptually consists of multiple copies of - the same element. - * The copies are not stored explicitly, of course. - * @param _Tp Sequence _M_value type. - * @param _DifferenceType Sequence difference type. - */ -template - class _PseudoSequence - { - public: - typedef _DifferenceTp _DifferenceType; - - // Better cast down to uint64_t, than up to _DifferenceTp. - typedef _PseudoSequenceIterator<_Tp, uint64_t> iterator; + // Pre-increment operator. + _PseudoSequenceIterator& + operator++() + { + ++_M_pos; + return *this; + } - /** @brief Constructor. - * @param _M_val Element of the sequence. - * @param __count Number of (virtual) copies. + // Post-increment operator. + const _PseudoSequenceIterator + operator++(int) + { return _PseudoSequenceIterator(_M_pos++); } + + const _Tp& + operator*() const + { return _M_val; } + + const _Tp& + operator[](_DifferenceType) const + { return _M_val; } + + bool + operator==(const _PseudoSequenceIterator& __i2) + { return _M_pos == __i2._M_pos; } + + _DifferenceType + operator!=(const _PseudoSequenceIterator& __i2) + { return _M_pos != __i2._M_pos; } + + _DifferenceType + operator-(const _PseudoSequenceIterator& __i2) + { return _M_pos - __i2._M_pos; } + }; + + /** @brief Sequence that conceptually consists of multiple copies of + the same element. + * The copies are not stored explicitly, of course. + * @param _Tp Sequence _M_value type. + * @param _DifferenceType Sequence difference type. */ - _PseudoSequence(const _Tp& _M_val, _DifferenceType __count) - : _M_val(_M_val), __count(__count) { } - - /** @brief Begin iterator. */ - iterator - begin() const - { return iterator(_M_val, 0); } - - /** @brief End iterator. */ - iterator - end() const - { return iterator(_M_val, __count); } - - private: - const _Tp& _M_val; - _DifferenceType __count; - }; - -/** @brief Functor that does nothing */ -template - class _VoidFunctor - { - inline void - operator()(const _ValueTp& __v) const { } - }; - -/** @brief Compute the median of three referenced elements, - according to @__c __comp. - * @param __a First iterator. - * @param __b Second iterator. - * @param __c Third iterator. - * @param __comp Comparator. - */ -template - _RAIter - __median_of_three_iterators(_RAIter __a, _RAIter __b, - _RAIter __c, _Compare& __comp) - { - if (__comp(*__a, *__b)) - if (__comp(*__b, *__c)) - return __b; + template + class _PseudoSequence + { + public: + typedef _DifferenceTp _DifferenceType; + + // Better cast down to uint64_t, than up to _DifferenceTp. + typedef _PseudoSequenceIterator<_Tp, uint64_t> iterator; + + /** @brief Constructor. + * @param _M_val Element of the sequence. + * @param __count Number of (virtual) copies. + */ + _PseudoSequence(const _Tp& __val, _DifferenceType __count) + : _M_val(__val), _M_count(__count) { } + + /** @brief Begin iterator. */ + iterator + begin() const + { return iterator(_M_val, 0); } + + /** @brief End iterator. */ + iterator + end() const + { return iterator(_M_val, _M_count); } + + private: + const _Tp& _M_val; + _DifferenceType _M_count; + }; + + /** @brief Functor that does nothing */ + template + class _VoidFunctor + { + inline void + operator()(const _ValueTp& __v) const { } + }; + + /** @brief Compute the median of three referenced elements, + according to @__c __comp. + * @param __a First iterator. + * @param __b Second iterator. + * @param __c Third iterator. + * @param __comp Comparator. + */ + template + _RAIter + __median_of_three_iterators(_RAIter __a, _RAIter __b, + _RAIter __c, _Compare& __comp) + { + if (__comp(*__a, *__b)) + if (__comp(*__b, *__c)) + return __b; + else + if (__comp(*__a, *__c)) + return __c; + else + return __a; else - if (__comp(*__a, *__c)) - return __c; - else - return __a; - else - { - // Just swap __a and __b. - if (__comp(*__a, *__c)) - return __a; - else - if (__comp(*__b, *__c)) - return __c; - else - return __b; - } - } + { + // Just swap __a and __b. + if (__comp(*__a, *__c)) + return __a; + else + if (__comp(*__b, *__c)) + return __c; + else + return __b; + } + } #define _GLIBCXX_PARALLEL_ASSERT(_Condition) __glibcxx_assert(_Condition) diff --git a/libstdc++-v3/include/parallel/losertree.h b/libstdc++-v3/include/parallel/losertree.h index f83333b..9b9914f 100644 --- a/libstdc++-v3/include/parallel/losertree.h +++ b/libstdc++-v3/include/parallel/losertree.h @@ -163,7 +163,8 @@ template */ template - class _LoserTree : public _LoserTreeBase<_Tp, _Compare> + class _LoserTree + : public _LoserTreeBase<_Tp, _Compare> { typedef _LoserTreeBase<_Tp, _Compare> _Base; using _Base::_M_k; @@ -797,7 +798,7 @@ public: * run empty. This is a very fast variant. */ template - class LoserTreePointerUnguardedBase + class _LoserTreePointerUnguardedBase { protected: struct _Loser @@ -812,8 +813,8 @@ template public: - LoserTreePointerUnguardedBase(unsigned int __k, const _Tp& _sentinel, - _Compare __comp = std::less<_Tp>()) + _LoserTreePointerUnguardedBase(unsigned int __k, const _Tp& _sentinel, + _Compare __comp = std::less<_Tp>()) : _M_comp(__comp) { _M_ik = __k; @@ -831,7 +832,7 @@ template } } - ~LoserTreePointerUnguardedBase() + ~_LoserTreePointerUnguardedBase() { delete[] _M_losers; } int @@ -861,16 +862,16 @@ template */ template class _LoserTreePointerUnguarded - : public LoserTreePointerUnguardedBase<_Tp, _Compare> + : public _LoserTreePointerUnguardedBase<_Tp, _Compare> { - typedef LoserTreePointerUnguardedBase<_Tp, _Compare> _Base; + typedef _LoserTreePointerUnguardedBase<_Tp, _Compare> _Base; using _Base::_M_k; using _Base::_M_losers; public: _LoserTreePointerUnguarded(unsigned int __k, const _Tp& _sentinel, _Compare __comp = std::less<_Tp>()) - : _Base::LoserTreePointerUnguardedBase(__k, _sentinel, __comp) + : _Base::_LoserTreePointerUnguardedBase(__k, _sentinel, __comp) { } unsigned int @@ -945,16 +946,16 @@ template */ template class _LoserTreePointerUnguarded - : public LoserTreePointerUnguardedBase<_Tp, _Compare> + : public _LoserTreePointerUnguardedBase<_Tp, _Compare> { - typedef LoserTreePointerUnguardedBase<_Tp, _Compare> _Base; + typedef _LoserTreePointerUnguardedBase<_Tp, _Compare> _Base; using _Base::_M_k; using _Base::_M_losers; public: _LoserTreePointerUnguarded(unsigned int __k, const _Tp& _sentinel, _Compare __comp = std::less<_Tp>()) - : _Base::LoserTreePointerUnguardedBase(__k, _sentinel, __comp) + : _Base::_LoserTreePointerUnguardedBase(__k, _sentinel, __comp) { } unsigned int diff --git a/libstdc++-v3/include/parallel/multiway_merge.h b/libstdc++-v3/include/parallel/multiway_merge.h index 71ef90c..a5bb2d7 100644 --- a/libstdc++-v3/include/parallel/multiway_merge.h +++ b/libstdc++-v3/include/parallel/multiway_merge.h @@ -54,24 +54,6 @@ namespace __gnu_parallel { - -// Announce guarded and unguarded iterator. - -template - class _GuardedIterator; - -// Making the arguments const references seems to dangerous, -// the user-defined comparator might not be const. -template - inline bool - operator<(_GuardedIterator<_RAIter, _Compare>& __bi1, - _GuardedIterator<_RAIter, _Compare>& __bi2); - -template - inline bool - operator<=(_GuardedIterator<_RAIter, _Compare>& __bi1, - _GuardedIterator<_RAIter, _Compare>& __bi2); - /** @brief _Iterator wrapper supporting an implicit supremum at the end * of the sequence, dominating all comparisons. * @@ -96,12 +78,11 @@ template public: /** @brief Constructor. Sets iterator to beginning of sequence. * @param __begin Begin iterator of sequence. - * @param _M_end End iterator of sequence. + * @param __end End iterator of sequence. * @param __comp Comparator provided for associated overloaded * compare operators. */ - _GuardedIterator(_RAIter __begin, - _RAIter _M_end, _Compare& __comp) - : _M_current(__begin), _M_end(_M_end), __comp(__comp) + _GuardedIterator(_RAIter __begin, _RAIter __end, _Compare& __comp) + : _M_current(__begin), _M_end(__end), __comp(__comp) { } /** @brief Pre-increment operator. @@ -124,62 +105,37 @@ template operator _RAIter() { return _M_current; } + /** @brief Compare two elements referenced by guarded iterators. + * @param __bi1 First iterator. + * @param __bi2 Second iterator. + * @return @__c true if less. */ friend bool - operator< <_RAIter, _Compare>( - _GuardedIterator<_RAIter, _Compare>& __bi1, - _GuardedIterator<_RAIter, _Compare>& __bi2); + operator<(_GuardedIterator<_RAIter, _Compare>& __bi1, + _GuardedIterator<_RAIter, _Compare>& __bi2) + { + if (__bi1._M_current == __bi1._M_end) //__bi1 is sup + return __bi2._M_current == __bi2._M_end; //__bi2 is not sup + if (__bi2._M_current == __bi2._M_end) //__bi2 is sup + return true; + return (__bi1.__comp)(*__bi1, *__bi2); //normal compare + } + /** @brief Compare two elements referenced by guarded iterators. + * @param __bi1 First iterator. + * @param __bi2 Second iterator. + * @return @__c True if less equal. */ friend bool - operator<= <_RAIter, _Compare>( - _GuardedIterator<_RAIter, _Compare>& __bi1, - _GuardedIterator<_RAIter, _Compare>& __bi2); + operator<=(_GuardedIterator<_RAIter, _Compare>& __bi1, + _GuardedIterator<_RAIter, _Compare>& __bi2) + { + if (__bi2._M_current == __bi2._M_end) //__bi1 is sup + return __bi1._M_current != __bi1._M_end; //__bi2 is not sup + if (__bi1._M_current == __bi1._M_end) //__bi2 is sup + return false; + return !(__bi1.__comp)(*__bi2, *__bi1); //normal compare + } }; -/** @brief Compare two elements referenced by guarded iterators. - * @param __bi1 First iterator. - * @param __bi2 Second iterator. - * @return @__c true if less. */ -template - inline bool - operator<(_GuardedIterator<_RAIter, _Compare>& __bi1, - _GuardedIterator<_RAIter, _Compare>& __bi2) - { - if (__bi1._M_current == __bi1._M_end) //__bi1 is sup - return __bi2._M_current == __bi2._M_end; //__bi2 is not sup - if (__bi2._M_current == __bi2._M_end) //__bi2 is sup - return true; - return (__bi1.__comp)(*__bi1, *__bi2); //normal compare - } - -/** @brief Compare two elements referenced by guarded iterators. - * @param __bi1 First iterator. - * @param __bi2 Second iterator. - * @return @__c True if less equal. */ -template - inline bool - operator<=(_GuardedIterator<_RAIter, _Compare>& __bi1, - _GuardedIterator<_RAIter, _Compare>& __bi2) - { - if (__bi2._M_current == __bi2._M_end) //__bi1 is sup - return __bi1._M_current != __bi1._M_end; //__bi2 is not sup - if (__bi1._M_current == __bi1._M_end) //__bi2 is sup - return false; - return !(__bi1.__comp)(*__bi2, *__bi1); //normal compare - } - -template - class _UnguardedIterator; - -template - inline bool - operator<(_UnguardedIterator<_RAIter, _Compare>& __bi1, - _UnguardedIterator<_RAIter, _Compare>& __bi2); - -template - inline bool - operator<=(_UnguardedIterator<_RAIter, _Compare>& __bi1, - _UnguardedIterator<_RAIter, _Compare>& __bi2); - template class _UnguardedIterator { @@ -192,10 +148,10 @@ template public: /** @brief Constructor. Sets iterator to beginning of sequence. * @param __begin Begin iterator of sequence. - * @param _M_end Unused, only for compatibility. + * @param __end Unused, only for compatibility. * @param __comp Unused, only for compatibility. */ _UnguardedIterator(_RAIter __begin, - _RAIter _M_end, _Compare& __comp) + _RAIter /* __end */, _Compare& __comp) : _M_current(__begin), __comp(__comp) { } @@ -219,43 +175,31 @@ template operator _RAIter() { return _M_current; } + /** @brief Compare two elements referenced by unguarded iterators. + * @param __bi1 First iterator. + * @param __bi2 Second iterator. + * @return @__c true if less. */ friend bool - operator< <_RAIter, _Compare>( - _UnguardedIterator<_RAIter, _Compare>& __bi1, - _UnguardedIterator<_RAIter, _Compare>& __bi2); + operator<(_UnguardedIterator<_RAIter, _Compare>& __bi1, + _UnguardedIterator<_RAIter, _Compare>& __bi2) + { + // Normal compare. + return (__bi1.__comp)(*__bi1, *__bi2); + } + /** @brief Compare two elements referenced by unguarded iterators. + * @param __bi1 First iterator. + * @param __bi2 Second iterator. + * @return @__c True if less equal. */ friend bool - operator<= <_RAIter, _Compare>( - _UnguardedIterator<_RAIter, _Compare>& __bi1, - _UnguardedIterator<_RAIter, _Compare>& __bi2); + operator<=(_UnguardedIterator<_RAIter, _Compare>& __bi1, + _UnguardedIterator<_RAIter, _Compare>& __bi2) + { + // Normal compare. + return !(__bi1.__comp)(*__bi2, *__bi1); + } }; -/** @brief Compare two elements referenced by unguarded iterators. - * @param __bi1 First iterator. - * @param __bi2 Second iterator. - * @return @__c true if less. */ -template - inline bool - operator<(_UnguardedIterator<_RAIter, _Compare>& __bi1, - _UnguardedIterator<_RAIter, _Compare>& __bi2) - { - // Normal compare. - return (__bi1.__comp)(*__bi1, *__bi2); - } - -/** @brief Compare two elements referenced by unguarded iterators. - * @param __bi1 First iterator. - * @param __bi2 Second iterator. - * @return @__c True if less equal. */ -template - inline bool - operator<=(_UnguardedIterator<_RAIter, _Compare>& __bi1, - _UnguardedIterator<_RAIter, _Compare>& __bi2) - { - // Normal compare. - return !(__bi1.__comp)(*__bi2, *__bi1); - } - /** @brief Highly efficient 3-way merging procedure. * * Merging is done with the algorithm implementation described by Peter @@ -287,11 +231,10 @@ template class iterator, typename _DifferenceTp, typename _Compare> _RAIter3 - multiway_merge_3_variant( - _RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, - _DifferenceTp __length, _Compare __comp) + multiway_merge_3_variant(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + _DifferenceTp __length, _Compare __comp) { _GLIBCXX_CALL(__length); @@ -612,15 +555,14 @@ template _RAIter3 - multiway_merge_loser_tree_unguarded( - _RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, - const typename std::iterator_traits::value_type::first_type>::value_type& - __sentinel, - _DifferenceTp __length, - _Compare __comp) + multiway_merge_loser_tree_unguarded(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + const typename std::iterator_traits::value_type::first_type>::value_type& + __sentinel, + _DifferenceTp __length, + _Compare __comp) { _GLIBCXX_CALL(__length) typedef _DifferenceTp _DifferenceType; @@ -702,15 +644,14 @@ template _RAIter3 - multiway_merge_loser_tree_sentinel( - _RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, + multiway_merge_loser_tree_sentinel(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, const typename std::iterator_traits::value_type::first_type>::value_type& - __sentinel, - _DifferenceTp __length, - _Compare __comp) + __sentinel, + _DifferenceTp __length, + _Compare __comp) { _GLIBCXX_CALL(__length) @@ -773,16 +714,16 @@ template -struct _LoserTreeTraits -{ - /** - * @brief True iff to use pointers instead of values in loser trees. - * - * The default behavior is to use pointers if the data type is four - * times as big as the pointer to it. - */ - static const bool _M_use_pointer = (sizeof(_Tp) > 4 * sizeof(_Tp*)); -}; + struct _LoserTreeTraits + { + /** + * @brief True iff to use pointers instead of values in loser trees. + * + * The default behavior is to use pointers if the data type is four + * times as big as the pointer to it. + */ + static const bool _M_use_pointer = (sizeof(_Tp) > 4 * sizeof(_Tp*)); + }; /** * @brief Switch for 3-way merging with __sentinels turned off. @@ -796,10 +737,11 @@ template struct __multiway_merge_3_variant_sentinel_switch { - _RAIter3 operator()(_RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, - _DifferenceTp __length, _Compare __comp) + _RAIter3 + operator()(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + _DifferenceTp __length, _Compare __comp) { return multiway_merge_3_variant<_GuardedIterator> (__seqs_begin, __seqs_end, __target, __length, __comp); @@ -815,13 +757,15 @@ template - struct __multiway_merge_3_variant_sentinel_switch - + struct __multiway_merge_3_variant_sentinel_switch { - _RAIter3 operator()(_RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, - _DifferenceTp __length, _Compare __comp) + _RAIter3 + operator()(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + _DifferenceTp __length, _Compare __comp) { return multiway_merge_3_variant<_UnguardedIterator> (__seqs_begin, __seqs_end, __target, __length, __comp); @@ -840,10 +784,11 @@ template struct __multiway_merge_4_variant_sentinel_switch { - _RAIter3 operator()(_RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, - _DifferenceTp __length, _Compare __comp) + _RAIter3 + operator()(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + _DifferenceTp __length, _Compare __comp) { return multiway_merge_4_variant<_GuardedIterator> (__seqs_begin, __seqs_end, __target, __length, __comp); @@ -859,13 +804,15 @@ template - struct __multiway_merge_4_variant_sentinel_switch - + struct __multiway_merge_4_variant_sentinel_switch { - _RAIter3 operator()(_RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, - _DifferenceTp __length, _Compare __comp) + _RAIter3 + operator()(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + _DifferenceTp __length, _Compare __comp) { return multiway_merge_4_variant<_UnguardedIterator> (__seqs_begin, __seqs_end, __target, __length, __comp); @@ -882,30 +829,30 @@ template struct __multiway_merge_k_variant_sentinel_switch - { - _RAIter3 operator() - (_RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, - const typename std::iterator_traits::value_type::first_type>::value_type& - __sentinel, - _DifferenceTp __length, _Compare __comp) - { - typedef typename std::iterator_traits<_RAIterIterator> - ::value_type::first_type - _RAIter1; - typedef typename std::iterator_traits<_RAIter1>::value_type - _ValueType; - - return multiway_merge_loser_tree_sentinel< - typename __gnu_cxx::__conditional_type< - _LoserTreeTraits<_ValueType>::_M_use_pointer, - _LoserTreePointerUnguarded<__stable, _ValueType, _Compare>, - _LoserTreeUnguarded<__stable, _ValueType, _Compare> - >::__type>( - __seqs_begin, __seqs_end, __target, __sentinel, __length, __comp); - } + { + _RAIter3 + operator()(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, + const typename std::iterator_traits::value_type::first_type>::value_type& + __sentinel, + _DifferenceTp __length, _Compare __comp) + { + typedef typename std::iterator_traits<_RAIterIterator> + ::value_type::first_type + _RAIter1; + typedef typename std::iterator_traits<_RAIter1>::value_type + _ValueType; + + return multiway_merge_loser_tree_sentinel< + typename __gnu_cxx::__conditional_type< + _LoserTreeTraits<_ValueType>::_M_use_pointer, + _LoserTreePointerUnguarded<__stable, _ValueType, _Compare>, + _LoserTreeUnguarded<__stable, _ValueType, _Compare> + >::__type> + (__seqs_begin, __seqs_end, __target, __sentinel, __length, __comp); + } }; /** @@ -916,17 +863,18 @@ template - struct __multiway_merge_k_variant_sentinel_switch - + struct __multiway_merge_k_variant_sentinel_switch { - _RAIter3 operator() - (_RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, + _RAIter3 + operator()(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, const typename std::iterator_traits::value_type::first_type>::value_type& - __sentinel, - _DifferenceTp __length, _Compare __comp) + __sentinel, + _DifferenceTp __length, _Compare __comp) { typedef typename std::iterator_traits<_RAIterIterator> ::value_type::first_type @@ -963,14 +911,13 @@ template _RAIter3 - __sequential_multiway_merge( - _RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _RAIter3 __target, + __sequential_multiway_merge(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _RAIter3 __target, const typename std::iterator_traits::value_type::first_type>::value_type& - __sentinel, - _DifferenceTp __length, _Compare __comp) + __sentinel, + _DifferenceTp __length, _Compare __comp) { _GLIBCXX_CALL(__length) @@ -1061,8 +1008,8 @@ template struct _SamplingSorter { - void operator()(_RAIter __first, _RAIter __last, - _StrictWeakOrdering __comp) + void + operator()(_RAIter __first, _RAIter __last, _StrictWeakOrdering __comp) { __gnu_sequential::stable_sort(__first, __last, __comp); } }; @@ -1074,8 +1021,8 @@ template template struct _SamplingSorter { - void operator()(_RAIter __first, _RAIter __last, - _StrictWeakOrdering __comp) + void + operator()(_RAIter __first, _RAIter __last, _StrictWeakOrdering __comp) { __gnu_sequential::sort(__first, __last, __comp); } }; @@ -1087,10 +1034,11 @@ template void - multiway_merge_sampling_splitting - (_RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _DifferenceType __length, _DifferenceType __total_length, _Compare __comp, + multiway_merge_sampling_splitting(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _DifferenceType __length, + _DifferenceType __total_length, + _Compare __comp, std::vector > *__pieces) { typedef typename std::iterator_traits<_RAIterIterator> @@ -1168,11 +1116,11 @@ template void - multiway_merge_exact_splitting - (_RAIterIterator __seqs_begin, - _RAIterIterator __seqs_end, - _DifferenceType __length, _DifferenceType __total_length, - _Compare __comp, + multiway_merge_exact_splitting(_RAIterIterator __seqs_begin, + _RAIterIterator __seqs_end, + _DifferenceType __length, + _DifferenceType __total_length, + _Compare __comp, std::vector > *__pieces) { typedef typename std::iterator_traits<_RAIterIterator> diff --git a/libstdc++-v3/include/parallel/multiway_mergesort.h b/libstdc++-v3/include/parallel/multiway_mergesort.h index c7f10ae..3b047b4 100644 --- a/libstdc++-v3/include/parallel/multiway_mergesort.h +++ b/libstdc++-v3/include/parallel/multiway_mergesort.h @@ -41,451 +41,444 @@ namespace __gnu_parallel { + /** @brief Subsequence description. */ + template + struct _Piece + { + typedef _DifferenceTp _DifferenceType; -/** @brief Subsequence description. */ -template - struct _Piece - { - typedef _DifferenceTp _DifferenceType; + /** @brief Begin of subsequence. */ + _DifferenceType _M_begin; - /** @brief Begin of subsequence. */ - _DifferenceType _M_begin; + /** @brief End of subsequence. */ + _DifferenceType _M_end; + }; - /** @brief End of subsequence. */ - _DifferenceType _M_end; - }; + /** @brief Data accessed by all threads. + * + * PMWMS = parallel multiway mergesort */ + template + struct _PMWMSSortingData + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; -/** @brief Data accessed by all threads. - * - * PMWMS = parallel multiway mergesort */ -template - struct _PMWMSSortingData - { - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; - - /** @brief Number of threads involved. */ - _ThreadIndex _M_num_threads; - - /** @brief Input __begin. */ - _RAIter _M_source; - - /** @brief Start indices, per thread. */ - _DifferenceType* _M_starts; - - /** @brief Storage in which to sort. */ - _ValueType** _M_temporary; - - /** @brief Samples. */ - _ValueType* _M_samples; - - /** @brief Offsets to add to the found positions. */ - _DifferenceType* _M_offsets; - - /** @brief Pieces of data to merge @__c [thread][__sequence] */ - std::vector<_Piece<_DifferenceType> >* _M_pieces; -}; - -/** - * @brief Select _M_samples from a sequence. - * @param __sd Pointer to algorithm data. _Result will be placed in - * @__c __sd->_M_samples. - * @param __num_samples Number of _M_samples to select. - */ -template - void - __determine_samples(_PMWMSSortingData<_RAIter>* __sd, - _DifferenceTp __num_samples) - { - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef _DifferenceTp _DifferenceType; - - _ThreadIndex __iam = omp_get_thread_num(); - - _DifferenceType* __es = new _DifferenceType[__num_samples + 2]; - - equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam], - __num_samples + 1, __es); - - for (_DifferenceType __i = 0; __i < __num_samples; ++__i) - ::new(&(__sd->_M_samples[__iam * __num_samples + __i])) - _ValueType(__sd->_M_source[__sd->_M_starts[__iam] + __es[__i + 1]]); - - delete[] __es; - } - -/** @brief Split consistently. */ -template - struct _SplitConsistently - { - }; + /** @brief Number of threads involved. */ + _ThreadIndex _M_num_threads; -/** @brief Split by exact splitting. */ -template - struct _SplitConsistently - - { - void operator()( - const _ThreadIndex __iam, - _PMWMSSortingData<_RAIter>* __sd, - _Compare& __comp, - const typename - std::iterator_traits<_RAIter>::difference_type - __num_samples) - const - { -# pragma omp barrier - - std::vector > - seqs(__sd->_M_num_threads); - for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) - seqs[__s] = std::make_pair(__sd->_M_temporary[__s], - __sd->_M_temporary[__s] - + (__sd->_M_starts[__s + 1] - - __sd->_M_starts[__s])); - - std::vector<_SortingPlacesIterator> _M_offsets(__sd->_M_num_threads); - - // if not last thread - if (__iam < __sd->_M_num_threads - 1) - multiseq_partition(seqs.begin(), seqs.end(), - __sd->_M_starts[__iam + 1], _M_offsets.begin(), - __comp); - - for (int __seq = 0; __seq < __sd->_M_num_threads; __seq++) - { - // for each sequence - if (__iam < (__sd->_M_num_threads - 1)) - __sd->_M_pieces[__iam][__seq]._M_end - = _M_offsets[__seq] - seqs[__seq].first; - else - // very end of this sequence - __sd->_M_pieces[__iam][__seq]._M_end = - __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq]; - } + /** @brief Input __begin. */ + _RAIter _M_source; -# pragma omp barrier + /** @brief Start indices, per thread. */ + _DifferenceType* _M_starts; - for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++) - { - // For each sequence. - if (__iam > 0) - __sd->_M_pieces[__iam][__seq]._M_begin = - __sd->_M_pieces[__iam - 1][__seq]._M_end; - else - // Absolute beginning. - __sd->_M_pieces[__iam][__seq]._M_begin = 0; - } - } + /** @brief Storage in which to sort. */ + _ValueType** _M_temporary; + + /** @brief Samples. */ + _ValueType* _M_samples; + + /** @brief Offsets to add to the found positions. */ + _DifferenceType* _M_offsets; + + /** @brief Pieces of data to merge @__c [thread][__sequence] */ + std::vector<_Piece<_DifferenceType> >* _M_pieces; }; -/** @brief Split by sampling. */ -template - struct _SplitConsistently - { - void operator()( - const _ThreadIndex __iam, - _PMWMSSortingData<_RAIter>* __sd, - _Compare& __comp, - const typename - std::iterator_traits<_RAIter>::difference_type - __num_samples) - const + /** + * @brief Select _M_samples from a sequence. + * @param __sd Pointer to algorithm data. _Result will be placed in + * @__c __sd->_M_samples. + * @param __num_samples Number of _M_samples to select. + */ + template + void + __determine_samples(_PMWMSSortingData<_RAIter>* __sd, + _DifferenceTp __num_samples) { typedef std::iterator_traits<_RAIter> _TraitsType; typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; + typedef _DifferenceTp _DifferenceType; - __determine_samples(__sd, __num_samples); + _ThreadIndex __iam = omp_get_thread_num(); -# pragma omp barrier + _DifferenceType* __es = new _DifferenceType[__num_samples + 2]; -# pragma omp single - __gnu_sequential::sort(__sd->_M_samples, - __sd->_M_samples - + (__num_samples * __sd->_M_num_threads), - __comp); + equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam], + __num_samples + 1, __es); -# pragma omp barrier + for (_DifferenceType __i = 0; __i < __num_samples; ++__i) + ::new(&(__sd->_M_samples[__iam * __num_samples + __i])) + _ValueType(__sd->_M_source[__sd->_M_starts[__iam] + + __es[__i + 1]]); + + delete[] __es; + } - for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s) - { - // For each sequence. - if (__num_samples * __iam > 0) - __sd->_M_pieces[__iam][__s]._M_begin = + /** @brief Split consistently. */ + template + struct _SplitConsistently + { }; + + /** @brief Split by exact splitting. */ + template + struct _SplitConsistently + { + void + operator()(const _ThreadIndex __iam, + _PMWMSSortingData<_RAIter>* __sd, + _Compare& __comp, + const typename + std::iterator_traits<_RAIter>::difference_type + __num_samples) const + { +# pragma omp barrier + + std::vector > + seqs(__sd->_M_num_threads); + for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) + seqs[__s] = std::make_pair(__sd->_M_temporary[__s], + __sd->_M_temporary[__s] + + (__sd->_M_starts[__s + 1] + - __sd->_M_starts[__s])); + + std::vector<_SortingPlacesIterator> _M_offsets(__sd->_M_num_threads); + + // if not last thread + if (__iam < __sd->_M_num_threads - 1) + multiseq_partition(seqs.begin(), seqs.end(), + __sd->_M_starts[__iam + 1], _M_offsets.begin(), + __comp); + + for (int __seq = 0; __seq < __sd->_M_num_threads; __seq++) + { + // for each sequence + if (__iam < (__sd->_M_num_threads - 1)) + __sd->_M_pieces[__iam][__seq]._M_end + = _M_offsets[__seq] - seqs[__seq].first; + else + // very end of this sequence + __sd->_M_pieces[__iam][__seq]._M_end = + __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq]; + } + +# pragma omp barrier + + for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++) + { + // For each sequence. + if (__iam > 0) + __sd->_M_pieces[__iam][__seq]._M_begin = + __sd->_M_pieces[__iam - 1][__seq]._M_end; + else + // Absolute beginning. + __sd->_M_pieces[__iam][__seq]._M_begin = 0; + } + } + }; + + /** @brief Split by sampling. */ + template + struct _SplitConsistently + { + void + operator()(const _ThreadIndex __iam, + _PMWMSSortingData<_RAIter>* __sd, + _Compare& __comp, + const typename + std::iterator_traits<_RAIter>::difference_type + __num_samples) const + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + __determine_samples(__sd, __num_samples); + +# pragma omp barrier + +# pragma omp single + __gnu_sequential::sort(__sd->_M_samples, + __sd->_M_samples + + (__num_samples * __sd->_M_num_threads), + __comp); + +# pragma omp barrier + + for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s) + { + // For each sequence. + if (__num_samples * __iam > 0) + __sd->_M_pieces[__iam][__s]._M_begin = std::lower_bound(__sd->_M_temporary[__s], - __sd->_M_temporary[__s] - + (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]), - __sd->_M_samples[__num_samples * __iam], - __comp) + __sd->_M_temporary[__s] + + (__sd->_M_starts[__s + 1] + - __sd->_M_starts[__s]), + __sd->_M_samples[__num_samples * __iam], + __comp) - __sd->_M_temporary[__s]; - else - // Absolute beginning. - __sd->_M_pieces[__iam][__s]._M_begin = 0; + else + // Absolute beginning. + __sd->_M_pieces[__iam][__s]._M_begin = 0; - if ((__num_samples * (__iam + 1)) < - (__num_samples * __sd->_M_num_threads)) - __sd->_M_pieces[__iam][__s]._M_end = + if ((__num_samples * (__iam + 1)) < + (__num_samples * __sd->_M_num_threads)) + __sd->_M_pieces[__iam][__s]._M_end = std::lower_bound(__sd->_M_temporary[__s], - __sd->_M_temporary[__s] - + (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]), - __sd->_M_samples[__num_samples * (__iam + 1)], - __comp) + __sd->_M_temporary[__s] + + (__sd->_M_starts[__s + 1] + - __sd->_M_starts[__s]), + __sd->_M_samples[__num_samples * (__iam + 1)], + __comp) - __sd->_M_temporary[__s]; - else - // Absolute end. - __sd->_M_pieces[__iam][__s]._M_end = __sd->_M_starts[__s + 1] - - __sd->_M_starts[__s]; - } - } + else + // Absolute end. + __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1] + - __sd->_M_starts[__s]); + } + } }; -template - struct __possibly_stable_sort - { - }; + template + struct __possibly_stable_sort + { }; -template - struct __possibly_stable_sort - { - void operator()(const _RAIter& __begin, - const _RAIter& __end, _Compare& __comp) const + template + struct __possibly_stable_sort { - __gnu_sequential::stable_sort(__begin, __end, __comp); - } - }; + void operator()(const _RAIter& __begin, + const _RAIter& __end, _Compare& __comp) const + { __gnu_sequential::stable_sort(__begin, __end, __comp); } + }; -template - struct __possibly_stable_sort - { - void operator()(const _RAIter __begin, - const _RAIter __end, _Compare& __comp) const + template + struct __possibly_stable_sort { - __gnu_sequential::sort(__begin, __end, __comp); - } - }; - -template - struct __possibly_stable_multiway_merge - { - }; - -template - struct __possibly_stable_multiway_merge - - { - void operator()(const Seq_RAIter& __seqs_begin, - const Seq_RAIter& __seqs_end, - const _RAIter& __target, - _Compare& __comp, - DiffType __length_am) const + void operator()(const _RAIter __begin, + const _RAIter __end, _Compare& __comp) const + { __gnu_sequential::sort(__begin, __end, __comp); } + }; + + template + struct __possibly_stable_multiway_merge + { }; + + template + struct __possibly_stable_multiway_merge { - stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, - __comp, sequential_tag()); - } - }; + void operator()(const Seq_RAIter& __seqs_begin, + const Seq_RAIter& __seqs_end, + const _RAIter& __target, + _Compare& __comp, + _DiffType __length_am) const + { + stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, + __comp, sequential_tag()); + } + }; -template - struct __possibly_stable_multiway_merge - - { - void operator()(const Seq_RAIter& __seqs_begin, + template + struct __possibly_stable_multiway_merge + { + void operator()(const Seq_RAIter& __seqs_begin, const Seq_RAIter& __seqs_end, const _RAIter& __target, _Compare& __comp, - DiffType __length_am) const - { - multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp, + _DiffType __length_am) const + { + multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp, sequential_tag()); - } - }; + } + }; + + /** @brief PMWMS code executed by each thread. + * @param __sd Pointer to algorithm data. + * @param __comp Comparator. + */ + template + void + parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd, + _Compare& __comp) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; -/** @brief PMWMS code executed by each thread. - * @param __sd Pointer to algorithm data. - * @param __comp Comparator. - */ -template - void - parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd, - _Compare& __comp) - { - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; - - _ThreadIndex __iam = omp_get_thread_num(); - - // Length of this thread's chunk, before merging. - _DifferenceType __length_local - = __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam]; - - // Sort in temporary storage, leave space for sentinel. - - typedef _ValueType* _SortingPlacesIterator; - - __sd->_M_temporary[__iam] = - static_cast<_ValueType*>( - ::operator new(sizeof(_ValueType) * (__length_local + 1))); - - // Copy there. - std::uninitialized_copy( - __sd->_M_source + __sd->_M_starts[__iam], - __sd->_M_source + __sd->_M_starts[__iam] + __length_local, - __sd->_M_temporary[__iam]); - - __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>() + _ThreadIndex __iam = omp_get_thread_num(); + + // Length of this thread's chunk, before merging. + _DifferenceType __length_local + = __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam]; + + // Sort in temporary storage, leave space for sentinel. + + typedef _ValueType* _SortingPlacesIterator; + + __sd->_M_temporary[__iam] = + static_cast<_ValueType*>(::operator new(sizeof(_ValueType) + * (__length_local + 1))); + + // Copy there. + std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam], + __sd->_M_source + __sd->_M_starts[__iam] + + __length_local, + __sd->_M_temporary[__iam]); + + __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>() (__sd->_M_temporary[__iam], - __sd->_M_temporary[__iam] + __length_local, + __sd->_M_temporary[__iam] + __length_local, __comp); - // Invariant: locally sorted subsequence in sd->_M_temporary[__iam], - // __sd->_M_temporary[__iam] + __length_local. + // Invariant: locally sorted subsequence in sd->_M_temporary[__iam], + // __sd->_M_temporary[__iam] + __length_local. - // No barrier here: Synchronization is done by the splitting routine. + // No barrier here: Synchronization is done by the splitting routine. - _DifferenceType __num_samples = + _DifferenceType __num_samples = _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1; - _SplitConsistently - <__exact, _RAIter, _Compare, _SortingPlacesIterator>() + _SplitConsistently + <__exact, _RAIter, _Compare, _SortingPlacesIterator>() (__iam, __sd, __comp, __num_samples); - // Offset from __target __begin, __length after merging. - _DifferenceType __offset = 0, __length_am = 0; - for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) - { - __length_am += __sd->_M_pieces[__iam][__s]._M_end - - __sd->_M_pieces[__iam][__s]._M_begin; - __offset += __sd->_M_pieces[__iam][__s]._M_begin; - } + // Offset from __target __begin, __length after merging. + _DifferenceType __offset = 0, __length_am = 0; + for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) + { + __length_am += (__sd->_M_pieces[__iam][__s]._M_end + - __sd->_M_pieces[__iam][__s]._M_begin); + __offset += __sd->_M_pieces[__iam][__s]._M_begin; + } - typedef std::vector< + typedef std::vector< std::pair<_SortingPlacesIterator, _SortingPlacesIterator> > _SeqVector; - _SeqVector seqs(__sd->_M_num_threads); + _SeqVector seqs(__sd->_M_num_threads); - for (int __s = 0; __s < __sd->_M_num_threads; ++__s) - { - seqs[__s] = - std::make_pair( - __sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_begin, - __sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_end); - } + for (int __s = 0; __s < __sd->_M_num_threads; ++__s) + { + seqs[__s] = + std::make_pair + (__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_begin, + __sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_end); + } - __possibly_stable_multiway_merge< + __possibly_stable_multiway_merge< __stable, typename _SeqVector::iterator, _RAIter, _Compare, _DifferenceType>() (seqs.begin(), seqs.end(), - __sd->_M_source + __offset, __comp, + __sd->_M_source + __offset, __comp, __length_am); -# pragma omp barrier +# pragma omp barrier - ::operator delete(__sd->_M_temporary[__iam]); - } + ::operator delete(__sd->_M_temporary[__iam]); + } -/** @brief PMWMS main call. - * @param __begin Begin iterator of sequence. - * @param __end End iterator of sequence. - * @param __comp Comparator. - * @param __n Length of sequence. - * @param __num_threads Number of threads to use. - */ -template - void - parallel_sort_mwms(_RAIter __begin, _RAIter __end, - _Compare __comp, - _ThreadIndex __num_threads) - { - _GLIBCXX_CALL(__end - __begin) + void + parallel_sort_mwms(_RAIter __begin, _RAIter __end, + _Compare __comp, + _ThreadIndex __num_threads) + { + _GLIBCXX_CALL(__end - __begin) - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; - _DifferenceType __n = __end - __begin; + _DifferenceType __n = __end - __begin; - if (__n <= 1) - return; + if (__n <= 1) + return; - // at least one element per thread - if (__num_threads > __n) - __num_threads = static_cast<_ThreadIndex>(__n); + // at least one element per thread + if (__num_threads > __n) + __num_threads = static_cast<_ThreadIndex>(__n); - // shared variables - _PMWMSSortingData<_RAIter> __sd; - _DifferenceType* _M_starts; + // shared variables + _PMWMSSortingData<_RAIter> __sd; + _DifferenceType* _M_starts; -# pragma omp parallel num_threads(__num_threads) +# pragma omp parallel num_threads(__num_threads) { __num_threads = omp_get_num_threads(); //no more threads than requested # pragma omp single - { - __sd._M_num_threads = __num_threads; - __sd._M_source = __begin; - - __sd._M_temporary = new _ValueType*[__num_threads]; - - if (!__exact) - { - _DifferenceType __size = - (_Settings::get().sort_mwms_oversampling * __num_threads - 1) - * __num_threads; - __sd._M_samples = static_cast<_ValueType*>( - ::operator new(__size * sizeof(_ValueType))); - } - else - __sd._M_samples = NULL; - - __sd._M_offsets = new _DifferenceType[__num_threads - 1]; - __sd._M_pieces - = new std::vector<_Piece<_DifferenceType> >[__num_threads]; - for (int __s = 0; __s < __num_threads; ++__s) - __sd._M_pieces[__s].resize(__num_threads); - _M_starts = __sd._M_starts - = new _DifferenceType[__num_threads + 1]; - - _DifferenceType __chunk_length = __n / __num_threads; - _DifferenceType __split = __n % __num_threads; - _DifferenceType __pos = 0; - for (int __i = 0; __i < __num_threads; ++__i) - { - _M_starts[__i] = __pos; - __pos += (__i < __split) - ? (__chunk_length + 1) : __chunk_length; - } - _M_starts[__num_threads] = __pos; - } //single + { + __sd._M_num_threads = __num_threads; + __sd._M_source = __begin; + + __sd._M_temporary = new _ValueType*[__num_threads]; + + if (!__exact) + { + _DifferenceType __size = + (_Settings::get().sort_mwms_oversampling * __num_threads - 1) + * __num_threads; + __sd._M_samples = static_cast<_ValueType*> + (::operator new(__size * sizeof(_ValueType))); + } + else + __sd._M_samples = NULL; + + __sd._M_offsets = new _DifferenceType[__num_threads - 1]; + __sd._M_pieces + = new std::vector<_Piece<_DifferenceType> >[__num_threads]; + for (int __s = 0; __s < __num_threads; ++__s) + __sd._M_pieces[__s].resize(__num_threads); + _M_starts = __sd._M_starts + = new _DifferenceType[__num_threads + 1]; + + _DifferenceType __chunk_length = __n / __num_threads; + _DifferenceType __split = __n % __num_threads; + _DifferenceType __pos = 0; + for (int __i = 0; __i < __num_threads; ++__i) + { + _M_starts[__i] = __pos; + __pos += (__i < __split) + ? (__chunk_length + 1) : __chunk_length; + } + _M_starts[__num_threads] = __pos; + } //single // Now sort in parallel. parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp); } //parallel - delete[] _M_starts; - delete[] __sd._M_temporary; + delete[] _M_starts; + delete[] __sd._M_temporary; - if (!__exact) + if (!__exact) ::operator delete(__sd._M_samples); - delete[] __sd._M_offsets; - delete[] __sd._M_pieces; - } + delete[] __sd._M_offsets; + delete[] __sd._M_pieces; + } + } //namespace __gnu_parallel #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */ diff --git a/libstdc++-v3/include/parallel/omp_loop.h b/libstdc++-v3/include/parallel/omp_loop.h index 2424bfb..136b5c8 100644 --- a/libstdc++-v3/include/parallel/omp_loop.h +++ b/libstdc++-v3/include/parallel/omp_loop.h @@ -41,74 +41,74 @@ namespace __gnu_parallel { -/** @brief Embarrassingly parallel algorithm for random access - * iterators, using an OpenMP for loop. - * - * @param __begin Begin iterator of element sequence. - * @param __end End iterator of element sequence. - * @param __o User-supplied functor (comparator, predicate, adding - * functor, etc.). - * @param __f Functor to "process" an element with __op (depends on - * desired functionality, e. g. for std::for_each(), ...). - * @param __r Functor to "add" a single __result to the already - * processed elements (depends on functionality). - * @param __base Base value for reduction. - * @param __output Pointer to position where final result is written to - * @param __bound Maximum number of elements processed (e. g. for - * std::count_n()). - * @return User-supplied functor (that may contain a part of the result). - */ -template - _Op - __for_each_template_random_access_omp_loop( - _RAIter __begin, _RAIter __end, _Op __o, _Fu& __f, _Red __r, - _Result __base, _Result& __output, - typename std::iterator_traits<_RAIter>::difference_type __bound) - { - typedef typename - std::iterator_traits<_RAIter>::difference_type + /** @brief Embarrassingly parallel algorithm for random access + * iterators, using an OpenMP for loop. + * + * @param __begin Begin iterator of element sequence. + * @param __end End iterator of element sequence. + * @param __o User-supplied functor (comparator, predicate, adding + * functor, etc.). + * @param __f Functor to "process" an element with __op (depends on + * desired functionality, e. g. for std::for_each(), ...). + * @param __r Functor to "add" a single __result to the already + * processed elements (depends on functionality). + * @param __base Base value for reduction. + * @param __output Pointer to position where final result is written to + * @param __bound Maximum number of elements processed (e. g. for + * std::count_n()). + * @return User-supplied functor (that may contain a part of the result). + */ + template + _Op + __for_each_template_random_access_omp_loop(_RAIter __begin, _RAIter __end, + _Op __o, _Fu& __f, _Red __r, + _Result __base, + _Result& __output, + typename std::iterator_traits<_RAIter>::difference_type __bound) + { + typedef typename std::iterator_traits<_RAIter>::difference_type _DifferenceType; - _DifferenceType __length = __end - __begin; - _ThreadIndex __num_threads = - __gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length); + _DifferenceType __length = __end - __begin; + _ThreadIndex __num_threads = + __gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length); - _Result *__thread_results; + _Result *__thread_results; -# pragma omp parallel num_threads(__num_threads) +# pragma omp parallel num_threads(__num_threads) { # pragma omp single - { - __num_threads = omp_get_num_threads(); - __thread_results = new _Result[__num_threads]; + { + __num_threads = omp_get_num_threads(); + __thread_results = new _Result[__num_threads]; - for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) - __thread_results[__i] = _Result(); - } + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + __thread_results[__i] = _Result(); + } _ThreadIndex __iam = omp_get_thread_num(); #pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size) for (_DifferenceType __pos = 0; __pos < __length; ++__pos) __thread_results[__iam] = - __r(__thread_results[__iam], __f(__o, __begin+__pos)); + __r(__thread_results[__iam], __f(__o, __begin+__pos)); } //parallel - for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) __output = __r(__output, __thread_results[__i]); - delete [] __thread_results; + delete [] __thread_results; - // Points to last element processed (needed as return value for - // some algorithms like transform). - __f._M_finish_iterator = __begin + __length; + // Points to last element processed (needed as return value for + // some algorithms like transform). + __f._M_finish_iterator = __begin + __length; - return __o; - } + return __o; + } } // end namespace diff --git a/libstdc++-v3/include/parallel/omp_loop_static.h b/libstdc++-v3/include/parallel/omp_loop_static.h index 3d9ed84..e7ca267 100644 --- a/libstdc++-v3/include/parallel/omp_loop_static.h +++ b/libstdc++-v3/include/parallel/omp_loop_static.h @@ -40,7 +40,6 @@ namespace __gnu_parallel { - /** @brief Embarrassingly parallel algorithm for random access * iterators, using an OpenMP for loop with static scheduling. * @@ -58,37 +57,38 @@ namespace __gnu_parallel * std::count_n()). * @return User-supplied functor (that may contain a part of the result). */ -template - _Op - __for_each_template_random_access_omp_loop_static( - _RAIter __begin, _RAIter __end, _Op __o, _Fu& __f, _Red __r, - _Result __base, _Result& __output, - typename std::iterator_traits<_RAIter>::difference_type __bound) - { - typedef typename - std::iterator_traits<_RAIter>::difference_type - _DifferenceType; - - _DifferenceType __length = __end - __begin; - _ThreadIndex __num_threads = - std::min<_DifferenceType>(__get_max_threads(), __length); - - _Result *__thread_results; - -# pragma omp parallel num_threads(__num_threads) + template + _Op + __for_each_template_random_access_omp_loop_static(_RAIter __begin, + _RAIter __end, _Op __o, + _Fu& __f, _Red __r, + _Result __base, + _Result& __output, + typename std::iterator_traits<_RAIter>::difference_type __bound) + { + typedef typename std::iterator_traits<_RAIter>::difference_type + _DifferenceType; + + _DifferenceType __length = __end - __begin; + _ThreadIndex __num_threads = + std::min<_DifferenceType>(__get_max_threads(), __length); + + _Result *__thread_results; + +# pragma omp parallel num_threads(__num_threads) { # pragma omp single - { - __num_threads = omp_get_num_threads(); - __thread_results = new _Result[__num_threads]; + { + __num_threads = omp_get_num_threads(); + __thread_results = new _Result[__num_threads]; - for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) - __thread_results[__i] = _Result(); - } + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + __thread_results[__i] = _Result(); + } _ThreadIndex __iam = omp_get_thread_num(); @@ -98,17 +98,17 @@ template - _Op - __for_each_template_random_access_ed( - _RAIter __begin, _RAIter __end, _Op __o, _Fu& __f, _Red __r, - _Result __base, _Result& __output, - typename std::iterator_traits<_RAIter>::difference_type __bound) - { - typedef std::iterator_traits<_RAIter> _TraitsType; - typedef typename _TraitsType::difference_type _DifferenceType; - const _DifferenceType __length = __end - __begin; - _Result *__thread_results; - bool* __constructed; - - _ThreadIndex __num_threads = - __gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length); - -# pragma omp parallel num_threads(__num_threads) + /** @brief Embarrassingly parallel algorithm for random access + * iterators, using hand-crafted parallelization by equal splitting + * the work. + * + * @param __begin Begin iterator of element sequence. + * @param __end End iterator of element sequence. + * @param __o User-supplied functor (comparator, predicate, adding + * functor, ...) + * @param __f Functor to "process" an element with __op (depends on + * desired functionality, e. g. for std::for_each(), ...). + * @param __r Functor to "add" a single __result to the already + * processed elements (depends on functionality). + * @param __base Base value for reduction. + * @param __output Pointer to position where final result is written to + * @param __bound Maximum number of elements processed (e. g. for + * std::count_n()). + * @return User-supplied functor (that may contain a part of the result). + */ + template + _Op + __for_each_template_random_access_ed(_RAIter __begin, _RAIter __end, + _Op __o, _Fu& __f, _Red __r, + _Result __base, _Result& __output, + typename std::iterator_traits<_RAIter>::difference_type __bound) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::difference_type _DifferenceType; + const _DifferenceType __length = __end - __begin; + _Result *__thread_results; + bool* __constructed; + + _ThreadIndex __num_threads = + __gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length); + +# pragma omp parallel num_threads(__num_threads) { # pragma omp single - { - __num_threads = omp_get_num_threads(); - __thread_results = - static_cast<_Result*>( - ::operator new(__num_threads * sizeof(_Result))); - __constructed = new bool[__num_threads]; - } - - _ThreadIndex __iam = omp_get_thread_num(); - - // Neutral element. - _Result* __reduct = - static_cast<_Result*>(::operator new(sizeof(_Result))); - - _DifferenceType - __start = equally_split_point(__length, __num_threads, __iam), - __stop = equally_split_point(__length, __num_threads, __iam + 1); - - if (__start < __stop) - { - new(__reduct) _Result(__f(__o, __begin + __start)); - ++__start; - __constructed[__iam] = true; - } - else - __constructed[__iam] = false; - - for (; __start < __stop; ++__start) - *__reduct = __r(*__reduct, __f(__o, __begin + __start)); - - __thread_results[__iam] = *__reduct; + { + __num_threads = omp_get_num_threads(); + __thread_results = + static_cast<_Result*>(::operator new(__num_threads + * sizeof(_Result))); + __constructed = new bool[__num_threads]; + } + + _ThreadIndex __iam = omp_get_thread_num(); + + // Neutral element. + _Result* __reduct = + static_cast<_Result*>(::operator new(sizeof(_Result))); + + _DifferenceType + __start = equally_split_point(__length, __num_threads, __iam), + __stop = equally_split_point(__length, __num_threads, __iam + 1); + + if (__start < __stop) + { + new(__reduct) _Result(__f(__o, __begin + __start)); + ++__start; + __constructed[__iam] = true; + } + else + __constructed[__iam] = false; + + for (; __start < __stop; ++__start) + *__reduct = __r(*__reduct, __f(__o, __begin + __start)); + + __thread_results[__iam] = *__reduct; } //parallel - for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) - if (__constructed[__i]) - __output = __r(__output, __thread_results[__i]); + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + if (__constructed[__i]) + __output = __r(__output, __thread_results[__i]); - // Points to last element processed (needed as return value for - // some algorithms like transform). - __f._M_finish_iterator = __begin + __length; + // Points to last element processed (needed as return value for + // some algorithms like transform). + __f._M_finish_iterator = __begin + __length; - delete[] __thread_results; - delete[] __constructed; + delete[] __thread_results; + delete[] __constructed; - return __o; - } + return __o; + } } // end namespace diff --git a/libstdc++-v3/include/parallel/partial_sum.h b/libstdc++-v3/include/parallel/partial_sum.h index b121e1f..10673af 100644 --- a/libstdc++-v3/include/parallel/partial_sum.h +++ b/libstdc++-v3/include/parallel/partial_sum.h @@ -43,106 +43,107 @@ namespace __gnu_parallel { // Problem: there is no 0-element given. -/** @brief Base case prefix sum routine. - * @param __begin Begin iterator of input sequence. - * @param __end End iterator of input sequence. - * @param __result Begin iterator of output sequence. - * @param __bin_op Associative binary function. - * @param __value Start value. Must be passed since the neutral - * element is unknown in general. - * @return End iterator of output sequence. */ -template - _OutputIterator - __parallel_partial_sum_basecase( - _IIter __begin, _IIter __end, _OutputIterator __result, - _BinaryOperation __bin_op, - typename std::iterator_traits <_IIter>::value_type __value) - { - if (__begin == __end) + /** @brief Base case prefix sum routine. + * @param __begin Begin iterator of input sequence. + * @param __end End iterator of input sequence. + * @param __result Begin iterator of output sequence. + * @param __bin_op Associative binary function. + * @param __value Start value. Must be passed since the neutral + * element is unknown in general. + * @return End iterator of output sequence. */ + template + _OutputIterator + __parallel_partial_sum_basecase(_IIter __begin, _IIter __end, + _OutputIterator __result, + _BinaryOperation __bin_op, + typename std::iterator_traits <_IIter>::value_type __value) + { + if (__begin == __end) + return __result; + + while (__begin != __end) + { + __value = __bin_op(__value, *__begin); + *__result = __value; + ++__result; + ++__begin; + } return __result; - - while (__begin != __end) - { - __value = __bin_op(__value, *__begin); - *__result = __value; - ++__result; - ++__begin; - } - return __result; - } - -/** @brief Parallel partial sum implementation, two-phase approach, - no recursion. - * @param __begin Begin iterator of input sequence. - * @param __end End iterator of input sequence. - * @param __result Begin iterator of output sequence. - * @param __bin_op Associative binary function. - * @param __n Length of sequence. - * @param __num_threads Number of threads to use. - * @return End iterator of output sequence. - */ -template - _OutputIterator - __parallel_partial_sum_linear( - _IIter __begin, _IIter __end, _OutputIterator __result, - _BinaryOperation __bin_op, - typename std::iterator_traits<_IIter>::difference_type __n) - { - typedef std::iterator_traits<_IIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; - - if (__begin == __end) - return __result; - - _ThreadIndex __num_threads = + } + + /** @brief Parallel partial sum implementation, two-phase approach, + no recursion. + * @param __begin Begin iterator of input sequence. + * @param __end End iterator of input sequence. + * @param __result Begin iterator of output sequence. + * @param __bin_op Associative binary function. + * @param __n Length of sequence. + * @param __num_threads Number of threads to use. + * @return End iterator of output sequence. + */ + template + _OutputIterator + __parallel_partial_sum_linear(_IIter __begin, _IIter __end, + _OutputIterator __result, + _BinaryOperation __bin_op, + typename std::iterator_traits<_IIter>::difference_type __n) + { + typedef std::iterator_traits<_IIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + if (__begin == __end) + return __result; + + _ThreadIndex __num_threads = std::min<_DifferenceType>(__get_max_threads(), __n - 1); - if (__num_threads < 2) - { - *__result = *__begin; - return __parallel_partial_sum_basecase( - __begin + 1, __end, __result + 1, __bin_op, *__begin); - } + if (__num_threads < 2) + { + *__result = *__begin; + return __parallel_partial_sum_basecase(__begin + 1, __end, + __result + 1, __bin_op, + *__begin); + } - _DifferenceType* __borders; - _ValueType* __sums; + _DifferenceType* __borders; + _ValueType* __sums; - const _Settings& __s = _Settings::get(); + const _Settings& __s = _Settings::get(); -# pragma omp parallel num_threads(__num_threads) +# pragma omp parallel num_threads(__num_threads) { # pragma omp single - { - __num_threads = omp_get_num_threads(); - - __borders = new _DifferenceType[__num_threads + 2]; - - if (__s.partial_sum_dilation == 1.0f) - equally_split(__n, __num_threads + 1, __borders); - else - { - _DifferenceType __chunk_length = - ((double)__n - / ((double)__num_threads + __s.partial_sum_dilation)), - __borderstart = __n - __num_threads * __chunk_length; - __borders[0] = 0; - for (int __i = 1; __i < (__num_threads + 1); ++__i) - { - __borders[__i] = __borderstart; - __borderstart += __chunk_length; - } - __borders[__num_threads + 1] = __n; - } - - __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType) + { + __num_threads = omp_get_num_threads(); + + __borders = new _DifferenceType[__num_threads + 2]; + + if (__s.partial_sum_dilation == 1.0f) + equally_split(__n, __num_threads + 1, __borders); + else + { + _DifferenceType __chunk_length = + ((double)__n + / ((double)__num_threads + __s.partial_sum_dilation)), + __borderstart = __n - __num_threads * __chunk_length; + __borders[0] = 0; + for (int __i = 1; __i < (__num_threads + 1); ++__i) + { + __borders[__i] = __borderstart; + __borderstart += __chunk_length; + } + __borders[__num_threads + 1] = __n; + } + + __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType) * __num_threads)); - _OutputIterator __target_end; - } //single + _OutputIterator __target_end; + } //single _ThreadIndex __iam = omp_get_thread_num(); if (__iam == 0) @@ -166,58 +167,57 @@ template - _OutputIterator - __parallel_partial_sum(_IIter __begin, _IIter __end, - _OutputIterator __result, _BinaryOperation __bin_op) - { - _GLIBCXX_CALL(__begin - __end) - - typedef std::iterator_traits<_IIter> _TraitsType; - typedef typename _TraitsType::value_type _ValueType; - typedef typename _TraitsType::difference_type _DifferenceType; - - _DifferenceType __n = __end - __begin; - - switch (_Settings::get().partial_sum_algorithm) - { - case LINEAR: - // Need an initial offset. - return __parallel_partial_sum_linear( - __begin, __end, __result, __bin_op, __n); - default: - // Partial_sum algorithm not implemented. - _GLIBCXX_PARALLEL_ASSERT(0); - return __result + __n; - } - } + ::operator delete(__sums); + delete[] __borders; + + return __result + __n; + } + + /** @brief Parallel partial sum front-__end. + * @param __begin Begin iterator of input sequence. + * @param __end End iterator of input sequence. + * @param __result Begin iterator of output sequence. + * @param __bin_op Associative binary function. + * @return End iterator of output sequence. */ + template + _OutputIterator + __parallel_partial_sum(_IIter __begin, _IIter __end, + _OutputIterator __result, _BinaryOperation __bin_op) + { + _GLIBCXX_CALL(__begin - __end) + + typedef std::iterator_traits<_IIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _DifferenceType __n = __end - __begin; + + switch (_Settings::get().partial_sum_algorithm) + { + case LINEAR: + // Need an initial offset. + return __parallel_partial_sum_linear(__begin, __end, __result, + __bin_op, __n); + default: + // Partial_sum algorithm not implemented. + _GLIBCXX_PARALLEL_ASSERT(0); + return __result + __n; + } + } } #endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */ -- 2.7.4