From b97859b67416b1c56e90daa34792db124fa85c58 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Tue, 9 May 2023 14:44:13 -0700 Subject: [PATCH] [libc++][PSTL] Move the already implemented functions to the new dispatching scheme Reviewed By: ldionne, #libc Spies: arichardson, pcwang-thead, libcxx-commits, miyuki Differential Revision: https://reviews.llvm.org/D150277 --- libcxx/include/CMakeLists.txt | 4 +- libcxx/include/__algorithm/pstl_any_all_none_of.h | 72 +++++++----- libcxx/include/__algorithm/pstl_backend.h | 18 +++ .../__algorithm/pstl_backends/cpu_backend.h | 6 + .../pstl_backends/cpu_backends/any_of.h | 90 +++++++++++++++ .../__algorithm/pstl_backends/cpu_backends/fill.h | 60 ++++++++++ .../pstl_backends/cpu_backends/find_if.h | 125 +++++++++++++++++++++ .../pstl_backends/cpu_backends/serial.h | 2 + libcxx/include/__algorithm/pstl_fill.h | 65 ++++++----- libcxx/include/__algorithm/pstl_find.h | 103 +++++++---------- libcxx/include/__algorithm/pstl_for_each.h | 4 - .../__pstl/internal/parallel_backend_serial.h | 12 -- libcxx/include/__pstl/internal/parallel_impl.h | 91 --------------- .../include/__pstl/internal/unseq_backend_simd.h | 57 ---------- libcxx/include/module.modulemap.in | 3 + libcxx/test/libcxx/private_headers.verify.cpp | 3 + libcxx/utils/data/ignore_format.txt | 1 - 17 files changed, 427 insertions(+), 289 deletions(-) create mode 100644 libcxx/include/__algorithm/pstl_backends/cpu_backends/any_of.h create mode 100644 libcxx/include/__algorithm/pstl_backends/cpu_backends/fill.h create mode 100644 libcxx/include/__algorithm/pstl_backends/cpu_backends/find_if.h delete mode 100644 libcxx/include/__pstl/internal/parallel_impl.h diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 7446442..83b920a 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -72,7 +72,10 @@ set(files __algorithm/pstl_any_all_none_of.h __algorithm/pstl_backend.h __algorithm/pstl_backends/cpu_backend.h + __algorithm/pstl_backends/cpu_backends/any_of.h __algorithm/pstl_backends/cpu_backends/backend.h + __algorithm/pstl_backends/cpu_backends/fill.h + __algorithm/pstl_backends/cpu_backends/find_if.h __algorithm/pstl_backends/cpu_backends/for_each.h __algorithm/pstl_backends/cpu_backends/serial.h __algorithm/pstl_fill.h @@ -538,7 +541,6 @@ set(files __pstl/internal/parallel_backend_serial.h __pstl/internal/parallel_backend_tbb.h __pstl/internal/parallel_backend_utils.h - __pstl/internal/parallel_impl.h __pstl/internal/unseq_backend_simd.h __pstl/internal/utils.h __pstl_algorithm diff --git a/libcxx/include/__algorithm/pstl_any_all_none_of.h b/libcxx/include/__algorithm/pstl_any_all_none_of.h index 4ca4deb..374d9af 100644 --- a/libcxx/include/__algorithm/pstl_any_all_none_of.h +++ b/libcxx/include/__algorithm/pstl_any_all_none_of.h @@ -9,12 +9,10 @@ #ifndef _LIBCPP___ALGORITHM_PSTL_ANY_ALL_NONE_OF_H #define _LIBCPP___ALGORITHM_PSTL_ANY_ALL_NONE_OF_H -#include <__algorithm/any_of.h> +#include <__algorithm/pstl_find.h> +#include <__algorithm/pstl_frontend_dispatch.h> #include <__config> #include <__iterator/iterator_traits.h> -#include <__pstl/internal/execution_impl.h> -#include <__pstl/internal/parallel_impl.h> -#include <__pstl/internal/unseq_backend_simd.h> #include <__type_traits/enable_if.h> #include <__type_traits/is_execution_policy.h> #include <__type_traits/remove_cvref.h> @@ -28,50 +26,66 @@ _LIBCPP_BEGIN_NAMESPACE_STD +template +void __pstl_any_of(); // declaration needed for the frontend dispatch below + template >, int> = 0> + class _RawPolicy = __remove_cvref_t<_ExecutionPolicy>, + enable_if_t, int> = 0> _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI bool any_of(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> && - __is_cpp17_random_access_iterator<_ForwardIterator>::value) { - return std::__terminate_on_exception([&] { - return __pstl::__internal::__parallel_or( - __pstl::__internal::__par_backend_tag{}, - __policy, - __first, - __last, - [&__policy, &__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { - return std::any_of(std::__remove_parallel_policy(__policy), __brick_first, __brick_last, __pred); - }); - }); - } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> && - __is_cpp17_random_access_iterator<_ForwardIterator>::value) { - return __pstl::__unseq_backend::__simd_or(__first, __last - __first, __pred); - } else { - return std::any_of(__first, __last, __pred); - } + return std::__pstl_frontend_dispatch( + _LIBCPP_PSTL_CUSTOMIZATION_POINT(__pstl_any_of), + [&](_ForwardIterator __g_first, _ForwardIterator __g_last, _Predicate __g_pred) { + return std::find_if(__policy, __g_first, __g_last, __g_pred) != __g_last; + }, + std::move(__first), + std::move(__last), + std::move(__pred)); } +template +void __pstl_all_of(); // declaration needed for the frontend dispatch below + template >, int> = 0> + class _RawPolicy = __remove_cvref_t<_ExecutionPolicy>, + enable_if_t, int> = 0> _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI bool all_of(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Pred __pred) { - return !std::any_of(__policy, __first, __last, [&](__iter_reference<_ForwardIterator> __value) { - return !__pred(__value); - }); + return std::__pstl_frontend_dispatch( + _LIBCPP_PSTL_CUSTOMIZATION_POINT(__pstl_all_of), + [&](_ForwardIterator __g_first, _ForwardIterator __g_last, _Pred __g_pred) { + return !std::any_of(__policy, __g_first, __g_last, [&](__iter_reference<_ForwardIterator> __value) { + return !__g_pred(__value); + }); + }, + std::move(__first), + std::move(__last), + std::move(__pred)); } +template +void __pstl_none_of(); // declaration needed for the frontend dispatch below + template >, int> = 0> + class _RawPolicy = __remove_cvref_t<_ExecutionPolicy>, + enable_if_t, int> = 0> _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI bool none_of(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Pred __pred) { - return !std::any_of(__policy, __first, __last, __pred); + return std::__pstl_frontend_dispatch( + _LIBCPP_PSTL_CUSTOMIZATION_POINT(__pstl_none_of), + [&](_ForwardIterator __g_first, _ForwardIterator __g_last, _Pred __g_pred) { + return !std::any_of(__policy, __g_first, __g_last, __g_pred); + }, + std::move(__first), + std::move(__last), + std::move(__pred)); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/pstl_backend.h b/libcxx/include/__algorithm/pstl_backend.h index 587720e..55f2dc4 100644 --- a/libcxx/include/__algorithm/pstl_backend.h +++ b/libcxx/include/__algorithm/pstl_backend.h @@ -29,6 +29,9 @@ A PSTL parallel backend is a tag type to which the following functions are assoc template void __pstl_for_each(_Backend, _ExecutionPolicy&&, _Iterator __first, _Iterator __last, _Func __f); + template + _Iterator __pstl_find_if(_Backend, _Iterator __first, _Iterator __last, _Predicate __pred); + // TODO: Complete this list The following functions are optional but can be provided. If provided, they are used by the corresponding @@ -38,6 +41,21 @@ implemented, all the algorithms will eventually forward to the basis algorithms template void __pstl_for_each_n(_Backend, _ExecutionPolicy&&, _Iterator __first, _Size __n, _Func __f); + template + bool __pstl_any_of(_Backend, _Iterator __first, _iterator __last, _Predicate __pred); + + template + bool __pstl_all_of(_Backend, _Iterator __first, _iterator __last, _Predicate __pred); + + template + bool __pstl_none_of(_Backend, _Iterator __first, _iterator __last, _Predicate __pred); + + template + void __pstl_fill(_Iterator __first, _Iterator __last, const _Tp& __value); + + template + void __pstl_fill_n(_Iterator __first, _SizeT __n, const _Tp& __value); + // TODO: Complete this list */ diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backend.h b/libcxx/include/__algorithm/pstl_backends/cpu_backend.h index b12d454..98e6eb7 100644 --- a/libcxx/include/__algorithm/pstl_backends/cpu_backend.h +++ b/libcxx/include/__algorithm/pstl_backends/cpu_backend.h @@ -17,9 +17,15 @@ template void __parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func); + // Cancel the execution of other jobs - they aren't needed anymore + void __cancel_execution(); + TODO: Document the parallel backend */ +#include <__algorithm/pstl_backends/cpu_backends/any_of.h> +#include <__algorithm/pstl_backends/cpu_backends/fill.h> +#include <__algorithm/pstl_backends/cpu_backends/find_if.h> #include <__algorithm/pstl_backends/cpu_backends/for_each.h> #endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKEND_H diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/any_of.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/any_of.h new file mode 100644 index 0000000..661ea2d --- /dev/null +++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/any_of.h @@ -0,0 +1,90 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKEND_ANY_OF_H +#define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKEND_ANY_OF_H + +#include <__algorithm/any_of.h> +#include <__algorithm/find_if.h> +#include <__algorithm/pstl_backends/cpu_backends/backend.h> +#include <__atomic/atomic.h> +#include <__atomic/memory_order.h> +#include <__config> +#include <__functional/operations.h> +#include <__iterator/iterator_traits.h> +#include <__type_traits/is_execution_policy.h> +#include <__utility/pair.h> +#include <__utility/terminate_on_exception.h> +#include + +#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +_LIBCPP_HIDE_FROM_ABI bool __parallel_or(_Index __first, _Index __last, _Brick __f) { + std::atomic __found(false); + __par_backend::__parallel_for(__first, __last, [__f, &__found](_Index __i, _Index __j) { + if (!__found.load(std::memory_order_relaxed) && __f(__i, __j)) { + __found.store(true, std::memory_order_relaxed); + __par_backend::__cancel_execution(); + } + }); + return __found; +} + +// TODO: check whether __simd_first() can be used here +template +_LIBCPP_HIDE_FROM_ABI bool __simd_or(_Index __first, _DifferenceType __n, _Pred __pred) noexcept { + _DifferenceType __block_size = 4 < __n ? 4 : __n; + const _Index __last = __first + __n; + while (__last != __first) { + int32_t __flag = 1; + _PSTL_PRAGMA_SIMD_REDUCTION(& : __flag) + for (_DifferenceType __i = 0; __i < __block_size; ++__i) + if (__pred(*(__first + __i))) + __flag = 0; + if (!__flag) + return true; + + __first += __block_size; + if (__last - __first >= __block_size << 1) { + // Double the block _Size. Any unnecessary iterations can be amortized against work done so far. + __block_size <<= 1; + } else { + __block_size = __last - __first; + } + } + return false; +} + +template +_LIBCPP_HIDE_FROM_ABI bool +__pstl_any_of(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { + if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> && + __is_cpp17_random_access_iterator<_ForwardIterator>::value) { + return std::__terminate_on_exception([&] { + return std::__parallel_or( + __first, __last, [&__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { + return std::__pstl_any_of<__remove_parallel_policy_t<_ExecutionPolicy>>( + __cpu_backend_tag{}, __brick_first, __brick_last, __pred); + }); + }); + } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> && + __is_cpp17_random_access_iterator<_ForwardIterator>::value) { + return std::__simd_or(__first, __last - __first, __pred); + } else { + return std::any_of(__first, __last, __pred); + } +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 + +#endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKEND_ANY_OF_H diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/fill.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/fill.h new file mode 100644 index 0000000..7163c33 --- /dev/null +++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/fill.h @@ -0,0 +1,60 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_FILL_H +#define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_FILL_H + +#include <__algorithm/fill.h> +#include <__algorithm/pstl_backends/cpu_backends/backend.h> +#include <__config> +#include <__iterator/iterator_traits.h> +#include <__type_traits/is_execution_policy.h> +#include <__utility/terminate_on_exception.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +_LIBCPP_HIDE_FROM_ABI _Index __simd_fill_n(_Index __first, _DifferenceType __n, const _Tp& __value) noexcept { + _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED + _PSTL_PRAGMA_SIMD + for (_DifferenceType __i = 0; __i < __n; ++__i) + __first[__i] = __value; + return __first + __n; +} + +template +_LIBCPP_HIDE_FROM_ABI void +__pstl_fill(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { + if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> && + __is_cpp17_random_access_iterator<_ForwardIterator>::value) { + std::__terminate_on_exception([&] { + __par_backend::__parallel_for( + __first, __last, [&__value](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { + std::__pstl_fill<__remove_parallel_policy_t<_ExecutionPolicy>>( + __cpu_backend_tag{}, __brick_first, __brick_last, __value); + }); + }); + } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> && + __is_cpp17_random_access_iterator<_ForwardIterator>::value) { + std::__simd_fill_n(__first, __last - __first, __value); + } else { + std::fill(__first, __last, __value); + } +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 + +#endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_FILL_H diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/find_if.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/find_if.h new file mode 100644 index 0000000..bfab9b5 --- /dev/null +++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/find_if.h @@ -0,0 +1,125 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_FIND_IF_H +#define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_FIND_IF_H + +#include <__algorithm/find_if.h> +#include <__algorithm/pstl_backends/cpu_backends/backend.h> +#include <__atomic/atomic.h> +#include <__config> +#include <__functional/operations.h> +#include <__iterator/iterator_traits.h> +#include <__type_traits/is_execution_policy.h> +#include <__utility/pair.h> +#include <__utility/terminate_on_exception.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +_LIBCPP_HIDE_FROM_ABI _Index +__parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool __b_first) { + typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType; + const _DifferenceType __n = __last - __first; + _DifferenceType __initial_dist = __b_first ? __n : -1; + std::atomic<_DifferenceType> __extremum(__initial_dist); + // TODO: find out what is better here: parallel_for or parallel_reduce + __par_backend::__parallel_for(__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) { + // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of + // why using a shared variable scales fairly well in this situation. + if (__comp(__i - __first, __extremum)) { + _Index __res = __f(__i, __j); + // If not '__last' returned then we found what we want so put this to extremum + if (__res != __j) { + const _DifferenceType __k = __res - __first; + for (_DifferenceType __old = __extremum; __comp(__k, __old); __old = __extremum) { + __extremum.compare_exchange_weak(__old, __k); + } + } + } + }); + return __extremum != __initial_dist ? __first + __extremum : __last; +} + +const std::size_t __lane_size = 64; + +template +_LIBCPP_HIDE_FROM_ABI _Index +__simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept { + // Experiments show good block sizes like this + const _DifferenceType __block_size = 8; + alignas(__lane_size) _DifferenceType __lane[__block_size] = {0}; + while (__end - __begin >= __block_size) { + _DifferenceType __found = 0; + _PSTL_PRAGMA_SIMD_REDUCTION(| : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; ++__i) { + const _DifferenceType __t = __comp(__first, __i); + __lane[__i - __begin] = __t; + __found |= __t; + } + if (__found) { + _DifferenceType __i; + // This will vectorize + for (__i = 0; __i < __block_size; ++__i) { + if (__lane[__i]) { + break; + } + } + return __first + __begin + __i; + } + __begin += __block_size; + } + + // Keep remainder scalar + while (__begin != __end) { + if (__comp(__first, __begin)) { + return __first + __begin; + } + ++__begin; + } + return __first + __end; +} + +template +_LIBCPP_HIDE_FROM_ABI _ForwardIterator +__pstl_find_if(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { + if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> && + __is_cpp17_random_access_iterator<_ForwardIterator>::value) { + return std::__terminate_on_exception([&] { + return std::__parallel_find( + __first, + __last, + [&__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { + return std::__pstl_find_if<__remove_parallel_policy_t<_ExecutionPolicy>>( + __cpu_backend_tag{}, __brick_first, __brick_last, __pred); + }, + less<>{}, + true); + }); + } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> && + __is_cpp17_random_access_iterator<_ForwardIterator>::value) { + using __diff_t = __iter_diff_t<_ForwardIterator>; + return std::__simd_first(__first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) { + return __pred(__iter[__i]); + }); + } else { + return std::find_if(__first, __last, __pred); + } +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 + +#endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_FIND_IF_H diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/serial.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/serial.h index 24bac4f..ccd24cb 100644 --- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/serial.h +++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/serial.h @@ -28,6 +28,8 @@ _LIBCPP_HIDE_FROM_ABI void __parallel_for(_RandomAccessIterator __first, _Random __f(__first, __last); } +_LIBCPP_HIDE_FROM_ABI inline void __cancel_execution() {} + // TODO: Complete this list } // namespace __serial_cpu_backend diff --git a/libcxx/include/__algorithm/pstl_fill.h b/libcxx/include/__algorithm/pstl_fill.h index 5e61285..f7850d0 100644 --- a/libcxx/include/__algorithm/pstl_fill.h +++ b/libcxx/include/__algorithm/pstl_fill.h @@ -9,15 +9,11 @@ #ifndef _LIBCPP___ALGORITHM_PSTL_FILL_H #define _LIBCPP___ALGORITHM_PSTL_FILL_H -#include <__algorithm/fill.h> +#include <__algorithm/fill_n.h> +#include <__algorithm/pstl_for_each.h> +#include <__algorithm/pstl_frontend_dispatch.h> #include <__config> #include <__iterator/iterator_traits.h> -#include <__pstl/internal/execution_impl.h> -#include <__pstl/internal/parallel_backend.h> -#include <__pstl/internal/parallel_backend_serial.h> -#include <__pstl/internal/parallel_impl.h> -#include <__pstl/internal/unseq_backend_simd.h> -#include <__type_traits/enable_if.h> #include <__type_traits/is_execution_policy.h> #include <__type_traits/remove_cvref.h> #include <__utility/terminate_on_exception.h> @@ -30,43 +26,50 @@ _LIBCPP_BEGIN_NAMESPACE_STD +template +void __pstl_fill(); // declaration needed for the frontend dispatch below + template >, int> = 0> + class _RawPolicy = __remove_cvref_t<_ExecutionPolicy>, + enable_if_t, int> = 0> _LIBCPP_HIDE_FROM_ABI void fill(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { - if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> && - __is_cpp17_random_access_iterator<_ForwardIterator>::value) { - std::__terminate_on_exception([&] { - __pstl::__par_backend::__parallel_for( - __pstl::__internal::__par_backend_tag{}, - __policy, - __first, - __last, - [&__policy, &__value](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { - std::fill(std::__remove_parallel_policy(__policy), __brick_first, __brick_last, __value); - }); - }); - } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> && - __is_cpp17_random_access_iterator<_ForwardIterator>::value) { - __pstl::__unseq_backend::__simd_fill_n(__first, __last - __first, __value); - } else { - std::fill(__first, __last, __value); - } + std::__pstl_frontend_dispatch( + _LIBCPP_PSTL_CUSTOMIZATION_POINT(__pstl_fill), + [&](_ForwardIterator __g_first, _ForwardIterator __g_last, const _Tp& __g_value) { + std::for_each(__policy, __g_first, __g_last, [&](__iter_reference<_ForwardIterator> __element) { + __element = __g_value; + }); + }, + std::move(__first), + std::move(__last), + __value); } +template +void __pstl_fill_n(); // declaration needed for the frontend dispatch below + template >, int> = 0> + class _RawPolicy = __remove_cvref_t<_ExecutionPolicy>, + enable_if_t, int> = 0> _LIBCPP_HIDE_FROM_ABI void fill_n(_ExecutionPolicy&& __policy, _ForwardIterator __first, _SizeT __n, const _Tp& __value) { - if constexpr (__is_cpp17_random_access_iterator<_ForwardIterator>::value) - std::fill(__policy, __first, __first + __n, __value); - else - std::fill_n(__first, __n, __value); + std::__pstl_frontend_dispatch( + _LIBCPP_PSTL_CUSTOMIZATION_POINT(__pstl_fill_n), + [&](_ForwardIterator __g_first, _SizeT __g_n, const _Tp& __g_value) { + if constexpr (__is_cpp17_random_access_iterator<_ForwardIterator>::value) + std::fill(__policy, __g_first, __g_first + __g_n, __g_value); + else + std::fill_n(__g_first, __g_n, __g_value); + }, + std::move(__first), + __n, + __value); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/pstl_find.h b/libcxx/include/__algorithm/pstl_find.h index bd26826..6d69560 100644 --- a/libcxx/include/__algorithm/pstl_find.h +++ b/libcxx/include/__algorithm/pstl_find.h @@ -11,13 +11,10 @@ #include <__algorithm/comp.h> #include <__algorithm/find.h> +#include <__algorithm/pstl_backend.h> +#include <__algorithm/pstl_frontend_dispatch.h> #include <__config> -#include <__functional/operations.h> #include <__iterator/iterator_traits.h> -#include <__pstl/internal/execution_impl.h> -#include <__pstl/internal/parallel_impl.h> -#include <__pstl/internal/unseq_backend_simd.h> -#include <__type_traits/enable_if.h> #include <__type_traits/is_execution_policy.h> #include <__type_traits/remove_cvref.h> #include <__utility/terminate_on_exception.h> @@ -32,77 +29,57 @@ _LIBCPP_BEGIN_NAMESPACE_STD template >, int> = 0> + class _Predicate, + class _RawPolicy = __remove_cvref_t<_ExecutionPolicy>, + enable_if_t, int> = 0> _LIBCPP_HIDE_FROM_ABI _ForwardIterator -find(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { - if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> && - __is_cpp17_random_access_iterator<_ForwardIterator>::value) { - return std::__terminate_on_exception([&] { - return __pstl::__internal::__parallel_find( - __pstl::__internal::__par_backend_tag{}, - __policy, - __first, - __last, - [&__policy, &__value](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { - return std::find(std::__remove_parallel_policy(__policy), __brick_first, __brick_last, __value); - }, - less<>{}, - true); - }); - } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> && - __is_cpp17_random_access_iterator<_ForwardIterator>::value) { - using __diff_t = __iter_diff_t<_ForwardIterator>; - return __pstl::__unseq_backend::__simd_first( - __first, __diff_t(0), __last - __first, [&__value](_ForwardIterator __iter, __diff_t __i) { - return __iter[__i] == __value; - }); - } else { - return std::find(__first, __last, __value); - } +find_if(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { + using _Backend = typename __select_backend<_RawPolicy>::type; + return std::__pstl_find_if<_RawPolicy>(_Backend{}, std::move(__first), std::move(__last), std::move(__pred)); } +template +void __pstl_find_if_not(); + template >, int> = 0> + class _RawPolicy = __remove_cvref_t<_ExecutionPolicy>, + enable_if_t, int> = 0> _LIBCPP_HIDE_FROM_ABI _ForwardIterator -find_if(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> && - __is_cpp17_random_access_iterator<_ForwardIterator>::value) { - return std::__terminate_on_exception([&] { - return __pstl::__internal::__parallel_find( - __pstl::__internal::__par_backend_tag{}, - __policy, - __first, - __last, - [&__policy, &__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) { - return std::find_if(std::__remove_parallel_policy(__policy), __brick_first, __brick_last, __pred); - }, - less<>{}, - true); - }); - } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> && - __is_cpp17_random_access_iterator<_ForwardIterator>::value) { - using __diff_t = __iter_diff_t<_ForwardIterator>; - return __pstl::__unseq_backend::__simd_first( - __first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) { - return __pred(__iter[__i]); +find_if_not(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { + return std::__pstl_frontend_dispatch( + _LIBCPP_PSTL_CUSTOMIZATION_POINT(__pstl_find_if_not), + [&](_ForwardIterator __g_first, _ForwardIterator __g_last, _Predicate __g_pred) { + return std::find_if(__policy, __g_first, __g_last, [&](__iter_reference<_ForwardIterator> __value) { + return !__g_pred(__value); }); - } else { - return std::find_if(__first, __last, __pred); - } + }, + std::move(__first), + std::move(__last), + std::move(__pred)); } +template +void __pstl_find(); + template >, int> = 0> + class _Tp, + class _RawPolicy = __remove_cvref_t<_ExecutionPolicy>, + enable_if_t, int> = 0> _LIBCPP_HIDE_FROM_ABI _ForwardIterator -find_if_not(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - return std::find_if(__policy, __first, __last, [&](__iter_reference<_ForwardIterator> __value) { - return !__pred(__value); - }); +find(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { + return std::__pstl_frontend_dispatch( + _LIBCPP_PSTL_CUSTOMIZATION_POINT(__pstl_find), + [&](_ForwardIterator __g_first, _ForwardIterator __g_last, const _Tp& __g_value) { + return std::find_if(__policy, __g_first, __g_last, [&](__iter_reference<_ForwardIterator> __element) { + return __element == __g_value; + }); + }, + std::move(__first), + std::move(__last), + __value); } _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/pstl_for_each.h b/libcxx/include/__algorithm/pstl_for_each.h index 4e183c6..78cb01c 100644 --- a/libcxx/include/__algorithm/pstl_for_each.h +++ b/libcxx/include/__algorithm/pstl_for_each.h @@ -15,10 +15,6 @@ #include <__algorithm/pstl_frontend_dispatch.h> #include <__config> #include <__iterator/iterator_traits.h> -#include <__pstl/internal/parallel_backend.h> -#include <__pstl/internal/parallel_backend_serial.h> -#include <__pstl/internal/unseq_backend_simd.h> -#include <__type_traits/enable_if.h> #include <__type_traits/is_execution_policy.h> #include <__type_traits/remove_cvref.h> #include <__type_traits/void_t.h> diff --git a/libcxx/include/__pstl/internal/parallel_backend_serial.h b/libcxx/include/__pstl/internal/parallel_backend_serial.h index 4733899..b3ecb82 100644 --- a/libcxx/include/__pstl/internal/parallel_backend_serial.h +++ b/libcxx/include/__pstl/internal/parallel_backend_serial.h @@ -46,18 +46,6 @@ class __buffer _LIBCPP_HIDE_FROM_ABI ~__buffer() { __allocator_.deallocate(__ptr_, __buf_size_); } }; -_LIBCPP_HIDE_FROM_ABI inline void -__cancel_execution() -{ -} - -template -_LIBCPP_HIDE_FROM_ABI void -__parallel_for(__pstl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) -{ - __f(__first, __last); -} - template _LIBCPP_HIDE_FROM_ABI _Value __parallel_reduce(__pstl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, diff --git a/libcxx/include/__pstl/internal/parallel_impl.h b/libcxx/include/__pstl/internal/parallel_impl.h deleted file mode 100644 index 740f137..0000000 --- a/libcxx/include/__pstl/internal/parallel_impl.h +++ /dev/null @@ -1,91 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _PSTL_PARALLEL_IMPL_H -#define _PSTL_PARALLEL_IMPL_H - -#include <__atomic/atomic.h> -#include <__atomic/memory_order.h> -#include <__config> -#include <__iterator/iterator_traits.h> -#include <__pstl/internal/parallel_backend.h> -#include <__pstl/internal/parallel_backend_serial.h> -#include <__utility/forward.h> - -#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 - -namespace __pstl -{ -namespace __internal -{ - -//------------------------------------------------------------------------ -// parallel_find -//----------------------------------------------------------------------- -/** Return extremum value returned by brick f[i,j) for subranges [i,j) of [first,last) -Each f[i,j) must return a value in [i,j). */ -template -_LIBCPP_HIDE_FROM_ABI _Index -__parallel_find(_BackendTag __tag, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, - _Compare __comp, bool __b_first) -{ - typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType; - const _DifferenceType __n = __last - __first; - _DifferenceType __initial_dist = __b_first ? __n : -1; - std::atomic<_DifferenceType> __extremum(__initial_dist); - // TODO: find out what is better here: parallel_for or parallel_reduce - __par_backend::__parallel_for(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__comp, __f, __first, &__extremum](_Index __i, _Index __j) - { - // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of - // why using a shared variable scales fairly well in this situation. - if (__comp(__i - __first, __extremum)) - { - _Index __res = __f(__i, __j); - // If not '__last' returned then we found what we want so put this to extremum - if (__res != __j) - { - const _DifferenceType __k = __res - __first; - for (_DifferenceType __old = __extremum; __comp(__k, __old); - __old = __extremum) - { - __extremum.compare_exchange_weak(__old, __k); - } - } - } - }); - return __extremum != __initial_dist ? __first + __extremum : __last; -} - -//------------------------------------------------------------------------ -// parallel_or -//------------------------------------------------------------------------ -//! Return true if brick f[i,j) returns true for some subrange [i,j) of [first,last) -template -_LIBCPP_HIDE_FROM_ABI -bool __parallel_or(_BackendTag __tag, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f) { - std::atomic __found(false); - __par_backend::__parallel_for(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__f, &__found](_Index __i, _Index __j) - { - if (!__found.load(std::memory_order_relaxed) && __f(__i, __j)) - { - __found.store(true, std::memory_order_relaxed); - __par_backend::__cancel_execution(); - } - }); - return __found; -} - -} // namespace __internal -} // namespace __pstl - -#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 - -#endif /* _PSTL_PARALLEL_IMPL_H */ diff --git a/libcxx/include/__pstl/internal/unseq_backend_simd.h b/libcxx/include/__pstl/internal/unseq_backend_simd.h index b4a7f64..268ea1d 100644 --- a/libcxx/include/__pstl/internal/unseq_backend_simd.h +++ b/libcxx/include/__pstl/internal/unseq_backend_simd.h @@ -98,52 +98,6 @@ __simd_or(_Index __first, _DifferenceType __n, _Pred __pred) noexcept return false; } -template -_LIBCPP_HIDE_FROM_ABI _Index -__simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept -{ - // Experiments show good block sizes like this - const _DifferenceType __block_size = 8; - alignas(__lane_size) _DifferenceType __lane[__block_size] = {0}; - while (__end - __begin >= __block_size) - { - _DifferenceType __found = 0; - _PSTL_PRAGMA_SIMD_REDUCTION(| - : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; - ++__i) - { - const _DifferenceType __t = __comp(__first, __i); - __lane[__i - __begin] = __t; - __found |= __t; - } - if (__found) - { - _DifferenceType __i; - // This will vectorize - for (__i = 0; __i < __block_size; ++__i) - { - if (__lane[__i]) - { - break; - } - } - return __first + __begin + __i; - } - __begin += __block_size; - } - - //Keep remainder scalar - while (__begin != __end) - { - if (__comp(__first, __begin)) - { - return __first + __begin; - } - ++__begin; - } - return __first + __end; -} - template _LIBCPP_HIDE_FROM_ABI std::pair<_Index1, _Index2> __simd_first(_Index1 __first1, _DifferenceType __n, _Index2 __first2, _Pred __pred) noexcept @@ -323,17 +277,6 @@ __simd_partition_by_mask(_InputIterator __first, _DifferenceType __n, _OutputIte } } -template -_LIBCPP_HIDE_FROM_ABI _Index -__simd_fill_n(_Index __first, _DifferenceType __n, const _Tp& __value) noexcept -{ - _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED - _PSTL_PRAGMA_SIMD - for (_DifferenceType __i = 0; __i < __n; ++__i) - __first[__i] = __value; - return __first + __n; -} - template _LIBCPP_HIDE_FROM_ABI _Index __simd_generate_n(_Index __first, _DifferenceType __size, _Generator __g) noexcept diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in index 33baaf6e..e4b5dae 100644 --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -322,7 +322,10 @@ module std [system] { module prev_permutation { private header "__algorithm/prev_permutation.h" } module pstl { private header "__algorithm/pstl_backends/cpu_backend.h" + private header "__algorithm/pstl_backends/cpu_backends/any_of.h" private header "__algorithm/pstl_backends/cpu_backends/backend.h" + private header "__algorithm/pstl_backends/cpu_backends/fill.h" + private header "__algorithm/pstl_backends/cpu_backends/find_if.h" private header "__algorithm/pstl_backends/cpu_backends/for_each.h" private header "__algorithm/pstl_backends/cpu_backends/serial.h" } diff --git a/libcxx/test/libcxx/private_headers.verify.cpp b/libcxx/test/libcxx/private_headers.verify.cpp index 9dd2607..60d7081 100644 --- a/libcxx/test/libcxx/private_headers.verify.cpp +++ b/libcxx/test/libcxx/private_headers.verify.cpp @@ -115,7 +115,10 @@ END-SCRIPT #include <__algorithm/pop_heap.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/pop_heap.h'}} #include <__algorithm/prev_permutation.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/prev_permutation.h'}} #include <__algorithm/pstl_backends/cpu_backend.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/pstl_backends/cpu_backend.h'}} +#include <__algorithm/pstl_backends/cpu_backends/any_of.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/pstl_backends/cpu_backends/any_of.h'}} #include <__algorithm/pstl_backends/cpu_backends/backend.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/pstl_backends/cpu_backends/backend.h'}} +#include <__algorithm/pstl_backends/cpu_backends/fill.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/pstl_backends/cpu_backends/fill.h'}} +#include <__algorithm/pstl_backends/cpu_backends/find_if.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/pstl_backends/cpu_backends/find_if.h'}} #include <__algorithm/pstl_backends/cpu_backends/for_each.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/pstl_backends/cpu_backends/for_each.h'}} #include <__algorithm/pstl_backends/cpu_backends/serial.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/pstl_backends/cpu_backends/serial.h'}} #include <__algorithm/push_heap.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/push_heap.h'}} diff --git a/libcxx/utils/data/ignore_format.txt b/libcxx/utils/data/ignore_format.txt index c1e43be..c32f5ec 100644 --- a/libcxx/utils/data/ignore_format.txt +++ b/libcxx/utils/data/ignore_format.txt @@ -521,7 +521,6 @@ libcxx/include/__pstl/internal/parallel_backend.h libcxx/include/__pstl/internal/parallel_backend_serial.h libcxx/include/__pstl/internal/parallel_backend_tbb.h libcxx/include/__pstl/internal/parallel_backend_utils.h -libcxx/include/__pstl/internal/parallel_impl.h libcxx/include/__pstl/internal/unseq_backend_simd.h libcxx/include/__pstl/internal/utils.h libcxx/include/queue -- 2.7.4