From 503f2ee4a86a740c4b4f442d95ff4e6d7f66a670 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 18 Jul 2023 18:02:09 -0400 Subject: [PATCH] [libc++] Make sure we use the libdispatch backend on Apple platforms The Apple.cmake cache wasn't set up properly, so we wouldn't enable the libdispatch backend by default on Apple platforms. This patch fixes the issue and adds a test. We also need to make various drive-by fixes: - Drop the usage of std::vector in libdispatch.h to avoid changing the transitive includes only on Apple platforms. - Fix includes - Use __construct_at since construct_at is unavailable in C++17 - Get rid of the (unused) __get_memory_resource function since that adds a back-deployment requirement and we don't use it right now. - Fix bugs in the chunking logic around boundary conditions. Differential Revision: https://reviews.llvm.org/D155649 --- libcxx/cmake/caches/Apple.cmake | 2 +- .../pstl_backends/cpu_backends/libdispatch.h | 33 ++++++++++++++++------ libcxx/src/pstl/libdispatch.cpp | 11 ++++---- .../pstl.libdispatch.chunk_partitions.pass.cpp | 17 ++++++++++- .../vendor/apple/system-install-properties.sh.cpp | 4 +++ 5 files changed, 51 insertions(+), 16 deletions(-) diff --git a/libcxx/cmake/caches/Apple.cmake b/libcxx/cmake/caches/Apple.cmake index 32aae6d..804eccd 100644 --- a/libcxx/cmake/caches/Apple.cmake +++ b/libcxx/cmake/caches/Apple.cmake @@ -7,7 +7,7 @@ set(LIBCXX_ENABLE_STATIC ON CACHE BOOL "") set(LIBCXX_ENABLE_SHARED ON CACHE BOOL "") set(LIBCXX_CXX_ABI libcxxabi CACHE STRING "") set(LIBCXX_ENABLE_VENDOR_AVAILABILITY_ANNOTATIONS ON CACHE BOOL "") -set(LIBCXX_PSTL_CPU_BACKEND libdispatch) +set(LIBCXX_PSTL_CPU_BACKEND libdispatch CACHE STRING "") set(LIBCXX_HERMETIC_STATIC_LIBRARY ON CACHE BOOL "") set(LIBCXXABI_HERMETIC_STATIC_LIBRARY ON CACHE BOOL "") diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h index 49af1c4..bab6a36 100644 
--- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h +++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h @@ -10,22 +10,26 @@ #define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_LIBDISPATCH_H #include <__algorithm/lower_bound.h> +#include <__algorithm/max.h> #include <__algorithm/upper_bound.h> #include <__atomic/atomic.h> #include <__config> #include <__exception/terminate.h> #include <__iterator/iterator_traits.h> #include <__iterator/move_iterator.h> +#include <__memory/allocator.h> #include <__memory/construct_at.h> #include <__memory/unique_ptr.h> -#include <__memory_resource/memory_resource.h> #include <__numeric/reduce.h> #include <__utility/exception_guard.h> #include <__utility/move.h> +#include <__utility/pair.h> #include <__utility/terminate_on_exception.h> #include #include -#include + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 @@ -53,7 +57,6 @@ struct __chunk_partitions { ptrdiff_t __first_chunk_size_; }; -[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI pmr::memory_resource* __get_memory_resource(); [[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI __chunk_partitions __partition_chunks(ptrdiff_t __size); template @@ -107,13 +110,20 @@ _LIBCPP_HIDE_FROM_ABI void __parallel_merge( } using __merge_range_t = __merge_range<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>; + auto const __n_ranges = __partitions.__chunk_count_ + 1; - vector<__merge_range_t> __ranges; - __ranges.reserve(__partitions.__chunk_count_ + 1); + // TODO: use __uninitialized_buffer + auto __destroy = [=](__merge_range_t* __ptr) { + std::destroy_n(__ptr, __n_ranges); + std::allocator<__merge_range_t>().deallocate(__ptr, __n_ranges); + }; + unique_ptr<__merge_range_t[], decltype(__destroy)> __ranges( + std::allocator<__merge_range_t>().allocate(__n_ranges), __destroy); // TODO: Improve the case where the smaller range is merged into just a few 
(or even one) chunks of the larger case std::__terminate_on_exception([&] { - __ranges.emplace_back(__first1, __first2, __result); + __merge_range_t* __r = __ranges.get(); + std::__construct_at(__r++, __first1, __first2, __result); bool __iterate_first_range = __last1 - __first1 > __last2 - __first2; @@ -137,14 +147,14 @@ _LIBCPP_HIDE_FROM_ABI void __parallel_merge( }; // handle first chunk - __ranges.emplace_back(__compute_chunk(__partitions.__first_chunk_size_)); + std::__construct_at(__r++, __compute_chunk(__partitions.__first_chunk_size_)); // handle 2 -> N - 1 chunks for (ptrdiff_t __i = 0; __i != __partitions.__chunk_count_ - 2; ++__i) - __ranges.emplace_back(__compute_chunk(__partitions.__chunk_size_)); + std::__construct_at(__r++, __compute_chunk(__partitions.__chunk_size_)); // handle last chunk - __ranges.emplace_back(__last1, __last2, __result); + std::__construct_at(__r, __last1, __last2, __result); __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __index) { auto __first_iters = __ranges[__index]; @@ -168,6 +178,9 @@ _LIBCPP_HIDE_FROM_ABI _Value __parallel_transform_reduce( _Value __init, _Combiner __combiner, _Reduction __reduction) { + if (__first == __last) + return __init; + auto __partitions = __libdispatch::__partition_chunks(__last - __first); auto __destroy = [__count = __partitions.__chunk_count_](_Value* __ptr) { @@ -223,4 +236,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_LIBDISPATCH_H diff --git a/libcxx/src/pstl/libdispatch.cpp b/libcxx/src/pstl/libdispatch.cpp index e264aad..b3a9559 100644 --- a/libcxx/src/pstl/libdispatch.cpp +++ b/libcxx/src/pstl/libdispatch.cpp @@ -10,23 +10,24 @@ #include <__algorithm/pstl_backends/cpu_backends/libdispatch.h> #include <__config> #include -#include #include _LIBCPP_BEGIN_NAMESPACE_STD namespace __par_backend::inline __libdispatch { 
-pmr::memory_resource* __get_memory_resource() { - static std::pmr::synchronized_pool_resource pool{pmr::new_delete_resource()}; - return &pool; -} void __dispatch_apply(size_t chunk_count, void* context, void (*func)(void* context, size_t chunk)) noexcept { ::dispatch_apply_f(chunk_count, DISPATCH_APPLY_AUTO, context, func); } __chunk_partitions __partition_chunks(ptrdiff_t element_count) { + if (element_count == 0) { + return __chunk_partitions{1, 0, 0}; + } else if (element_count == 1) { + return __chunk_partitions{1, 0, 1}; + } + __chunk_partitions partitions; partitions.__chunk_count_ = [&] { ptrdiff_t cores = std::max(1u, thread::hardware_concurrency()); diff --git a/libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp b/libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp index 1f1c96e..91935a8 100644 --- a/libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp +++ b/libcxx/test/libcxx/algorithms/pstl.libdispatch.chunk_partitions.pass.cpp @@ -17,8 +17,23 @@ #include int main(int, char**) { - for (std::ptrdiff_t i = 0; i != 2ll << 20; ++i) { + { + auto chunks = std::__par_backend::__libdispatch::__partition_chunks(0); + assert(chunks.__chunk_count_ == 1); + assert(chunks.__first_chunk_size_ == 0); + assert(chunks.__chunk_size_ == 0); + } + + { + auto chunks = std::__par_backend::__libdispatch::__partition_chunks(1); + assert(chunks.__chunk_count_ == 1); + assert(chunks.__first_chunk_size_ == 1); + assert(chunks.__chunk_size_ == 0); + } + + for (std::ptrdiff_t i = 2; i != 2ll << 20; ++i) { auto chunks = std::__par_backend::__libdispatch::__partition_chunks(i); + assert(chunks.__chunk_count_ >= 1); assert(chunks.__chunk_count_ <= i); assert((chunks.__chunk_count_ - 1) * chunks.__chunk_size_ + chunks.__first_chunk_size_ == i); } diff --git a/libcxx/test/libcxx/vendor/apple/system-install-properties.sh.cpp b/libcxx/test/libcxx/vendor/apple/system-install-properties.sh.cpp index e155013..6c84e0d 100644 --- 
a/libcxx/test/libcxx/vendor/apple/system-install-properties.sh.cpp +++ b/libcxx/test/libcxx/vendor/apple/system-install-properties.sh.cpp @@ -42,3 +42,7 @@ // when they are loaded by dyld, if the compatibility version was bumped. // // RUN: otool -L "%{lib}/libc++.1.dylib" | grep "libc++.1.dylib" | grep "compatibility version 1.0.0" + +// Make sure we use the libdispatch backend for the PSTL. +// +// RUN: grep "%{include}/__config_site" -e '#define _LIBCPP_PSTL_CPU_BACKEND_LIBDISPATCH' -- 2.7.4