Don't forward to `min_element` for small types that are trivially copyable, and instead use a naive loop that keeps track of the smallest element (as opposed to an iterator to the smallest element). This allows the compiler to vectorize the loop in some cases.
Reviewed By: #libc, ldionne
Spies: ldionne, libcxx-commits
Differential Revision: https://reviews.llvm.org/D143596
algorithms/lower_bound.bench.cpp
algorithms/make_heap.bench.cpp
algorithms/make_heap_then_sort_heap.bench.cpp
+ algorithms/min.bench.cpp
algorithms/min_max_element.bench.cpp
algorithms/pop_heap.bench.cpp
algorithms/push_heap.bench.cpp
--- /dev/null
+#include <algorithm>
+#include <cassert>
+#include <vector>
+
+#include <benchmark/benchmark.h>
+
+// Registers every input size the benchmark runs with: each size from 1
+// through 32, followed by a handful of larger sizes up to 70000. The sizes
+// are registered in the same ascending order as before.
+void run_sizes(auto benchmark) {
+  // Every small size individually.
+  for (int small_size = 1; small_size <= 32; ++small_size)
+    benchmark->Arg(small_size);
+
+  // Selected larger sizes.
+  constexpr int large_sizes[] = {64, 512, 1024, 4000, 4096, 5500, 64000, 65536, 70000};
+  for (int large_size : large_sizes)
+    benchmark->Arg(large_size);
+}
+
+// Benchmarks std::ranges::min over a std::vector<T> whose length is the
+// benchmark argument (state.range()) and whose elements are all equal to 3.
+template <class T>
+static void BM_std_min(benchmark::State& state) {
+ std::vector<T> vec(state.range(), 3);
+
+ for (auto _ : state) {
+ // Pass both the input and the result through DoNotOptimize so the call
+ // being measured is not optimized away.
+ benchmark::DoNotOptimize(vec);
+ benchmark::DoNotOptimize(std::ranges::min(vec));
+ }
+}
+// Register BM_std_min for char and for the signed and unsigned integer types
+// from short up to __int128, applying run_sizes to each registration.
+BENCHMARK(BM_std_min<char>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<short>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<int>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<long long>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<__int128>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned char>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned short>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned int>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned long long>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned __int128>)->Apply(run_sizes);
+
+BENCHMARK_MAIN();
#include <__iterator/projected.h>
#include <__ranges/access.h>
#include <__ranges/concepts.h>
+#include <__type_traits/is_trivially_copyable.h>
#include <__utility/move.h>
#include <initializer_list>
_LIBCPP_ASSERT(__first != __last, "range must contain at least one element");
- if constexpr (forward_range<_Rp>) {
+ if constexpr (forward_range<_Rp> && !__is_cheap_to_copy<range_value_t<_Rp>>) {
auto __comp_lhs_rhs_swapped = [&](auto&& __lhs, auto&& __rhs) { return std::invoke(__comp, __rhs, __lhs); };
return *ranges::__min_element_impl(std::move(__first), std::move(__last), __comp_lhs_rhs_swapped, __proj);
} else {
#include <__iterator/projected.h>
#include <__ranges/access.h>
#include <__ranges/concepts.h>
+#include <__type_traits/is_trivially_copyable.h>
#include <initializer_list>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
range_value_t<_Rp> operator()(_Rp&& __r, _Comp __comp = {}, _Proj __proj = {}) const {
auto __first = ranges::begin(__r);
auto __last = ranges::end(__r);
-
_LIBCPP_ASSERT(__first != __last, "range must contain at least one element");
-
- if constexpr (forward_range<_Rp>) {
+ if constexpr (forward_range<_Rp> && !__is_cheap_to_copy<range_value_t<_Rp>>) {
return *ranges::__min_element_impl(__first, __last, __comp, __proj);
} else {
range_value_t<_Rp> __result = *__first;
#include <__config>
#include <__type_traits/integral_constant.h>
+#include <cstdint>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
inline constexpr bool is_trivially_copyable_v = __is_trivially_copyable(_Tp);
#endif
+#if _LIBCPP_STD_VER >= 20
+// True when _Tp is trivially copyable and no larger than std::intmax_t.
+// The range overloads touched by this patch only forward a forward_range to
+// ranges::__min_element_impl when this is false; otherwise they take the
+// branch that keeps a copy of the best element seen so far.
+template <class _Tp>
+inline constexpr bool __is_cheap_to_copy = is_trivially_copyable_v<_Tp> && sizeof(_Tp) <= sizeof(std::intmax_t);
+#endif
+
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___TYPE_TRAITS_IS_TRIVIALLY_COPYABLE_H
utility version
valarray cmath
valarray cstddef
+valarray cstdint
valarray cstdlib
valarray initializer_list
valarray limits
+// Checks that std::ranges::max returns 9, the largest of the eight elements,
+// for a subrange built from iterator type It and sentinel type Sent. The
+// array's element type is It's value type, so non-int value types are covered.
template <class It, class Sent = It>
constexpr void test_range_types() {
- int a[] = {7, 6, 9, 3, 5, 1, 2, 4};
+ std::iter_value_t<It> a[] = {7, 6, 9, 3, 5, 1, 2, 4};
auto range = std::ranges::subrange(It(a), Sent(It(a + 8)));
- int ret = std::ranges::max(range);
+ auto ret = std::ranges::max(range);
assert(ret == 9);
}
constexpr void test_range() {
- { // check that all range types work
- test_range_types<cpp20_input_iterator<int*>, sentinel_wrapper<cpp20_input_iterator<int*>>>();
- test_range_types<forward_iterator<int*>>();
- test_range_types<bidirectional_iterator<int*>>();
- test_range_types<random_access_iterator<int*>>();
- test_range_types<contiguous_iterator<int*>>();
+ // check that all range types work
+ {
+ // int wrapper whose copy constructor, copy assignment and destructor are
+ // user-provided, so the type is not trivially copyable. The converting
+ // constructor is deliberately non-explicit: the tests initialize arrays of
+ // this type from int literals and compare results against an int.
+ struct NonTrivialInt {
+ int val_;
+ constexpr NonTrivialInt(int val) : val_(val) {}
+ constexpr NonTrivialInt(const NonTrivialInt& other) : val_(other.val_) {}
+ constexpr NonTrivialInt& operator=(const NonTrivialInt& other) {
+ val_ = other.val_;
+ return *this;
+ }
+
+ constexpr ~NonTrivialInt() {}
+
+ // Defaulted three-way comparison orders values by val_.
+ auto operator<=>(const NonTrivialInt&) const = default;
+ };
+
+ // Runs test_range_types for iterator type Iter paired with each sentinel
+ // kind. The if constexpr guards only the first call: the iterator is used
+ // as its own sentinel only when it models std::forward_iterator, while the
+ // sentinel_wrapper and sized_sentinel cases always run.
+ auto call_with_sentinels = []<class Iter> {
+ if constexpr (std::forward_iterator<Iter>)
+ test_range_types<Iter, Iter>();
+ test_range_types<Iter, sentinel_wrapper<Iter>>();
+ test_range_types<Iter, sized_sentinel<Iter>>();
+ };
+
+ types::for_each(types::cpp20_input_iterator_list<int*>{}, call_with_sentinels);
+ types::for_each(types::cpp20_input_iterator_list<NonTrivialInt*>{}, call_with_sentinels);
}
int a[] = {7, 6, 9, 3, 5, 1, 2, 4};
+// Checks that std::ranges::min returns 1, the smallest of the eight elements,
+// for a subrange built from iterator type It and sentinel type Sent. The
+// array's element type is It's value type, so non-int value types are covered.
template <class It, class Sent = It>
constexpr void test_range_types() {
- int a[] = {7, 6, 9, 3, 5, 1, 2, 4};
+ std::iter_value_t<It> a[] = {7, 6, 9, 3, 5, 1, 2, 4};
auto range = std::ranges::subrange(It(a), Sent(It(a + 8)));
- int ret = std::ranges::min(range);
+ auto ret = std::ranges::min(range);
assert(ret == 1);
}
constexpr void test_range() {
- { // check that all range types work
- test_range_types<cpp20_input_iterator<int*>, sentinel_wrapper<cpp20_input_iterator<int*>>>();
- test_range_types<forward_iterator<int*>>();
- test_range_types<bidirectional_iterator<int*>>();
- test_range_types<random_access_iterator<int*>>();
- test_range_types<contiguous_iterator<int*>>();
+ // check that all range types work
+ {
+ // int wrapper whose copy constructor, copy assignment and destructor are
+ // user-provided, so the type is not trivially copyable. The converting
+ // constructor is deliberately non-explicit: the tests initialize arrays of
+ // this type from int literals and compare results against an int.
+ struct NonTrivialInt {
+ int val_;
+ constexpr NonTrivialInt(int val) : val_(val) {}
+ constexpr NonTrivialInt(const NonTrivialInt& other) : val_(other.val_) {}
+ constexpr NonTrivialInt& operator=(const NonTrivialInt& other) {
+ val_ = other.val_;
+ return *this;
+ }
+
+ constexpr ~NonTrivialInt() {}
+
+ // Defaulted three-way comparison orders values by val_.
+ auto operator<=>(const NonTrivialInt&) const = default;
+ };
+
+ // Runs test_range_types for iterator type Iter paired with each sentinel
+ // kind. The if constexpr guards only the first call: the iterator is used
+ // as its own sentinel only when it models std::forward_iterator, while the
+ // sentinel_wrapper and sized_sentinel cases always run.
+ auto call_with_sentinels = []<class Iter> {
+ if constexpr (std::forward_iterator<Iter>)
+ test_range_types<Iter, Iter>();
+ test_range_types<Iter, sentinel_wrapper<Iter>>();
+ test_range_types<Iter, sized_sentinel<Iter>>();
+ };
+
+ types::for_each(types::cpp20_input_iterator_list<int*>{}, call_with_sentinels);
+ types::for_each(types::cpp20_input_iterator_list<NonTrivialInt*>{}, call_with_sentinels);
}
int a[] = {7, 6, 9, 3, 5, 1, 2, 4};