[libc++] Optimize std::ranges::{min, max} for types that are cheap to copy
author Nikolas Klauser <nikolasklauser@berlin.de>
Wed, 8 Feb 2023 19:09:12 +0000 (20:09 +0100)
committer Nikolas Klauser <nikolasklauser@berlin.de>
Sat, 11 Mar 2023 15:28:24 +0000 (16:28 +0100)
Don't forward to `min_element` for small types that are trivially copyable, and instead use a naive loop that keeps track of the smallest element (as opposed to an iterator to the smallest element). This allows the compiler to vectorize the loop in some cases.

Reviewed By: #libc, ldionne

Spies: ldionne, libcxx-commits

Differential Revision: https://reviews.llvm.org/D143596

libcxx/benchmarks/CMakeLists.txt
libcxx/benchmarks/algorithms/min.bench.cpp [new file with mode: 0644]
libcxx/include/__algorithm/ranges_max.h
libcxx/include/__algorithm/ranges_min.h
libcxx/include/__type_traits/is_trivially_copyable.h
libcxx/test/libcxx/transitive_includes/cxx2b.csv
libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.max.pass.cpp
libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.min.pass.cpp

diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index 572f210..979e9c9 100644 (file)
@@ -162,6 +162,7 @@ set(BENCHMARK_TESTS
     algorithms/lower_bound.bench.cpp
     algorithms/make_heap.bench.cpp
     algorithms/make_heap_then_sort_heap.bench.cpp
+    algorithms/min.bench.cpp
     algorithms/min_max_element.bench.cpp
     algorithms/pop_heap.bench.cpp
     algorithms/push_heap.bench.cpp
diff --git a/libcxx/benchmarks/algorithms/min.bench.cpp b/libcxx/benchmarks/algorithms/min.bench.cpp
new file mode 100644 (file)
index 0000000..1e1dd4e
--- /dev/null
@@ -0,0 +1,70 @@
+#include <algorithm>
+#include <cassert>
+
+#include <benchmark/benchmark.h>
+
+void run_sizes(auto benchmark) { // Registers the input lengths to benchmark: every length from 1 to 32, then a sparse set of larger ones.
+  benchmark->Arg(1) // dense range: each Arg(k) adds one run with state.range() == k
+      ->Arg(2)
+      ->Arg(3)
+      ->Arg(4)
+      ->Arg(5)
+      ->Arg(6)
+      ->Arg(7)
+      ->Arg(8)
+      ->Arg(9)
+      ->Arg(10)
+      ->Arg(11)
+      ->Arg(12)
+      ->Arg(13)
+      ->Arg(14)
+      ->Arg(15)
+      ->Arg(16)
+      ->Arg(17)
+      ->Arg(18)
+      ->Arg(19)
+      ->Arg(20)
+      ->Arg(21)
+      ->Arg(22)
+      ->Arg(23)
+      ->Arg(24)
+      ->Arg(25)
+      ->Arg(26)
+      ->Arg(27)
+      ->Arg(28)
+      ->Arg(29)
+      ->Arg(30)
+      ->Arg(31)
+      ->Arg(32)
+      ->Arg(64) // sparse range: a mix of powers of two and other lengths (presumably to hit both full-vector and leftover-tail cases; confirm)
+      ->Arg(512)
+      ->Arg(1024)
+      ->Arg(4000)
+      ->Arg(4096)
+      ->Arg(5500)
+      ->Arg(64000)
+      ->Arg(65536)
+      ->Arg(70000);
+}
+
+template <class T> // T: element type of the benchmarked vector
+static void BM_std_min(benchmark::State& state) { // Times std::ranges::min over a vector of state.range() elements of type T.
+  std::vector<T> vec(state.range(), 3); // every element is 3. NOTE(review): <vector> is not among this file's visible includes; presumably pulled in transitively, confirm
+
+  for (auto _ : state) { // one pass of the body per benchmark iteration
+    benchmark::DoNotOptimize(vec); // keep the compiler from treating the input as a known constant
+    benchmark::DoNotOptimize(std::ranges::min(vec)); // keep the result alive so the call is not eliminated as dead code
+  }
+}
+BENCHMARK(BM_std_min<char>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<short>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<int>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<long long>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<__int128>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned char>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned short>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned int>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned long long>)->Apply(run_sizes);
+BENCHMARK(BM_std_min<unsigned __int128>)->Apply(run_sizes);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/include/__algorithm/ranges_max.h b/libcxx/include/__algorithm/ranges_max.h
index b3c7fb1..2fd2970 100644 (file)
@@ -20,6 +20,7 @@
 #include <__iterator/projected.h>
 #include <__ranges/access.h>
 #include <__ranges/concepts.h>
+#include <__type_traits/is_trivially_copyable.h>
 #include <__utility/move.h>
 #include <initializer_list>
 
@@ -67,7 +68,7 @@ struct __fn {
 
     _LIBCPP_ASSERT(__first != __last, "range must contain at least one element");
 
-    if constexpr (forward_range<_Rp>) {
+    if constexpr (forward_range<_Rp> && !__is_cheap_to_copy<range_value_t<_Rp>>) {
       auto __comp_lhs_rhs_swapped = [&](auto&& __lhs, auto&& __rhs) { return std::invoke(__comp, __rhs, __lhs); };
       return *ranges::__min_element_impl(std::move(__first), std::move(__last), __comp_lhs_rhs_swapped, __proj);
     } else {
diff --git a/libcxx/include/__algorithm/ranges_min.h b/libcxx/include/__algorithm/ranges_min.h
index ba9dad5..5e941a1 100644 (file)
@@ -20,6 +20,7 @@
 #include <__iterator/projected.h>
 #include <__ranges/access.h>
 #include <__ranges/concepts.h>
+#include <__type_traits/is_trivially_copyable.h>
 #include <initializer_list>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -61,10 +62,8 @@ struct __fn {
   range_value_t<_Rp> operator()(_Rp&& __r, _Comp __comp = {}, _Proj __proj = {}) const {
     auto __first = ranges::begin(__r);
     auto __last = ranges::end(__r);
-
     _LIBCPP_ASSERT(__first != __last, "range must contain at least one element");
-
-    if constexpr (forward_range<_Rp>) {
+    if constexpr (forward_range<_Rp> && !__is_cheap_to_copy<range_value_t<_Rp>>) {
       return *ranges::__min_element_impl(__first, __last, __comp, __proj);
     } else {
       range_value_t<_Rp> __result = *__first;
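diff --git a/libcxx/include/__type_traits/is_trivially_copyable.h b/libcxx/include/__type_traits/is_trivially_copyable.h
index 3e6d598..a725a0b 100644 (file)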
@@ -11,6 +11,7 @@
 
 #include <__config>
 #include <__type_traits/integral_constant.h>
+#include <cstdint>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -27,6 +28,11 @@ template <class _Tp>
 inline constexpr bool is_trivially_copyable_v = __is_trivially_copyable(_Tp);
 #endif
 
+#if _LIBCPP_STD_VER >= 20
+template <class _Tp> // True when _Tp is trivially copyable and no larger than std::intmax_t.
+inline constexpr bool __is_cheap_to_copy = is_trivially_copyable_v<_Tp> && sizeof(_Tp) <= sizeof(std::intmax_t);
+#endif
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___TYPE_TRAITS_IS_TRIVIALLY_COPYABLE_H
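diff --git a/libcxx/test/libcxx/transitive_includes/cxx2b.csv b/libcxx/test/libcxx/transitive_includes/cxx2b.csv
index 06ee226..beb4acc 100644 (file)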
@@ -652,6 +652,7 @@ utility limits
 utility version
 valarray cmath
 valarray cstddef
+valarray cstdint
 valarray cstdlib
 valarray initializer_list
 valarray limits
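diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.max.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.max.pass.cpp
index 3e0d82d..e9e5240 100644 (file)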
@@ -176,19 +176,38 @@ constexpr void test_initializer_list() {
 
 template <class It, class Sent = It>
 constexpr void test_range_types() {
-  int a[] = {7, 6, 9, 3, 5, 1, 2, 4};
+  std::iter_value_t<It> a[] = {7, 6, 9, 3, 5, 1, 2, 4};
   auto range = std::ranges::subrange(It(a), Sent(It(a + 8)));
-  int ret = std::ranges::max(range);
+  auto ret = std::ranges::max(range);
   assert(ret == 9);
 }
 
 constexpr void test_range() {
-  { // check that all range types work
-    test_range_types<cpp20_input_iterator<int*>, sentinel_wrapper<cpp20_input_iterator<int*>>>();
-    test_range_types<forward_iterator<int*>>();
-    test_range_types<bidirectional_iterator<int*>>();
-    test_range_types<random_access_iterator<int*>>();
-    test_range_types<contiguous_iterator<int*>>();
+  // check that all range types work
+  {
+    struct NonTrivialInt { // int wrapper whose user-provided copy operations and destructor make it not trivially copyable
+      int val_;
+      constexpr NonTrivialInt(int val) : val_(val) {} // non-explicit, so plain int literals can initialize and compare against it
+      constexpr NonTrivialInt(const NonTrivialInt& other) : val_(other.val_) {} // user-provided copy constructor
+      constexpr NonTrivialInt& operator=(const NonTrivialInt& other) { // user-provided copy assignment
+        val_ = other.val_;
+        return *this;
+      }
+
+      constexpr ~NonTrivialInt() {} // user-provided destructor
+
+      auto operator<=>(const NonTrivialInt&) const = default; // member-wise ordering on val_
+    };
+
+    auto call_with_sentinels = []<class Iter> { // runs test_range_types for Iter paired with each sentinel kind
+      if constexpr (std::forward_iterator<Iter>) // Iter serves as its own sentinel only when it is a forward iterator
+        test_range_types<Iter, Iter>();
+      test_range_types<Iter, sentinel_wrapper<Iter>>();
+      test_range_types<Iter, sized_sentinel<Iter>>();
+    };
+
+    types::for_each(types::cpp20_input_iterator_list<int*>{}, call_with_sentinels);
+    types::for_each(types::cpp20_input_iterator_list<NonTrivialInt*>{}, call_with_sentinels);
   }
 
   int a[] = {7, 6, 9, 3, 5, 1, 2, 4};
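diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.min.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.min.pass.cpp
index a211fe6..3d92964 100644 (file)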
@@ -171,19 +171,38 @@ constexpr void test_initializer_list() {
 
 template <class It, class Sent = It>
 constexpr void test_range_types() {
-  int a[] = {7, 6, 9, 3, 5, 1, 2, 4};
+  std::iter_value_t<It> a[] = {7, 6, 9, 3, 5, 1, 2, 4};
   auto range = std::ranges::subrange(It(a), Sent(It(a + 8)));
-  int ret = std::ranges::min(range);
+  auto ret = std::ranges::min(range);
   assert(ret == 1);
 }
 
 constexpr void test_range() {
-  { // check that all range types work
-    test_range_types<cpp20_input_iterator<int*>, sentinel_wrapper<cpp20_input_iterator<int*>>>();
-    test_range_types<forward_iterator<int*>>();
-    test_range_types<bidirectional_iterator<int*>>();
-    test_range_types<random_access_iterator<int*>>();
-    test_range_types<contiguous_iterator<int*>>();
+  // check that all range types work
+  {
+    struct NonTrivialInt { // int wrapper whose user-provided copy operations and destructor make it not trivially copyable
+      int val_;
+      constexpr NonTrivialInt(int val) : val_(val) {} // non-explicit, so plain int literals can initialize and compare against it
+      constexpr NonTrivialInt(const NonTrivialInt& other) : val_(other.val_) {} // user-provided copy constructor
+      constexpr NonTrivialInt& operator=(const NonTrivialInt& other) { // user-provided copy assignment
+        val_ = other.val_;
+        return *this;
+      }
+
+      constexpr ~NonTrivialInt() {} // user-provided destructor
+
+      auto operator<=>(const NonTrivialInt&) const = default; // member-wise ordering on val_
+    };
+
+    auto call_with_sentinels = []<class Iter> { // runs test_range_types for Iter paired with each sentinel kind
+      if constexpr (std::forward_iterator<Iter>) // Iter serves as its own sentinel only when it is a forward iterator
+        test_range_types<Iter, Iter>();
+      test_range_types<Iter, sentinel_wrapper<Iter>>();
+      test_range_types<Iter, sized_sentinel<Iter>>();
+    };
+
+    types::for_each(types::cpp20_input_iterator_list<int*>{}, call_with_sentinels);
+    types::for_each(types::cpp20_input_iterator_list<NonTrivialInt*>{}, call_with_sentinels);
   }
 
   int a[] = {7, 6, 9, 3, 5, 1, 2, 4};