From b07880454ba32e48a2e7f7be35516e0b76f60077 Mon Sep 17 00:00:00 2001 From: Laramie Leavitt Date: Tue, 24 May 2022 10:27:37 +0200 Subject: [PATCH] [libc++] Replace modulus operations in std::seed_seq::generate with conditional checks. Abseil benchmarks suggest that the conditional checks result in faster code (4-5x) as they are compiled into conditional move instructions (cmov on x86). Reviewed By: #libc, philnik, Mordante Spies: pengfei, Mordante, philnik, libcxx-commits Differential Revision: https://reviews.llvm.org/D125329 --- libcxx/benchmarks/random.bench.cpp | 33 +++++++++++++++++ libcxx/include/__random/seed_seq.h | 74 +++++++++++++++++++++++++------------- 2 files changed, 82 insertions(+), 25 deletions(-) create mode 100644 libcxx/benchmarks/random.bench.cpp diff --git a/libcxx/benchmarks/random.bench.cpp b/libcxx/benchmarks/random.bench.cpp new file mode 100644 index 0000000..e437849 --- /dev/null +++ b/libcxx/benchmarks/random.bench.cpp @@ -0,0 +1,33 @@ +//===----------------------------------------------------------------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" + +constexpr std::size_t MAX_BUFFER_LEN = 256; +constexpr std::size_t MAX_SEED_LEN = 16; + +static void BM_SeedSeq_Generate(benchmark::State& state) { + std::array buffer; + std::array seeds; + { + std::random_device rd; + std::generate(std::begin(seeds), std::begin(seeds) + state.range(0), [&]() { return rd(); }); + } + std::seed_seq seed(std::begin(seeds), std::begin(seeds) + state.range(0)); + for (auto _ : state) { + seed.generate(std::begin(buffer), std::begin(buffer) + state.range(1)); + } +} +BENCHMARK(BM_SeedSeq_Generate)->Ranges({{1, MAX_SEED_LEN}, {1, MAX_BUFFER_LEN}}); + +BENCHMARK_MAIN(); diff --git a/libcxx/include/__random/seed_seq.h b/libcxx/include/__random/seed_seq.h index 8640cd1..330537f 100644 --- a/libcxx/include/__random/seed_seq.h +++ b/libcxx/include/__random/seed_seq.h @@ -109,39 +109,63 @@ seed_seq::generate(_RandomAccessIterator __first, _RandomAccessIterator __last) __first[__q] += __r; __first[0] = __r; } + // Initialize indexing terms used with if statements as an optimization to + // avoid calculating modulo n on every loop iteration for each term. + size_t __kmodn = 0; // __k % __n + size_t __k1modn = __n - 1; // (__k - 1) % __n + size_t __kpmodn = __p % __n; // (__k + __p) % __n + size_t __kqmodn = __q % __n; // (__k + __q) % __n + for (size_t __k = 1; __k <= __s; ++__k) { - const size_t __kmodn = __k % __n; - const size_t __kpmodn = (__k + __p) % __n; - result_type __r = 1664525 * _Tp(__first[__kmodn] ^ __first[__kpmodn] - ^ __first[(__k - 1) % __n]); - __first[__kpmodn] += __r; - __r += __kmodn + __v_[__k-1]; - __first[(__k + __q) % __n] += __r; - __first[__kmodn] = __r; + if (++__kmodn == __n) + __kmodn = 0; + if (++__k1modn == __n) + __k1modn = 0; + if (++__kpmodn == __n) + __kpmodn = 0; + if (++__kqmodn == __n) + __kqmodn = 0; + + result_type __r = 1664525 * _Tp(__first[__kmodn] ^ __first[__kpmodn] ^ __first[__k1modn]); + __first[__kpmodn] += __r; + __r += __kmodn + __v_[__k - 1]; + __first[__kqmodn] += __r; + __first[__kmodn] = __r; } for (size_t __k = __s + 1; __k < __m; ++__k) { - const size_t __kmodn = __k % __n; - const size_t __kpmodn = (__k + __p) % __n; - result_type __r = 1664525 * _Tp(__first[__kmodn] ^ __first[__kpmodn] - ^ __first[(__k - 1) % __n]); - __first[__kpmodn] += __r; - __r += __kmodn; - __first[(__k + __q) % __n] += __r; - __first[__kmodn] = __r; + if (++__kmodn == __n) + __kmodn = 0; + if (++__k1modn == __n) + __k1modn = 0; + if (++__kpmodn == __n) + __kpmodn = 0; + if (++__kqmodn == __n) + __kqmodn = 0; + + result_type __r = 1664525 * _Tp(__first[__kmodn] ^ __first[__kpmodn] ^ __first[__k1modn]); + __first[__kpmodn] += __r; + __r += __kmodn; + __first[__kqmodn] += __r; + __first[__kmodn] = __r; } for (size_t __k = __m; __k < __m + __n; ++__k) { - const size_t __kmodn = __k % __n; - const size_t __kpmodn = (__k + __p) % __n; - result_type __r = 1566083941 * _Tp(__first[__kmodn] + - __first[__kpmodn] + - __first[(__k - 1) % __n]); - __first[__kpmodn] ^= __r; - __r -= __kmodn; - __first[(__k + __q) % __n] ^= __r; - __first[__kmodn] = __r; + if (++__kmodn == __n) + __kmodn = 0; + if (++__k1modn == __n) + __k1modn = 0; + if (++__kpmodn == __n) + __kpmodn = 0; + if (++__kqmodn == __n) + __kqmodn = 0; + + result_type __r = 1566083941 * _Tp(__first[__kmodn] + __first[__kpmodn] + __first[__k1modn]); + __first[__kpmodn] ^= __r; + __r -= __kmodn; + __first[__kqmodn] ^= __r; + __first[__kmodn] = __r; } } } -- 2.7.4