From b0945e1bd2e2d7ac6dd8152e479a56db302d981e Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Mon, 12 Aug 2019 07:51:05 +0000 Subject: [PATCH] Improve codegen for deque. This patch rewrites a few loops in deque and split_buffer to better optimize the codegen. For constructors like `deque d(500000, 0);` this patch results in a 2x speedup. The patch improves the codegen in roughly three ways: 1. Changes do { ... } while (...) loops into more typical for loops. The optimizer can reason about normal looking loops better. 2. Split the iteration over a range into (A) iteration over the blocks, then (B) iteration within the block. This nested structure helps LLVM lower the inner loop to `memset`. 3. Do fewer things each iteration. Some of these loops were incrementing or changing 4-5 variables every loop (in addition to the construction). Previously most loops would increment the end pointer, the size, and decrement the count of remaining items to construct. Now we only increment a single pointer for most iterations. llvm-svn: 368547 --- libcxx/benchmarks/ContainerBenchmarks.hpp | 16 ++++- libcxx/benchmarks/Utilities.hpp | 33 +++++++++++ libcxx/benchmarks/deque.bench.cpp | 47 +++++++++++++++ libcxx/include/__split_buffer | 45 ++++++++------ libcxx/include/deque | 97 +++++++++++++++++++++++++++++-- 5 files changed, 210 insertions(+), 28 deletions(-) create mode 100644 libcxx/benchmarks/Utilities.hpp create mode 100644 libcxx/benchmarks/deque.bench.cpp diff --git a/libcxx/benchmarks/ContainerBenchmarks.hpp b/libcxx/benchmarks/ContainerBenchmarks.hpp index 648617a..581d3c5 100644 --- a/libcxx/benchmarks/ContainerBenchmarks.hpp +++ b/libcxx/benchmarks/ContainerBenchmarks.hpp @@ -1,8 +1,18 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + #ifndef BENCHMARK_CONTAINER_BENCHMARKS_HPP #define BENCHMARK_CONTAINER_BENCHMARKS_HPP #include +#include "Utilities.hpp" #include "benchmark/benchmark.h" namespace ContainerBenchmarks { @@ -12,7 +22,7 @@ void BM_ConstructSize(benchmark::State& st, Container) { auto size = st.range(0); for (auto _ : st) { Container c(size); - benchmark::DoNotOptimize(c.data()); + DoNotOptimizeData(c); } } @@ -21,7 +31,7 @@ void BM_ConstructSizeValue(benchmark::State& st, Container, typename Container:: const auto size = st.range(0); for (auto _ : st) { Container c(size, val); - benchmark::DoNotOptimize(c.data()); + DoNotOptimizeData(c); } } @@ -33,7 +43,7 @@ void BM_ConstructIterIter(benchmark::State& st, Container, GenInputs gen) { benchmark::DoNotOptimize(&in); while (st.KeepRunning()) { Container c(begin, end); - benchmark::DoNotOptimize(c.data()); + DoNotOptimizeData(c); } } diff --git a/libcxx/benchmarks/Utilities.hpp b/libcxx/benchmarks/Utilities.hpp new file mode 100644 index 0000000..1b25f74 --- /dev/null +++ b/libcxx/benchmarks/Utilities.hpp @@ -0,0 +1,33 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef BENCHMARK_UTILITIES_HPP +#define BENCHMARK_UTILITIES_HPP + +#include +#include + +#include "benchmark/benchmark.h" + +namespace UtilitiesInternal { + template + auto HaveDataImpl(int) -> decltype((std::declval().data(), std::true_type{})); + template + auto HaveDataImpl(long) -> std::false_type; + template + using HasData = decltype(HaveDataImpl(0)); +} // namespace UtilitiesInternal + +template ::value>* = nullptr> +void DoNotOptimizeData(Container &c) { benchmark::DoNotOptimize(c.data()); } +template ::value>* = nullptr> +void DoNotOptimizeData(Container &c) { benchmark::DoNotOptimize(&c); } + + +#endif // BENCHMARK_UTILITIES_HPP diff --git a/libcxx/benchmarks/deque.bench.cpp b/libcxx/benchmarks/deque.bench.cpp new file mode 100644 index 0000000..44a492b --- /dev/null +++ b/libcxx/benchmarks/deque.bench.cpp @@ -0,0 +1,47 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "benchmark/benchmark.h" + +#include "ContainerBenchmarks.hpp" +#include "GenerateInput.hpp" + +using namespace ContainerBenchmarks; + +constexpr std::size_t TestNumInputs = 1024; + +BENCHMARK_CAPTURE(BM_ConstructSize, + deque_byte, + std::deque{})->Arg(5140480); + +BENCHMARK_CAPTURE(BM_ConstructSizeValue, + deque_byte, + std::deque{}, 0)->Arg(5140480); + +BENCHMARK_CAPTURE(BM_ConstructIterIter, + deque_char, + std::deque{}, + getRandomIntegerInputs)->Arg(TestNumInputs); + +BENCHMARK_CAPTURE(BM_ConstructIterIter, + deque_size_t, + std::deque{}, + getRandomIntegerInputs)->Arg(TestNumInputs); + +BENCHMARK_CAPTURE(BM_ConstructIterIter, + deque_string, + std::deque{}, + getRandomStringInputs)->Arg(TestNumInputs); + + + + +BENCHMARK_MAIN(); diff --git a/libcxx/include/__split_buffer b/libcxx/include/__split_buffer index 1daa4e5..095fe89 100644 --- a/libcxx/include/__split_buffer +++ b/libcxx/include/__split_buffer @@ -161,6 +161,19 @@ private: _LIBCPP_INLINE_VISIBILITY void __move_assign_alloc(__split_buffer&, false_type) _NOEXCEPT {} + + struct _ConstructTransaction { + explicit _ConstructTransaction(pointer* __p, size_type __n) _NOEXCEPT + : __pos_(*__p), __end_(*__p + __n), __dest_(__p) { + } + ~_ConstructTransaction() { + *__dest_ = __pos_; + } + pointer __pos_; + const pointer __end_; + private: + pointer *__dest_; + }; }; template @@ -197,13 +210,10 @@ template void __split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n) { - __alloc_rr& __a = this->__alloc(); - do - { - __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_)); - ++this->__end_; - --__n; - } while (__n > 0); + _ConstructTransaction __tx(&this->__end_, __n); + for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_) { + __alloc_traits::construct(this->__alloc(), _VSTD::__to_raw_pointer(__tx.__pos_)); + } } // Copy constructs __n objects starting at __end_ from __x @@ -216,13 +226,11 @@ template void __split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x) { - __alloc_rr& __a = this->__alloc(); - do - { - __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_), __x); - ++this->__end_; - --__n; - } while (__n > 0); + _ConstructTransaction __tx(&this->__end_, __n); + for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_) { + __alloc_traits::construct(this->__alloc(), + _VSTD::__to_raw_pointer(__tx.__pos_), __x); + } } template @@ -262,11 +270,10 @@ typename enable_if >::type __split_buffer<_Tp, _Allocator>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last) { - __alloc_rr& __a = this->__alloc(); - for (; __first != __last; ++__first) - { - __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_), *__first); - ++this->__end_; + _ConstructTransaction __tx(&this->__end_, std::distance(__first, __last)); + for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_, ++__first) { + __alloc_traits::construct(this->__alloc(), + _VSTD::__to_raw_pointer(__tx.__pos_), *__first); } } diff --git a/libcxx/include/deque b/libcxx/include/deque index fe988d0..cb7e4e5 100644 --- a/libcxx/include/deque +++ b/libcxx/include/deque @@ -956,6 +956,74 @@ public: typedef __deque_iterator const_iterator; + struct __deque_block_range { + explicit __deque_block_range(pointer __b, pointer __e) _NOEXCEPT : __begin_(__b), __end_(__e) {} + const pointer __begin_; + const pointer __end_; + }; + + struct __deque_range { + iterator __pos_; + const iterator __end_; + + __deque_range(iterator __pos, iterator __e) _NOEXCEPT + : __pos_(__pos), __end_(__e) {} + + explicit operator bool() const _NOEXCEPT { + return __pos_ != __end_; + } + + __deque_range begin() const { + return *this; + } + + __deque_range end() const { + return __deque_range(__end_, __end_); + } + __deque_block_range operator*() const _NOEXCEPT { + if (__pos_.__m_iter_ == __end_.__m_iter_) { + return __deque_block_range(__pos_.__ptr_, __end_.__ptr_); + } + return __deque_block_range(__pos_.__ptr_, *__pos_.__m_iter_ + __block_size); + } + + __deque_range& operator++() _NOEXCEPT { + if (__pos_.__m_iter_ == __end_.__m_iter_) { + __pos_ = __end_; + } else { + ++__pos_.__m_iter_; + __pos_.__ptr_ = *__pos_.__m_iter_; + } + return *this; + } + + + friend bool operator==(__deque_range const& __lhs, __deque_range const& __rhs) { + return __lhs.__pos_ == __rhs.__pos_; + } + friend bool operator!=(__deque_range const& __lhs, __deque_range const& __rhs) { + return !(__lhs == __rhs); + } + }; + + + + struct _ConstructTransaction { + _ConstructTransaction(__deque_base* __db, __deque_block_range& __r) + : __pos_(__r.__begin_), __end_(__r.__end_), __begin_(__r.__begin_), __base_(__db) {} + + + ~_ConstructTransaction() { + __base_->size() += (__pos_ - __begin_); + } + + pointer __pos_; + const pointer __end_; + private: + const pointer __begin_; + __deque_base * const __base_; + }; + protected: __map __map_; size_type __start_; @@ -1222,6 +1290,10 @@ public: typedef _VSTD::reverse_iterator reverse_iterator; typedef _VSTD::reverse_iterator const_reverse_iterator; + using typename __base::__deque_range; + using typename __base::__deque_block_range; + using typename __base::_ConstructTransaction; + // construct/copy/destroy: _LIBCPP_INLINE_VISIBILITY deque() @@ -2299,8 +2371,12 @@ deque<_Tp, _Allocator>::__append(_ForIter __f, _ForIter __l, if (__n > __back_capacity) __add_back_capacity(__n - __back_capacity); // __n <= __back_capacity - for (iterator __i = __base::end(); __f != __l; ++__i, (void) ++__f, ++__base::size()) - __alloc_traits::construct(__a, _VSTD::addressof(*__i), *__f); + for (__deque_block_range __br : __deque_range(__base::end(), __base::end() + __n)) { + _ConstructTransaction __tx(this, __br); + for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_, (void)++__f) { + __alloc_traits::construct(__a, std::__to_raw_pointer(__tx.__pos_), *__f); + } + } } template @@ -2312,8 +2388,12 @@ deque<_Tp, _Allocator>::__append(size_type __n) if (__n > __back_capacity) __add_back_capacity(__n - __back_capacity); // __n <= __back_capacity - for (iterator __i = __base::end(); __n; --__n, ++__i, ++__base::size()) - __alloc_traits::construct(__a, _VSTD::addressof(*__i)); + for (__deque_block_range __br : __deque_range(__base::end(), __base::end() + __n)) { + _ConstructTransaction __tx(this, __br); + for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_) { + __alloc_traits::construct(__a, std::__to_raw_pointer(__tx.__pos_)); + } + } } template @@ -2325,8 +2405,13 @@ deque<_Tp, _Allocator>::__append(size_type __n, const value_type& __v) if (__n > __back_capacity) __add_back_capacity(__n - __back_capacity); // __n <= __back_capacity - for (iterator __i = __base::end(); __n; --__n, ++__i, ++__base::size()) - __alloc_traits::construct(__a, _VSTD::addressof(*__i), __v); + for (__deque_block_range __br : __deque_range(__base::end(), __base::end() + __n)) { + _ConstructTransaction __tx(this, __br); + for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_) { + __alloc_traits::construct(__a, std::__to_raw_pointer(__tx.__pos_), __v); + } + } + } // Create front capacity for one block of elements. -- 2.7.4