From d4ace50ed0e5e761385a5d55845ee25ad12f41bb Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Sun, 28 Jul 2019 04:37:02 +0000 Subject: [PATCH] Fix PR35637: suboptimal codegen for `vector`. The optimizer is petulant and temperamental. In this case LLVM failed to lower the the "insert at end" loop used by`vector` to a `memset` despite `memset` being substantially faster over a range of bytes. LLVM has the ability to lower loops to `memset` whet appropriate, but the odd nature of libc++'s loops prevented the optimization from taking places. This patch addresses the issue by rewriting the loops from the form `do [ ... --__n; } while (__n > 0);` to instead use a for loop over a pointer range (For example: `for (auto *__i = ...; __i < __e; ++__i)`). This patch also rewrites the asan annotations to unposion all additional memory at the start of the loop instead of once per iterations. This could potentially permit false negatives where the constructor of element N attempts to access element N + 1 during its construction. The before and after results for the `BM_ConstructSize/vector_byte/5140480_mean` benchmark (run 5 times) are: -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- Before ------ BM_ConstructSize/vector_byte/5140480_mean 12530140 ns 12469693 ns N/A BM_ConstructSize/vector_byte/5140480_median 12512818 ns 12445571 ns N/A BM_ConstructSize/vector_byte/5140480_stddev 106224 ns 107907 ns 5 ----- After ----- BM_ConstructSize/vector_byte/5140480_mean 167285 ns 166500 ns N/A BM_ConstructSize/vector_byte/5140480_median 166749 ns 166069 ns N/A BM_ConstructSize/vector_byte/5140480_stddev 3242 ns 3184 ns 5 llvm-svn: 367183 --- libcxx/benchmarks/ContainerBenchmarks.hpp | 17 +++ libcxx/benchmarks/vector_operations.bench.cpp | 8 ++ libcxx/include/vector | 156 ++++++++++---------------- 3 files changed, 86 insertions(+), 95 deletions(-) diff --git a/libcxx/benchmarks/ContainerBenchmarks.hpp b/libcxx/benchmarks/ContainerBenchmarks.hpp index 509e3d2..648617a 100644 --- a/libcxx/benchmarks/ContainerBenchmarks.hpp +++ b/libcxx/benchmarks/ContainerBenchmarks.hpp @@ -7,6 +7,23 @@ namespace ContainerBenchmarks { +template +void BM_ConstructSize(benchmark::State& st, Container) { + auto size = st.range(0); + for (auto _ : st) { + Container c(size); + benchmark::DoNotOptimize(c.data()); + } +} + +template +void BM_ConstructSizeValue(benchmark::State& st, Container, typename Container::value_type const& val) { + const auto size = st.range(0); + for (auto _ : st) { + Container c(size, val); + benchmark::DoNotOptimize(c.data()); + } +} template void BM_ConstructIterIter(benchmark::State& st, Container, GenInputs gen) { diff --git a/libcxx/benchmarks/vector_operations.bench.cpp b/libcxx/benchmarks/vector_operations.bench.cpp index a2c4e5d..c41259e 100644 --- a/libcxx/benchmarks/vector_operations.bench.cpp +++ b/libcxx/benchmarks/vector_operations.bench.cpp @@ -13,6 +13,14 @@ using namespace ContainerBenchmarks; constexpr std::size_t TestNumInputs = 1024; +BENCHMARK_CAPTURE(BM_ConstructSize, + vector_byte, + std::vector{})->Arg(5140480); + +BENCHMARK_CAPTURE(BM_ConstructSizeValue, + vector_byte, + std::vector{}, 0)->Arg(5140480); + BENCHMARK_CAPTURE(BM_ConstructIterIter, vector_char, std::vector{}, diff --git a/libcxx/include/vector b/libcxx/include/vector index 82bf6e0..5481b2f 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -864,58 +864,67 @@ private: #else _LIBCPP_INLINE_VISIBILITY void __annotate_contiguous_container(const void*, const void*, const void*, - const void*) const {} + const void*) const _NOEXCEPT {} #endif _LIBCPP_INLINE_VISIBILITY - void __annotate_new(size_type __current_size) const { + void __annotate_new(size_type __current_size) const _NOEXCEPT { __annotate_contiguous_container(data(), data() + capacity(), data() + capacity(), data() + __current_size); } _LIBCPP_INLINE_VISIBILITY - void __annotate_delete() const { + void __annotate_delete() const _NOEXCEPT { __annotate_contiguous_container(data(), data() + capacity(), data() + size(), data() + capacity()); } _LIBCPP_INLINE_VISIBILITY - void __annotate_increase(size_type __n) const + void __annotate_increase(size_type __n) const _NOEXCEPT { __annotate_contiguous_container(data(), data() + capacity(), data() + size(), data() + size() + __n); } _LIBCPP_INLINE_VISIBILITY - void __annotate_shrink(size_type __old_size) const + void __annotate_shrink(size_type __old_size) const _NOEXCEPT { __annotate_contiguous_container(data(), data() + capacity(), data() + __old_size, data() + size()); } + + struct _ConstructTransaction { + explicit _ConstructTransaction(vector &__v, size_type __n) + : __v_(__v), __pos_(__v.__end_), __new_end_(__v.__end_ + __n) { #ifndef _LIBCPP_HAS_NO_ASAN - // The annotation for size increase should happen before the actual increase, - // but if an exception is thrown after that the annotation has to be undone. - struct __RAII_IncreaseAnnotator { - __RAII_IncreaseAnnotator(const vector &__v, size_type __n = 1) - : __commit(false), __v(__v), __old_size(__v.size() + __n) { - __v.__annotate_increase(__n); - } - void __done() { __commit = true; } - ~__RAII_IncreaseAnnotator() { - if (__commit) return; - __v.__annotate_shrink(__old_size); + __v_.__annotate_increase(__n); +#endif + } + ~_ConstructTransaction() { + __v_.__end_ = __pos_; +#ifndef _LIBCPP_HAS_NO_ASAN + if (__pos_ != __new_end_) { + __v_.__annotate_shrink(__new_end_ - __v_.__begin_); } - bool __commit; - const vector &__v; - size_type __old_size; - }; -#else - struct __RAII_IncreaseAnnotator { - _LIBCPP_INLINE_VISIBILITY - __RAII_IncreaseAnnotator(const vector &, size_type = 1) {} - _LIBCPP_INLINE_VISIBILITY void __done() {} - }; #endif + } + + vector &__v_; + pointer __pos_; + const_pointer const __new_end_; + + private: + _ConstructTransaction(_ConstructTransaction const&) = delete; + _ConstructTransaction& operator=(_ConstructTransaction const&) = delete; + }; + template + _LIBCPP_INLINE_VISIBILITY + void __construct_one_at_end(_Args&& ...__args) { + _ConstructTransaction __tx(*this, 1); + __alloc_traits::construct(this->__alloc(), _VSTD::__to_raw_pointer(__tx.__pos_), + _VSTD::forward<_Args>(__args)...); + ++__tx.__pos_; + } }; #ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES @@ -1027,15 +1036,10 @@ template void vector<_Tp, _Allocator>::__construct_at_end(size_type __n) { - allocator_type& __a = this->__alloc(); - do - { - __RAII_IncreaseAnnotator __annotator(*this); - __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_)); - ++this->__end_; - --__n; - __annotator.__done(); - } while (__n > 0); + _ConstructTransaction __tx(*this, __n); + for (; __tx.__pos_ != __tx.__new_end_; ++__tx.__pos_) { + __alloc_traits::construct(this->__alloc(), _VSTD::__to_raw_pointer(__tx.__pos_)); + } } // Copy constructs __n objects starting at __end_ from __x @@ -1049,15 +1053,10 @@ inline void vector<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x) { - allocator_type& __a = this->__alloc(); - do - { - __RAII_IncreaseAnnotator __annotator(*this); - __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_), __x); - ++this->__end_; - --__n; - __annotator.__done(); - } while (__n > 0); + _ConstructTransaction __tx(*this, __n); + for (; __tx.__pos_ != __tx.__new_end_; ++__tx.__pos_) { + __alloc_traits::construct(this->__alloc(), _VSTD::__to_raw_pointer(__tx.__pos_), __x); + } } template @@ -1069,10 +1068,8 @@ typename enable_if >::type vector<_Tp, _Allocator>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last, size_type __n) { - allocator_type& __a = this->__alloc(); - __RAII_IncreaseAnnotator __annotator(*this, __n); - __alloc_traits::__construct_range_forward(__a, __first, __last, this->__end_); - __annotator.__done(); + _ConstructTransaction __tx(*this, __n); + __alloc_traits::__construct_range_forward(this->__alloc(), __first, __last, __tx.__pos_); } // Default constructs __n objects starting at __end_ @@ -1633,11 +1630,7 @@ vector<_Tp, _Allocator>::push_back(const_reference __x) { if (this->__end_ != this->__end_cap()) { - __RAII_IncreaseAnnotator __annotator(*this); - __alloc_traits::construct(this->__alloc(), - _VSTD::__to_raw_pointer(this->__end_), __x); - __annotator.__done(); - ++this->__end_; + __construct_one_at_end(__x); } else __push_back_slow_path(__x); @@ -1652,12 +1645,7 @@ vector<_Tp, _Allocator>::push_back(value_type&& __x) { if (this->__end_ < this->__end_cap()) { - __RAII_IncreaseAnnotator __annotator(*this); - __alloc_traits::construct(this->__alloc(), - _VSTD::__to_raw_pointer(this->__end_), - _VSTD::move(__x)); - __annotator.__done(); - ++this->__end_; + __construct_one_at_end(_VSTD::move(__x)); } else __push_back_slow_path(_VSTD::move(__x)); @@ -1688,12 +1676,7 @@ vector<_Tp, _Allocator>::emplace_back(_Args&&... __args) { if (this->__end_ < this->__end_cap()) { - __RAII_IncreaseAnnotator __annotator(*this); - __alloc_traits::construct(this->__alloc(), - _VSTD::__to_raw_pointer(this->__end_), - _VSTD::forward<_Args>(__args)...); - __annotator.__done(); - ++this->__end_; + __construct_one_at_end(_VSTD::forward<_Args>(__args)...); } else __emplace_back_slow_path(_VSTD::forward<_Args>(__args)...); @@ -1761,10 +1744,15 @@ vector<_Tp, _Allocator>::__move_range(pointer __from_s, pointer __from_e, pointe { pointer __old_last = this->__end_; difference_type __n = __old_last - __to; - for (pointer __i = __from_s + __n; __i < __from_e; ++__i, ++this->__end_) - __alloc_traits::construct(this->__alloc(), - _VSTD::__to_raw_pointer(this->__end_), - _VSTD::move(*__i)); + { + pointer __i = __from_s + __n; + _ConstructTransaction __tx(*this, __from_e - __i); + for (; __i < __from_e; ++__i, ++__tx.__pos_) { + __alloc_traits::construct(this->__alloc(), + _VSTD::__to_raw_pointer(__tx.__pos_), + _VSTD::move(*__i)); + } + } _VSTD::move_backward(__from_s, __from_s + __n, __old_last); } @@ -1780,12 +1768,9 @@ vector<_Tp, _Allocator>::insert(const_iterator __position, const_reference __x) pointer __p = this->__begin_ + (__position - begin()); if (this->__end_ < this->__end_cap()) { - __RAII_IncreaseAnnotator __annotator(*this); if (__p == this->__end_) { - __alloc_traits::construct(this->__alloc(), - _VSTD::__to_raw_pointer(this->__end_), __x); - ++this->__end_; + __construct_one_at_end(__x); } else { @@ -1795,7 +1780,6 @@ vector<_Tp, _Allocator>::insert(const_iterator __position, const_reference __x) ++__xr; *__p = *__xr; } - __annotator.__done(); } else { @@ -1821,20 +1805,15 @@ vector<_Tp, _Allocator>::insert(const_iterator __position, value_type&& __x) pointer __p = this->__begin_ + (__position - begin()); if (this->__end_ < this->__end_cap()) { - __RAII_IncreaseAnnotator __annotator(*this); if (__p == this->__end_) { - __alloc_traits::construct(this->__alloc(), - _VSTD::__to_raw_pointer(this->__end_), - _VSTD::move(__x)); - ++this->__end_; + __construct_one_at_end(_VSTD::move(__x)); } else { __move_range(__p, this->__end_, __p + 1); *__p = _VSTD::move(__x); } - __annotator.__done(); } else { @@ -1859,13 +1838,9 @@ vector<_Tp, _Allocator>::emplace(const_iterator __position, _Args&&... __args) pointer __p = this->__begin_ + (__position - begin()); if (this->__end_ < this->__end_cap()) { - __RAII_IncreaseAnnotator __annotator(*this); if (__p == this->__end_) { - __alloc_traits::construct(this->__alloc(), - _VSTD::__to_raw_pointer(this->__end_), - _VSTD::forward<_Args>(__args)...); - ++this->__end_; + __construct_one_at_end(_VSTD::forward<_Args>(__args)...); } else { @@ -1873,7 +1848,6 @@ vector<_Tp, _Allocator>::emplace(const_iterator __position, _Args&&... __args) __move_range(__p, this->__end_, __p + 1); *__p = _VSTD::move(__tmp.get()); } - __annotator.__done(); } else { @@ -1911,9 +1885,7 @@ vector<_Tp, _Allocator>::insert(const_iterator __position, size_type __n, const_ } if (__n > 0) { - __RAII_IncreaseAnnotator __annotator(*this, __n); __move_range(__p, __old_last, __p + __old_n); - __annotator.__done(); const_pointer __xr = pointer_traits::pointer_to(__x); if (__p <= __xr && __xr < this->__end_) __xr += __old_n; @@ -1955,11 +1927,7 @@ vector<_Tp, _Allocator>::insert(const_iterator __position, _InputIterator __firs pointer __old_last = this->__end_; for (; this->__end_ != this->__end_cap() && __first != __last; ++__first) { - __RAII_IncreaseAnnotator __annotator(*this); - __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_), - *__first); - ++this->__end_; - __annotator.__done(); + __construct_one_at_end(*__first); } __split_buffer __v(__a); if (__first != __last) @@ -2026,9 +1994,7 @@ vector<_Tp, _Allocator>::insert(const_iterator __position, _ForwardIterator __fi } if (__n > 0) { - __RAII_IncreaseAnnotator __annotator(*this, __n); __move_range(__p, __old_last, __p + __old_n); - __annotator.__done(); _VSTD::copy(__first, __m, __p); } } -- 2.7.4