call_once uses a relaxed atomic load to perform double-checked locking, which is a data race. The fast-path load must be an acquire atomic load.
Differential Revision: https://reviews.llvm.org/D24028
llvm-svn: 280621
#endif
}
+// Reads and returns the value stored at *__value.
+//
+// When threads are enabled (_LIBCPP_HAS_NO_THREADS is not defined), the
+// __ATOMIC_ACQUIRE macro is defined, and the __atomic_load_n builtin is
+// available (detected via __has_builtin, or _GNUC_VER >= 407 -- presumably
+// GCC 4.7 or newer; confirm against the definition of _GNUC_VER), the read
+// is performed as an atomic load with acquire ordering.
+//
+// In every other configuration the read is an ordinary dereference.
+//
+// __value: address of the object to read; it is dereferenced unconditionally.
+// Returns: a copy of the value that was read.
+template <class _ValueType>
+inline _LIBCPP_ALWAYS_INLINE
+_ValueType __libcpp_acquire_load(_ValueType const* __value) {
+#if !defined(_LIBCPP_HAS_NO_THREADS) && \
+ defined(__ATOMIC_ACQUIRE) && \
+ (__has_builtin(__atomic_load_n) || _GNUC_VER >= 407)
+ return __atomic_load_n(__value, __ATOMIC_ACQUIRE);
+#else
+ // Fallback: plain, non-atomic read of the pointee.
+ return *__value;
+#endif
+}
+
// addressof moved to <__functional_base>
template <class _Tp> class allocator;
void
call_once(once_flag& __flag, _Callable&& __func, _Args&&... __args)
{
- if (__libcpp_relaxed_load(&__flag.__state_) != ~0ul)
+ if (__libcpp_acquire_load(&__flag.__state_) != ~0ul)
{
typedef tuple<_Callable&&, _Args&&...> _Gp;
_Gp __f(_VSTD::forward<_Callable>(__func), _VSTD::forward<_Args>(__args)...);
void
call_once(once_flag& __flag, _Callable& __func)
{
- if (__libcpp_relaxed_load(&__flag.__state_) != ~0ul)
+ if (__libcpp_acquire_load(&__flag.__state_) != ~0ul)
{
__call_once_param<_Callable> __p(__func);
__call_once(__flag.__state_, &__p, &__call_once_proxy<_Callable>);
static __libcpp_condvar_t cv = _LIBCPP_CONDVAR_INITIALIZER;
#endif
-/// NOTE: Changes to flag are done via relaxed atomic stores
-/// even though the accesses are protected by a mutex because threads
-/// just entering 'call_once` concurrently read from flag.
void
__call_once(volatile unsigned long& flag, void* arg, void(*func)(void*))
{
__libcpp_mutex_unlock(&mut);
func(arg);
__libcpp_mutex_lock(&mut);
- __libcpp_relaxed_store(&flag, ~0ul);
+ __libcpp_atomic_store(&flag, ~0ul, _AO_Release);
__libcpp_mutex_unlock(&mut);
__libcpp_condvar_broadcast(&cv);
#ifndef _LIBCPP_NO_EXCEPTIONS