libstdc++: Skip atomic instructions in shared_ptr when both counts are 1

author Maged Michael <maged.michael@gmail.com>

Tue, 7 Dec 2021 15:20:58 +0000 (15:20 +0000)

committer Jonathan Wakely <jwakely@redhat.com>

Wed, 8 Dec 2021 11:39:34 +0000 (11:39 +0000)
author Maged Michael <maged.michael@gmail.com>
Tue, 7 Dec 2021 15:20:58 +0000 (15:20 +0000)
committer Jonathan Wakely <jwakely@redhat.com>
Wed, 8 Dec 2021 11:39:34 +0000 (11:39 +0000)
diff --git a/libstdc++-v3/include/bits/c++config b/libstdc++-v3/include/bits/c++config

index 90513cc..f2d704f 100644 (file)
--- a/libstdc++-v3/include/bits/c++config
+++ b/libstdc++-v3/include/bits/c++config
@@ -577,6 +577,15 @@ namespace std
    do { __glibcxx_constexpr_assert(cond); } while (false)
  #endif
  
+// Macro indicating that TSAN is in use.
+// _GLIBCXX_TSAN is defined to 1 if __SANITIZE_THREAD__ is nonzero or,
+// failing that, if __has_feature is defined and reports thread_sanitizer.
+// Otherwise it is left undefined.  The __has_feature() test is nested
+// under '#elif defined __has_feature' so that it is only evaluated by
+// preprocessors that provide __has_feature.
+#if __SANITIZE_THREAD__
+#  define _GLIBCXX_TSAN 1
+#elif defined __has_feature
+# if __has_feature(thread_sanitizer)
+#  define _GLIBCXX_TSAN 1
+# endif
+#endif
+
  // Macros for race detectors.
  // _GLIBCXX_SYNCHRONIZATION_HAPPENS_BEFORE(A) and
  // _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(A) should be used to explain
diff --git a/libstdc++-v3/include/bits/shared_ptr_base.h b/libstdc++-v3/include/bits/shared_ptr_base.h

index 3473a74..90ad309 100644 (file)
--- a/libstdc++-v3/include/bits/shared_ptr_base.h
+++ b/libstdc++-v3/include/bits/shared_ptr_base.h
@@ -143,10 +143,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        virtual void*
        _M_get_deleter(const std::type_info&) noexcept = 0;
  
+      // Increment the use count (used when the count is greater than zero).
        void
        _M_add_ref_copy()
        { __gnu_cxx::__atomic_add_dispatch(&_M_use_count, 1); }
  
+      // Increment the use count if it is non-zero, throw otherwise.
        void
        _M_add_ref_lock()
        {
@@ -154,42 +156,51 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
           __throw_bad_weak_ptr();
        }
  
+      // Increment the use count if it is non-zero.
        bool
        _M_add_ref_lock_nothrow() noexcept;
  
+      // Decrement the use count.
        void
-      _M_release() noexcept
+      _M_release() noexcept;
+
+      // Called by _M_release() when the use count reaches zero.
+      void
+      _M_release_last_use() noexcept
        {
-        // Be race-detector-friendly.  For more info see bits/c++config.
-        _GLIBCXX_SYNCHRONIZATION_HAPPENS_BEFORE(&_M_use_count);
-       if (__gnu_cxx::__exchange_and_add_dispatch(&_M_use_count, -1) == 1)
+       _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(&_M_use_count);
+       _M_dispose();
+       // There must be a memory barrier between dispose() and destroy()
+       // to ensure that the effects of dispose() are observed in the
+       // thread that runs destroy().
+       // See http://gcc.gnu.org/ml/libstdc++/2005-11/msg00136.html
+       if (_Mutex_base<_Lp>::_S_need_barriers)
           {
-            _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(&_M_use_count);
-           _M_dispose();
-           // There must be a memory barrier between dispose() and destroy()
-           // to ensure that the effects of dispose() are observed in the
-           // thread that runs destroy().
-           // See http://gcc.gnu.org/ml/libstdc++/2005-11/msg00136.html
-           if (_Mutex_base<_Lp>::_S_need_barriers)
-             {
-               __atomic_thread_fence (__ATOMIC_ACQ_REL);
-             }
+           __atomic_thread_fence (__ATOMIC_ACQ_REL);
+         }
  
-            // Be race-detector-friendly.  For more info see bits/c++config.
-            _GLIBCXX_SYNCHRONIZATION_HAPPENS_BEFORE(&_M_weak_count);
-           if (__gnu_cxx::__exchange_and_add_dispatch(&_M_weak_count,
-                                                      -1) == 1)
-              {
-                _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(&_M_weak_count);
-               _M_destroy();
-              }
+       // Be race-detector-friendly.  For more info see bits/c++config.
+       _GLIBCXX_SYNCHRONIZATION_HAPPENS_BEFORE(&_M_weak_count);
+       if (__gnu_cxx::__exchange_and_add_dispatch(&_M_weak_count,
+                                                  -1) == 1)
+         {
+           _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(&_M_weak_count);
+           _M_destroy();
           }
        }
  
+      // Out-of-line entry point for _M_release_last_use().  The
+      // 'noinline' attribute keeps the body out of callers that only
+      // reach it on the cold path, which reduces code size.
+      __attribute__((__noinline__))
+      void
+      _M_release_last_use_cold() noexcept
+      { this->_M_release_last_use(); }
+
+      // Increment the weak count.
        void
        _M_weak_add_ref() noexcept
        { __gnu_cxx::__atomic_add_dispatch(&_M_weak_count, 1); }
  
+      // Decrement the weak count.
        void
        _M_weak_release() noexcept
        {
@@ -288,6 +299,67 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  
    template<>
      inline void
+    _Sp_counted_base<_S_mutex>::_M_release() noexcept
+    {
+      // Annotate the decrement for race detectors (see bits/c++config).
+      _GLIBCXX_SYNCHRONIZATION_HAPPENS_BEFORE(&_M_use_count);
+      // The previous value is 1 only for the last strong reference.
+      const bool __last_use
+       = __gnu_cxx::__exchange_and_add_dispatch(&_M_use_count, -1) == 1;
+      if (__last_use)
+       _M_release_last_use();
+    }
+
+  // Decrement the use count for the atomic lock policy.
+  // Unless _GLIBCXX_TSAN is set, and when both counts can be read as one
+  // lock-free, suitably aligned long long, a single atomic load checks
+  // whether both counts are 1.  If so, the object is disposed and
+  // destroyed without any atomic read-modify-write.  Otherwise the use
+  // count is decremented atomically as usual.
+  template<>
+    inline void
+    _Sp_counted_base<_S_atomic>::_M_release() noexcept
+    {
+      // Be race-detector-friendly.  For more info see bits/c++config.
+      _GLIBCXX_SYNCHRONIZATION_HAPPENS_BEFORE(&_M_use_count);
+#if ! _GLIBCXX_TSAN
+      constexpr bool __lock_free
+       = __atomic_always_lock_free(sizeof(long long), 0)
+       && __atomic_always_lock_free(sizeof(_Atomic_word), 0);
+      constexpr bool __double_word
+       = sizeof(long long) == 2 * sizeof(_Atomic_word);
+      // The ref-count members follow the vptr, so are aligned to
+      // alignof(void*).
+      constexpr bool __aligned = __alignof(long long) <= alignof(void*);
+      if _GLIBCXX17_CONSTEXPR (__lock_free && __double_word && __aligned)
+       {
+         // This is an explicit specialization, not a template, so this
+         // branch is compiled even when the condition is false.  Use a
+         // zero shift in that case so that the shift count can never
+         // equal the width of long long (e.g. for a 64-bit _Atomic_word).
+         constexpr int __wordbits = __CHAR_BIT__ * sizeof(_Atomic_word);
+         constexpr int __shiftbits = __double_word ? __wordbits : 0;
+         // A value of 1 in each half, so byte order does not matter.
+         constexpr long long __unique_ref = 1LL + (1LL << __shiftbits);
+         // Relies on _M_weak_count being laid out directly after
+         // _M_use_count, so that one load covers both counts.
+         auto __both_counts = reinterpret_cast<long long*>(&_M_use_count);
+
+         _GLIBCXX_SYNCHRONIZATION_HAPPENS_BEFORE(&_M_weak_count);
+         if (__atomic_load_n(__both_counts, __ATOMIC_ACQUIRE) == __unique_ref)
+           {
+             // Both counts are 1, so there are no weak references and
+             // we are releasing the last strong reference. No other
+             // threads can observe the effects of this _M_release()
+             // call (e.g. calling use_count()) without a data race.
+             // Clear the counts through the members themselves rather
+             // than through a type-punned long long store.
+             _M_weak_count = _M_use_count = 0;
+             _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(&_M_use_count);
+             _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(&_M_weak_count);
+             _M_dispose();
+             _M_destroy();
+             return;
+           }
+         // Shared or weakly referenced: take the normal atomic decrement
+         // and keep the last-use handling out of line.
+         if (__gnu_cxx::__exchange_and_add_dispatch(&_M_use_count, -1) == 1)
+           [[__unlikely__]]
+           {
+             _M_release_last_use_cold();
+             return;
+           }
+       }
+      else
+#endif
+      if (__gnu_cxx::__exchange_and_add_dispatch(&_M_use_count, -1) == 1)
+       {
+         _M_release_last_use();
+       }
+    }
+
+  template<>
+    inline void
      _Sp_counted_base<_S_single>::_M_weak_add_ref() noexcept
      { ++_M_weak_count; }
author	Maged Michael <maged.michael@gmail.com>
	Tue, 7 Dec 2021 15:20:58 +0000 (15:20 +0000)
committer	Jonathan Wakely <jwakely@redhat.com>
	Wed, 8 Dec 2021 11:39:34 +0000 (11:39 +0000)
libstdc++-v3/include/bits/c++config		patch | blob | history
libstdc++-v3/include/bits/shared_ptr_base.h		patch | blob | history