tsan: speed up pthread_setname_np
author Dmitry Vyukov <dvyukov@google.com>
Mon, 15 Nov 2021 18:00:31 +0000 (19:00 +0100)
committer Dmitry Vyukov <dvyukov@google.com>
Tue, 16 Nov 2021 06:51:08 +0000 (07:51 +0100)
pthread_setname_np does a linear search over all thread descriptors
to map a pthread_t to its thread descriptor. This has O(N^2) complexity
and becomes much worse in the new tsan runtime, which keeps every thread
that has ever existed in the thread registry.
Replace the linear search with direct access when pthread_setname_np
is called for the current thread (a very common case).

Reviewed By: vitalybuka

Differential Revision: https://reviews.llvm.org/D113916

compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
compiler-rt/test/tsan/bench_threads.cpp [new file with mode: 0644]

index 9b62b20..2f04cd2 100644 (file)
@@ -90,6 +90,7 @@ DECLARE_REAL(int, pthread_mutexattr_gettype, void *, void *)
 DECLARE_REAL(int, fflush, __sanitizer_FILE *fp)
 DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr size)
 DECLARE_REAL_AND_INTERCEPTOR(void, free, void *ptr)
+extern "C" int pthread_equal(void *t1, void *t2);
 extern "C" void *pthread_self();
 extern "C" void _exit(int status);
 #if !SANITIZER_NETBSD
@@ -2392,8 +2393,16 @@ static void HandleRecvmsg(ThreadState *thr, uptr pc,
 #define COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name) \
   ThreadSetName(((TsanInterceptorContext *) ctx)->thr, name)
 
-#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name) \
-  __tsan::ctx->thread_registry.SetThreadNameByUserId(thread, name)
+// Names the thread identified by `thread`. If it is the calling thread
+// (the common case), the name is set directly through the interceptor
+// context; otherwise the registry looks the thread up by its user id.
+// The first parameter is not called `ctx` on purpose: a parameter with that
+// name would also be substituted into the `__tsan::ctx` token below.
+#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ictx, thread, name)        \
+  if (pthread_equal(pthread_self(), reinterpret_cast<void *>(thread))) \
+    COMMON_INTERCEPTOR_SET_THREAD_NAME(ictx, name);                    \
+  else                                                                 \
+    __tsan::ctx->thread_registry.SetThreadNameByUserId(thread, name)
 
 #define COMMON_INTERCEPTOR_BLOCK_REAL(name) BLOCK_REAL(name)
 
diff --git a/compiler-rt/test/tsan/bench_threads.cpp b/compiler-rt/test/tsan/bench_threads.cpp
new file mode 100644 (file)
index 0000000..1d0be21
--- /dev/null
@@ -0,0 +1,63 @@
+// RUN: %clangxx_tsan %s -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+
+// bench.h needs pthread barriers which are not available on OS X
+// UNSUPPORTED: darwin
+
+#include "bench.h"
+
+// Thread entry point used for every thread in this benchmark: it names the
+// calling thread through pthread_setname_np and returns right away.
+void *nop_thread(void *arg) {
+  pthread_t self = pthread_self();
+  pthread_setname_np(self, "nop_thread");
+  return nullptr;
+}
+
+// Benchmark worker passed to start_thread_group(): runs bench_niter
+// sequential create/join cycles of nop_thread. `tid` is unused.
+void thread(int tid) {
+  for (int iter = 0; iter < bench_niter; ++iter) {
+    pthread_t child;
+    pthread_create(&child, nullptr, nop_thread, nullptr);
+    pthread_join(child, nullptr);
+  }
+}
+
+// Exercises thread creation/joining after a large number of other threads
+// (both still unjoined and already joined) have been created.
+// bench_niter, bench_nthread and start_thread_group are not defined here;
+// presumably they come from bench.h.
+void bench() {
+  // Phase 1: kRounds batches of kBatch threads, each batch fully joined
+  // before the next one starts.
+  const int kRounds = 200;
+  const int kBatch = 100;
+  printf("starting transient threads...\n");
+  for (int round = 0; round < kRounds; ++round) {
+    pthread_t batch[kBatch];
+    for (int k = 0; k < kBatch; ++k) {
+      pthread_create(&batch[k], nullptr, nop_thread, nullptr);
+    }
+    for (int k = 0; k < kBatch; ++k) {
+      pthread_join(batch[k], nullptr);
+    }
+  }
+
+  // Phase 2: kLiveThreads threads that stay unjoined until the end of bench().
+  printf("starting persistent threads...\n");
+  const int kLiveThreads = 2000;
+  pthread_t live[kLiveThreads];
+  for (int k = 0; k < kLiveThreads; ++k) {
+    pthread_create(&live[k], nullptr, nop_thread, nullptr);
+  }
+
+  // Phase 3: run the benchmark thread group, then join the phase-2 threads.
+  printf("starting benchmark threads...\n");
+  start_thread_group(bench_nthread, thread);
+  for (int k = 0; k < kLiveThreads; ++k) {
+    pthread_join(live[k], nullptr);
+  }
+}
+
+// CHECK: DONE