Commit
efd254b6362 ("tsan: fix deadlock in pthread_atfork callbacks")
fixed another deadlock related to atfork handling.
But builders with DCHECKs enabled reported failures of
pthread_atfork_deadlock2.c and pthread_atfork_deadlock3.c tests
related to the fact that we hold runtime locks on interceptor exit:
https://lab.llvm.org/buildbot/#/builders/70/builds/6727
This issue is somewhat inherent to the current approach,
we indeed execute user code (atfork callbacks) with runtime lock held.
Refactor fork handling to not run user code (atfork callbacks)
with runtime locks held. This change does this by installing
own atfork callbacks during runtime initialization.
Atfork callbacks run in LIFO order, so the expectation is that
our callbacks run last, right before the actual fork.
This way we lock runtime mutexes around fork, but not around
user callbacks.
Extend tests to also install after fork callbacks just to cover
more scenarios. Some tests also started reporting real races
that we previously suppressed.
Reviewed By: vitalybuka
Differential Revision: https://reviews.llvm.org/D101385
extern "C" int pthread_attr_destroy(void *attr);
DECLARE_REAL(int, pthread_attr_getdetachstate, void *, void *)
extern "C" int pthread_attr_setstacksize(void *attr, uptr stacksize);
+extern "C" int pthread_atfork(void (*prepare)(void), void (*parent)(void),
+ void (*child)(void));
extern "C" int pthread_key_create(unsigned *key, void (*destructor)(void* v));
extern "C" int pthread_setspecific(unsigned key, const void *v);
DECLARE_REAL(int, pthread_mutexattr_gettype, void *, void *)
// because in async signal processing case (when handler is called directly
// from rtl_generic_sighandler) we have not yet received the reraised
// signal; and it looks too fragile to intercept all ways to reraise a signal.
- if (flags()->report_bugs && !sync && sig != SIGTERM && errno != 99) {
+ if (ShouldReport(thr, ReportTypeErrnoInSignal) && !sync && sig != SIGTERM &&
+ errno != 99) {
VarSizeStackTrace stack;
// StackTrace::GetNestInstructionPc(pc) is used because return address is
// expected, OutputReport() will undo this.
if (in_symbolizer())
return REAL(fork)(fake);
SCOPED_INTERCEPTOR_RAW(fork, fake);
+ return REAL(fork)(fake);
+}
+
+void atfork_prepare() {
+ if (in_symbolizer())
+ return;
+ ThreadState *thr = cur_thread();
+ const uptr pc = StackTrace::GetCurrentPc();
ForkBefore(thr, pc);
- int pid;
- {
- // On OS X, REAL(fork) can call intercepted functions (OSSpinLockLock), and
- // we'll assert in CheckNoLocks() unless we ignore interceptors.
- ScopedIgnoreInterceptors ignore;
- pid = REAL(fork)(fake);
- }
- if (pid == 0) {
- // child
- ForkChildAfter(thr, pc);
- FdOnFork(thr, pc);
- } else if (pid > 0) {
- // parent
- ForkParentAfter(thr, pc);
- } else {
- // error
- ForkParentAfter(thr, pc);
- }
- return pid;
+}
+
+void atfork_parent() {
+ if (in_symbolizer())
+ return;
+ ThreadState *thr = cur_thread();
+ const uptr pc = StackTrace::GetCurrentPc();
+ ForkParentAfter(thr, pc);
+}
+
+void atfork_child() {
+ if (in_symbolizer())
+ return;
+ ThreadState *thr = cur_thread();
+ const uptr pc = StackTrace::GetCurrentPc();
+ ForkChildAfter(thr, pc);
+ FdOnFork(thr, pc);
}
TSAN_INTERCEPTOR(int, vfork, int fake) {
Printf("ThreadSanitizer: failed to setup atexit callback\n");
Die();
}
+ if (pthread_atfork(atfork_prepare, atfork_parent, atfork_child)) {
+ Printf("ThreadSanitizer: failed to setup atfork callbacks\n");
+ Die();
+ }
#if !SANITIZER_MAC && !SANITIZER_NETBSD && !SANITIZER_FREEBSD
if (pthread_key_create(&interceptor_ctx()->finalize_key, &thread_finalize)) {
static void SignalUnsafeCall(ThreadState *thr, uptr pc) {
if (atomic_load_relaxed(&thr->in_signal_handler) == 0 ||
- !flags()->report_signal_unsafe)
+ !ShouldReport(thr, ReportTypeSignalUnsafe))
return;
VarSizeStackTrace stack;
ObtainCurrentStack(thr, pc, &stack);
void ForkBefore(ThreadState *thr, uptr pc) {
ctx->thread_registry->Lock();
ctx->report_mtx.Lock();
- // Ignore memory accesses in the pthread_atfork callbacks.
- // If any of them triggers a data race we will deadlock
- // on the report_mtx.
- // We could ignore interceptors and sync operations as well,
+ // Suppress all reports in the pthread_atfork callbacks.
+ // Reports will deadlock on the report_mtx.
+ // We could ignore sync operations as well,
// but so far it's unclear if it will do more good or harm.
// Unnecessarily ignoring things can lead to false positives later.
- ThreadIgnoreBegin(thr, pc);
+ thr->suppress_reports++;
+ // On OS X, REAL(fork) can call intercepted functions (OSSpinLockLock), and
+ // we'll assert in CheckNoLocks() unless we ignore interceptors.
+ thr->ignore_interceptors++;
}
void ForkParentAfter(ThreadState *thr, uptr pc) {
- ThreadIgnoreEnd(thr, pc); // Begin is in ForkBefore.
+ thr->suppress_reports--; // Enabled in ForkBefore.
+ thr->ignore_interceptors--;
ctx->report_mtx.Unlock();
ctx->thread_registry->Unlock();
}
void ForkChildAfter(ThreadState *thr, uptr pc) {
- ThreadIgnoreEnd(thr, pc); // Begin is in ForkBefore.
+ thr->suppress_reports--; // Enabled in ForkBefore.
+ thr->ignore_interceptors--;
ctx->report_mtx.Unlock();
ctx->thread_registry->Unlock();
ScopedErrorReportLock lock_;
};
+bool ShouldReport(ThreadState *thr, ReportType typ);
ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack);
void RestoreStack(int tid, const u64 epoch, VarSizeStackTrace *stk,
MutexSet *mset, uptr *tag = nullptr);
// or false positives (e.g. unlock in a different thread).
if (SANITIZER_GO)
return;
+ if (!ShouldReport(thr, typ))
+ return;
ThreadRegistryLock l(ctx->thread_registry);
ScopedReport rep(typ);
rep.AddMutex(mid);
if (!unlock_locked)
s->Reset(thr->proc()); // must not reset it before the report is printed
s->mtx.Unlock();
- if (unlock_locked) {
+ if (unlock_locked && ShouldReport(thr, ReportTypeMutexDestroyLocked)) {
ThreadRegistryLock l(ctx->thread_registry);
ScopedReport rep(ReportTypeMutexDestroyLocked);
rep.AddMutex(mid);
}
void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) {
- if (r == 0)
+ if (r == 0 || !ShouldReport(thr, ReportTypeDeadlock))
return;
ThreadRegistryLock l(ctx->thread_registry);
ScopedReport rep(ReportTypeDeadlock);
return stack;
}
+bool ShouldReport(ThreadState *thr, ReportType typ) {
+ // We set thr->suppress_reports in the fork context.
+ // Taking any locking in the fork context can lead to deadlocks.
+ // If any locks are already taken, it's too late to do this check.
+ CheckNoLocks(thr);
+ // For the same reason check we didn't lock thread_registry yet.
+ if (SANITIZER_DEBUG)
+ ThreadRegistryLock l(ctx->thread_registry);
+ if (!flags()->report_bugs || thr->suppress_reports)
+ return false;
+ switch (typ) {
+ case ReportTypeSignalUnsafe:
+ return flags()->report_signal_unsafe;
+ case ReportTypeThreadLeak:
+#if !SANITIZER_GO
+ // It's impossible to join phantom threads
+ // in the child after fork.
+ if (ctx->after_multithreaded_fork)
+ return false;
+#endif
+ return flags()->report_thread_leaks;
+ case ReportTypeMutexDestroyLocked:
+ return flags()->report_destroy_locked;
+ default:
+ return true;
+ }
+}
+
ScopedReportBase::ScopedReportBase(ReportType typ, uptr tag) {
ctx->thread_registry->CheckLocked();
void *mem = internal_alloc(MBlockReport, sizeof(ReportDesc));
}
bool OutputReport(ThreadState *thr, const ScopedReport &srep) {
- if (!flags()->report_bugs || thr->suppress_reports)
- return false;
+ // These should have been checked in ShouldReport.
+ // It's too late to check them here, we have already taken locks.
+ CHECK(flags()->report_bugs);
+ CHECK(!thr->suppress_reports);
atomic_store_relaxed(&ctx->last_symbolize_time_ns, NanoTime());
const ReportDesc *rep = srep.GetReport();
CHECK_EQ(thr->current_report, nullptr);
// at best it will cause deadlocks on internal mutexes.
ScopedIgnoreInterceptors ignore;
- if (!flags()->report_bugs)
+ if (!ShouldReport(thr, ReportTypeRace))
return;
if (!flags()->report_atomic_races && !RaceBetweenAtomicAndFree(thr))
return;
void ThreadFinalize(ThreadState *thr) {
ThreadCheckIgnore(thr);
#if !SANITIZER_GO
- if (!flags()->report_thread_leaks)
+ if (!ShouldReport(thr, ReportTypeThreadLeak))
return;
ThreadRegistryLock l(ctx->thread_registry);
Vector<ThreadLeak> leaks;
int main() {
barrier_init(&barrier, 2);
- pthread_atfork(atfork, NULL, NULL);
+ pthread_atfork(atfork, atfork, atfork);
pthread_t t;
pthread_create(&t, NULL, worker, NULL);
glob++;
-// RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
// Regression test for
// https://groups.google.com/d/msg/thread-sanitizer/e_zB9gYqFHM/DmAiTsrLAwAJ
// pthread_atfork() callback triggers a data race and we deadlocked
int main() {
barrier_init(&barrier, 2);
- pthread_atfork(atfork, NULL, NULL);
+ pthread_atfork(atfork, atfork, atfork);
pthread_t t;
pthread_create(&t, NULL, worker, NULL);
barrier_wait(&barrier);
return 0;
}
-// CHECK-NOT: ThreadSanitizer: data race
+// CHECK: ThreadSanitizer: data race
+// CHECK: Write of size 4
+// CHECK: #0 atfork
+// CHECK: Previous write of size 4
+// CHECK: #0 worker
// CHECK: CHILD
// CHECK: PARENT
--- /dev/null
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
+// Regression test for
+// https://groups.google.com/g/thread-sanitizer/c/TQrr4-9PRYo/m/HFR4FMi6AQAJ
+#include "test.h"
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <string.h>
+#include <signal.h>
+
+long glob = 0;
+
+void *worker(void *main) {
+ glob++;
+ // synchronize with main
+ barrier_wait(&barrier);
+ // synchronize with atfork
+ barrier_wait(&barrier);
+ pthread_kill((pthread_t)main, SIGPROF);
+ barrier_wait(&barrier);
+ // synchronize with afterfork
+ barrier_wait(&barrier);
+ pthread_kill((pthread_t)main, SIGPROF);
+ barrier_wait(&barrier);
+ return NULL;
+}
+
+void atfork() {
+ barrier_wait(&barrier);
+ barrier_wait(&barrier);
+ write(2, "in atfork\n", strlen("in atfork\n"));
+ static volatile long a;
+ __atomic_fetch_add(&a, 1, __ATOMIC_RELEASE);
+}
+
+void afterfork() {
+ barrier_wait(&barrier);
+ barrier_wait(&barrier);
+ write(2, "in afterfork\n", strlen("in afterfork\n"));
+ static volatile long a;
+ __atomic_fetch_add(&a, 1, __ATOMIC_RELEASE);
+}
+
+void afterfork_child() {
+ // Can't synchronize with barriers because we are
+ // in the new process, but want consistent output.
+ sleep(1);
+ write(2, "in afterfork_child\n", strlen("in afterfork_child\n"));
+ glob++;
+}
+
+void handler(int sig) {
+ write(2, "in handler\n", strlen("in handler\n"));
+ glob++;
+}
+
+int main() {
+ barrier_init(&barrier, 2);
+ struct sigaction act = {};
+ act.sa_handler = &handler;
+ if (sigaction(SIGPROF, &act, 0)) {
+ perror("sigaction");
+ exit(1);
+ }
+ pthread_atfork(atfork, afterfork, afterfork_child);
+ pthread_t t;
+ pthread_create(&t, NULL, worker, (void*)pthread_self());
+ barrier_wait(&barrier);
+ pid_t pid = fork();
+ if (pid < 0) {
+ fprintf(stderr, "fork failed: %d\n", errno);
+ return 1;
+ }
+ if (pid == 0) {
+ fprintf(stderr, "CHILD\n");
+ return 0;
+ }
+ if (pid != waitpid(pid, NULL, 0)) {
+ fprintf(stderr, "waitpid failed: %d\n", errno);
+ return 1;
+ }
+ pthread_join(t, NULL);
+ fprintf(stderr, "PARENT\n");
+ return 0;
+}
+
+// CHECK: in atfork
+// CHECK: in handler
+// CHECK: ThreadSanitizer: data race
+// CHECK: Write of size 8
+// CHECK: #0 handler
+// CHECK: Previous write of size 8
+// CHECK: #0 worker
+// CHECK: afterfork
+// CHECK: in handler
+// CHECK: afterfork_child
+// CHECK: CHILD
+// CHECK: PARENT