From b5ff187b7b51dd76f881e10c1c2b4033e672fb12 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Fri, 12 Nov 2021 19:28:39 +0100 Subject: [PATCH] tsan: mmap shadow stack We used to mmap C++ shadow stack as part of the trace region before ed7f3f5bc9 ("tsan: move shadow stack into ThreadState"), which moved the shadow stack into TLS. This started causing timeouts and OOMs on some of our internal tests that repeatedly create and destroy thousands of threads. Allocate C++ shadow stack with mmap and small pages again. This prevents the observed timeouts and OOMs. But we now need to be more careful with interceptors that run after thread finalization because FuncEntry/Exit and TraceAddEvent all need the shadow stack. Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D113786 --- compiler-rt/lib/tsan/rtl/tsan_mman.cpp | 4 ++-- compiler-rt/lib/tsan/rtl/tsan_rtl.cpp | 14 +++++++++----- compiler-rt/lib/tsan/rtl/tsan_rtl.h | 9 ++++----- compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp | 15 ++++++++++----- 4 files changed, 25 insertions(+), 17 deletions(-) diff --git a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp index f1b6768..ef97ad0 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp @@ -220,7 +220,7 @@ void *user_reallocarray(ThreadState *thr, uptr pc, void *p, uptr size, uptr n) { void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) { DPrintf("#%d: alloc(%zu) = 0x%zx\n", thr->tid, sz, p); ctx->metamap.AllocBlock(thr, pc, p, sz); - if (write && thr->ignore_reads_and_writes == 0) + if (write && thr->ignore_reads_and_writes == 0 && thr->is_inited) MemoryRangeImitateWrite(thr, pc, (uptr)p, sz); else MemoryResetRange(thr, pc, (uptr)p, sz); @@ -230,7 +230,7 @@ void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write) { CHECK_NE(p, (void*)0); uptr sz = ctx->metamap.FreeBlock(thr->proc(), p); DPrintf("#%d: free(0x%zx, %zu)\n", thr->tid, p, sz); - if (write && 
thr->ignore_reads_and_writes == 0) + if (write && thr->ignore_reads_and_writes == 0 && thr->is_inited) MemoryRangeFreed(thr, pc, (uptr)p, sz); } diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp index 46dec04..ff7726e 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp @@ -148,15 +148,19 @@ ThreadState::ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch, { CHECK_EQ(reinterpret_cast<uptr>(this) % SANITIZER_CACHE_LINE_SIZE, 0); #if !SANITIZER_GO - shadow_stack_pos = shadow_stack; - shadow_stack_end = shadow_stack + kShadowStackSize; + // C/C++ uses fixed size shadow stack. + const int kInitStackSize = kShadowStackSize; + shadow_stack = static_cast<uptr *>( + MmapNoReserveOrDie(kInitStackSize * sizeof(uptr), "shadow stack")); + SetShadowRegionHugePageMode(reinterpret_cast<uptr>(shadow_stack), + kInitStackSize * sizeof(uptr)); #else - // Setup dynamic shadow stack. + // Go uses malloc-allocated shadow stack with dynamic size. const int kInitStackSize = 8; - shadow_stack = (uptr *)Alloc(kInitStackSize * sizeof(uptr)); + shadow_stack = static_cast<uptr *>(Alloc(kInitStackSize * sizeof(uptr))); +#endif shadow_stack_pos = shadow_stack; shadow_stack_end = shadow_stack + kInitStackSize; -#endif } #if !SANITIZER_GO diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h index eab8370..c71b27e 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h @@ -159,12 +159,8 @@ struct ThreadState { #if !SANITIZER_GO IgnoreSet mop_ignore_set; IgnoreSet sync_ignore_set; - // C/C++ uses fixed size shadow stack. - uptr shadow_stack[kShadowStackSize]; -#else - // Go uses malloc-allocated shadow stack with dynamic size. 
- uptr *shadow_stack; #endif + uptr *shadow_stack; uptr *shadow_stack_end; uptr *shadow_stack_pos; RawShadow *racy_shadow_addr; @@ -616,6 +612,9 @@ void ALWAYS_INLINE TraceAddEvent(ThreadState *thr, FastState fs, EventType typ, u64 addr) { if (!kCollectHistory) return; + // TraceSwitch accesses shadow_stack, but it's called infrequently, + // so we check it here proactively. + DCHECK(thr->shadow_stack); DCHECK_GE((int)typ, 0); DCHECK_LE((int)typ, 7); DCHECK_EQ(GetLsb(addr, kEventPCBits), addr); diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp index 6e652ee..8532f5d 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp @@ -227,15 +227,11 @@ void ThreadFinish(ThreadState *thr) { if (thr->tls_addr && thr->tls_size) DontNeedShadowFor(thr->tls_addr, thr->tls_size); thr->is_dead = true; + thr->is_inited = false; ctx->thread_registry.FinishThread(thr->tid); } void ThreadContext::OnFinished() { -#if SANITIZER_GO - Free(thr->shadow_stack); - thr->shadow_stack_pos = nullptr; - thr->shadow_stack_end = nullptr; -#endif if (!detached) { thr->fast_state.IncrementEpoch(); // Can't increment epoch w/o writing to the trace as well. @@ -244,6 +240,15 @@ void ThreadContext::OnFinished() { } epoch1 = thr->fast_state.epoch(); +#if !SANITIZER_GO + UnmapOrDie(thr->shadow_stack, kShadowStackSize * sizeof(uptr)); +#else + Free(thr->shadow_stack); +#endif + thr->shadow_stack = nullptr; + thr->shadow_stack_pos = nullptr; + thr->shadow_stack_end = nullptr; + if (common_flags()->detect_deadlocks) ctx->dd->DestroyLogicalThread(thr->dd_lt); thr->clock.ResetCached(&thr->proc()->clock_cache); -- 2.7.4