We used to mmap the C++ shadow stack as part of the trace region
before
ed7f3f5bc9 ("tsan: move shadow stack into ThreadState"),
which moved the shadow stack into TLS. This started causing
timeouts and OOMs on some of our internal tests that repeatedly
create and destroy thousands of threads.
Allocate the C++ shadow stack with mmap and small pages again.
This prevents the observed timeouts and OOMs.
But we now need to be more careful with interceptors that
run after thread finalization because FuncEntry/Exit and
TraceAddEvent all need the shadow stack.
Reviewed By: vitalybuka
Differential Revision: https://reviews.llvm.org/D113786
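For illustration, a minimal standalone sketch (not TSan code) of the
allocate-with-small-pages pattern this restores, using plain POSIX
mmap/madvise; MAP_NORESERVE and MADV_NOHUGEPAGE stand in for the
sanitizer-common helpers MmapNoReserveOrDie and SetShadowRegionHugePageMode:

#include <sys/mman.h>
#include <stddef.h>
#include <stdint.h>

// Map a per-thread shadow stack lazily (MAP_NORESERVE) and opt out of
// transparent huge pages so thousands of short-lived threads don't each
// pin large page-aligned regions.
static uintptr_t *MapShadowStack(size_t entries) {
  size_t size = entries * sizeof(uintptr_t);
  void *p = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  if (p == MAP_FAILED)
    return nullptr;
#ifdef MADV_NOHUGEPAGE
  madvise(p, size, MADV_NOHUGEPAGE);  // small pages, per the message above
#endif
  return static_cast<uintptr_t *>(p);
}

static void UnmapShadowStack(uintptr_t *stack, size_t entries) {
  munmap(stack, entries * sizeof(uintptr_t));  // returned at thread exit
}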
void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) {
DPrintf("#%d: alloc(%zu) = 0x%zx\n", thr->tid, sz, p);
ctx->metamap.AllocBlock(thr, pc, p, sz);
- if (write && thr->ignore_reads_and_writes == 0)
+ if (write && thr->ignore_reads_and_writes == 0 && thr->is_inited)
MemoryRangeImitateWrite(thr, pc, (uptr)p, sz);
else
MemoryResetRange(thr, pc, (uptr)p, sz);
}

void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write) {
CHECK_NE(p, (void*)0);
uptr sz = ctx->metamap.FreeBlock(thr->proc(), p);
DPrintf("#%d: free(0x%zx, %zu)\n", thr->tid, p, sz);
- if (write && thr->ignore_reads_and_writes == 0)
+ if (write && thr->ignore_reads_and_writes == 0 && thr->is_inited)
MemoryRangeFreed(thr, pc, (uptr)p, sz);
}
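The thr->is_inited checks cover exactly the post-finalization window
described above. A hypothetical reproducer (illustrative, not part of the
patch): a pthread TSD destructor whose intercepted free() runs after
ThreadFinish has already torn the thread down:

#include <pthread.h>
#include <stdlib.h>

static pthread_key_t key;

static void dtor(void *p) {
  free(p);  // intercepted free() may run after TSan finalized the thread
}

static void *worker(void *) {
  pthread_setspecific(key, malloc(16));
  return nullptr;  // dtor fires during thread exit, possibly after ThreadFinish
}

int main() {
  pthread_key_create(&key, dtor);
  pthread_t t;
  pthread_create(&t, nullptr, worker, nullptr);
  pthread_join(t, nullptr);
}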
In ThreadState's constructor (tsan_rtl.cpp):
{
CHECK_EQ(reinterpret_cast<uptr>(this) % SANITIZER_CACHE_LINE_SIZE, 0);
#if !SANITIZER_GO
- shadow_stack_pos = shadow_stack;
- shadow_stack_end = shadow_stack + kShadowStackSize;
+ // C/C++ uses fixed size shadow stack.
+ const int kInitStackSize = kShadowStackSize;
+ shadow_stack = static_cast<uptr *>(
+ MmapNoReserveOrDie(kInitStackSize * sizeof(uptr), "shadow stack"));
+ SetShadowRegionHugePageMode(reinterpret_cast<uptr>(shadow_stack),
+ kInitStackSize * sizeof(uptr));
#else
- // Setup dynamic shadow stack.
+ // Go uses malloc-allocated shadow stack with dynamic size.
const int kInitStackSize = 8;
- shadow_stack = (uptr *)Alloc(kInitStackSize * sizeof(uptr));
+ shadow_stack = static_cast<uptr *>(Alloc(kInitStackSize * sizeof(uptr)));
+#endif
shadow_stack_pos = shadow_stack;
shadow_stack_end = shadow_stack + kInitStackSize;
-#endif
}
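The Go branch keeps the small malloc-allocated stack because it can grow on
demand. A hedged sketch of the usual grow-by-doubling scheme (illustrative
names; the runtime's actual growth path is not shown in this diff):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct Stack {
  uintptr_t *begin, *pos, *end;
};

// Double the capacity, preserving the used prefix. Sketch only: no error
// handling on allocation failure.
static void Grow(Stack *s) {
  size_t cap = s->end - s->begin;
  size_t used = s->pos - s->begin;
  auto *bigger = static_cast<uintptr_t *>(malloc(2 * cap * sizeof(uintptr_t)));
  memcpy(bigger, s->begin, used * sizeof(uintptr_t));
  free(s->begin);
  s->begin = bigger;
  s->pos = bigger + used;
  s->end = bigger + 2 * cap;
}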
In struct ThreadState (tsan_rtl.h):
#if !SANITIZER_GO
IgnoreSet mop_ignore_set;
IgnoreSet sync_ignore_set;
- // C/C++ uses fixed size shadow stack.
- uptr shadow_stack[kShadowStackSize];
-#else
- // Go uses malloc-allocated shadow stack with dynamic size.
- uptr *shadow_stack;
#endif
+ uptr *shadow_stack;
uptr *shadow_stack_end;
uptr *shadow_stack_pos;
RawShadow *racy_shadow_addr;
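For scale, assuming kShadowStackSize is 64 * 1024 (its value in tsan_defs.h
at the time; an assumption worth checking), the removed embedded array cost
each thread about half a megabyte of TLS up front on LP64:

// Back-of-envelope: 64K uptr entries == 512 KiB per thread on LP64.
constexpr unsigned long kShadowStackSize = 64 * 1024;  // assumed value
constexpr unsigned long kPerThreadBytes =
    kShadowStackSize * sizeof(unsigned long);
static_assert(kPerThreadBytes == 512 * 1024, "512 KiB of TLS per thread");

With an mmap'ed stack the same range is only reserved; pages are committed
as the call stack deepens and the whole region is returned at thread exit.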
void ALWAYS_INLINE TraceAddEvent(ThreadState *thr, FastState fs,
EventType typ, u64 addr) {
if (!kCollectHistory)
return;
+ // TraceSwitch accesses shadow_stack, but it's called infrequently,
+ // so we check it here proactively.
+ DCHECK(thr->shadow_stack);
DCHECK_GE((int)typ, 0);
DCHECK_LE((int)typ, 7);
DCHECK_EQ(GetLsb(addr, kEventPCBits), addr);
In ThreadFinish (tsan_rtl_thread.cpp):
if (thr->tls_addr && thr->tls_size)
DontNeedShadowFor(thr->tls_addr, thr->tls_size);
thr->is_dead = true;
+ thr->is_inited = false;
ctx->thread_registry.FinishThread(thr->tid);
}
void ThreadContext::OnFinished() {
-#if SANITIZER_GO
- Free(thr->shadow_stack);
- thr->shadow_stack_pos = nullptr;
- thr->shadow_stack_end = nullptr;
-#endif
if (!detached) {
thr->fast_state.IncrementEpoch();
// Can't increment epoch w/o writing to the trace as well.
}
epoch1 = thr->fast_state.epoch();
+#if !SANITIZER_GO
+ UnmapOrDie(thr->shadow_stack, kShadowStackSize * sizeof(uptr));
+#else
+ Free(thr->shadow_stack);
+#endif
+ thr->shadow_stack = nullptr;
+ thr->shadow_stack_pos = nullptr;
+ thr->shadow_stack_end = nullptr;
+
if (common_flags()->detect_deadlocks)
ctx->dd->DestroyLogicalThread(thr->dd_lt);
thr->clock.ResetCached(&thr->proc()->clock_cache);
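Nulling all three cursors is what makes stragglers detectable. A minimal
sketch (illustrative names, not the real call graph) of the resulting
discipline:

#include <assert.h>
#include <stdint.h>

struct Thr {
  uintptr_t *shadow_stack = nullptr;
  uintptr_t *shadow_stack_pos = nullptr;
  bool is_inited = false;
};

// Trace-producing paths assert the stack is still mapped, mirroring the
// new DCHECK(thr->shadow_stack) in TraceAddEvent.
void PushPc(Thr *thr, uintptr_t pc) {
  assert(thr->shadow_stack && "event after OnFinished tore down the stack");
  *thr->shadow_stack_pos++ = pc;
}

// Memory-range updates instead skip finalized threads, mirroring the new
// thr->is_inited checks in OnUserAlloc/OnUserFree.
void OnLateFree(Thr *thr) {
  if (!thr->is_inited)
    return;  // range updates that would need the trace are skipped
}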