From 16b670ec2027af8699dd182e95d33076914d00eb Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Wed, 28 Jan 2015 09:52:14 +0000 Subject: [PATCH] [LPM] Rip all of ManagedStatic and ThreadLocal out of the pretty stack tracing code. Managed static was just insane overhead for this. We took memory fences and external function calls in every path that pushed a pretty stack frame. This includes a multitude of layers setting up and tearing down passes, the parser in Clang, everywhere. For the regression test suite or low-overhead JITs, this was contributing to really significant overhead. Even the LLVM ThreadLocal is really overkill here because it uses pthread_{set,get}_specific logic, and has careful code to both allocate and delete the thread local data. We don't actually want any of that, and this code in particular has problems coping with deallocation. What we want is a single TLS pointer that is valid to use during global construction and during global destruction, any time we want. That is exactly what every host compiler and OS we use has implemented for a long time, and what was standardized in C++11. Even though not all of our host compilers support the thread_local keyword, we can directly use the platform-specific keywords to get the minimal functionality needed. Provided this limited trial survives the build bots, I will move this to Compiler.h so it is more widely available as a light weight if limited alternative to the ThreadLocal class. Many thanks to David Majnemer for helping me think through the implications across platforms and craft the MSVC-compatible syntax. The end result is *substantially* faster. When running llc in a tight loop over a small IR file targeting the aarch64 backend, this improves its performance by over 10% for me. It also seems likely to fix the remaining regressions seen by JIT users with threading enabled. This may actually have more impact on real-world compile times due to the use of the pretty stack tracing utility throughout the rest of Clang or LLVM, but I've not collected any detailed measurements. llvm-svn: 227300 --- llvm/lib/Support/PrettyStackTrace.cpp | 39 ++++++++++++++++------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Support/PrettyStackTrace.cpp b/llvm/lib/Support/PrettyStackTrace.cpp index 987778a..3483dfc 100644 --- a/llvm/lib/Support/PrettyStackTrace.cpp +++ b/llvm/lib/Support/PrettyStackTrace.cpp @@ -16,9 +16,7 @@ #include "llvm-c/Core.h" #include "llvm/ADT/SmallString.h" #include "llvm/Config/config.h" // Get autoconf configuration settings -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Signals.h" -#include "llvm/Support/ThreadLocal.h" #include "llvm/Support/Watchdog.h" #include "llvm/Support/raw_ostream.h" @@ -28,7 +26,18 @@ using namespace llvm; -static ManagedStatic > PrettyStackTraceHead; +// We need a thread local pointer to manage the stack of our stack trace +// objects, but we *really* cannot tolerate destructors running and do not want +// to pay any overhead of synchronizing. As a consequence, we use a raw +// thread-local variable. Some day, we should be able to use a limited subset +// of C++11's thread_local, but compilers aren't up for it today. +// FIXME: This should be moved to a Compiler.h abstraction. +#ifdef _MSC_VER // MSVC supports this with a __declspec. +static __declspec(thread) const PrettyStackTraceEntry + *PrettyStackTraceHead = nullptr; +#else // Clang, GCC, and all compatible compilers tend to use __thread. +static __thread const PrettyStackTraceEntry *PrettyStackTraceHead = nullptr; +#endif static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){ unsigned NextID = 0; @@ -46,12 +55,12 @@ static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){ /// PrintCurStackTrace - Print the current stack trace to the specified stream. static void PrintCurStackTrace(raw_ostream &OS) { // Don't print an empty trace. - if (!PrettyStackTraceHead->get()) return; + if (!PrettyStackTraceHead) return; // If there are pretty stack frames registered, walk and emit them. OS << "Stack dump:\n"; - PrintStack(PrettyStackTraceHead->get(), OS); + PrintStack(PrettyStackTraceHead, OS); OS.flush(); } @@ -101,26 +110,14 @@ static void CrashHandler(void *) { PrettyStackTraceEntry::PrettyStackTraceEntry() { // Link ourselves. - NextEntry = PrettyStackTraceHead->get(); - PrettyStackTraceHead->set(this); + NextEntry = PrettyStackTraceHead; + PrettyStackTraceHead = this; } PrettyStackTraceEntry::~PrettyStackTraceEntry() { - // Do nothing if PrettyStackTraceHead is uninitialized. This can only happen - // if a shutdown occurred after we created the PrettyStackTraceEntry. That - // does occur in the following idiom: - // - // PrettyStackTraceProgram X(...); - // llvm_shutdown_obj Y; - // - // Without this check, we may end up removing ourselves from the stack trace - // after PrettyStackTraceHead has already been destroyed. - if (!PrettyStackTraceHead.isConstructed()) - return; - - assert(PrettyStackTraceHead->get() == this && + assert(PrettyStackTraceHead == this && "Pretty stack trace entry destruction is out of order"); - PrettyStackTraceHead->set(getNextEntry()); + PrettyStackTraceHead = getNextEntry(); } void PrettyStackTraceString::print(raw_ostream &OS) const { -- 2.7.4