From 1c8c59387cb5989b5494f346c8127feb9dff27bc Mon Sep 17 00:00:00 2001 From: Sung Yoon Whang Date: Fri, 20 Apr 2018 23:05:00 -0700 Subject: [PATCH] Disable GC Coop mode switching during fatal error handling during GC exception (#17710) --- src/vm/ceemain.cpp | 58 +++++++++++++++++++++++++++++++++++++++++++++++++---- src/vm/eepolicy.cpp | 15 +++++++++++++- src/vm/vars.hpp | 1 + 3 files changed, 69 insertions(+), 5 deletions(-) diff --git a/src/vm/ceemain.cpp b/src/vm/ceemain.cpp index 1d14293..b8c0adc 100644 --- a/src/vm/ceemain.cpp +++ b/src/vm/ceemain.cpp @@ -2923,6 +2923,7 @@ static void TerminateIPCManager(void) // Impl for UtilLoadStringRC Callback: In VM, we let the thread decide culture // copy culture name into szBuffer and return length // --------------------------------------------------------------------------- +extern BOOL g_fFatalErrorOccuredOnGCThread; static HRESULT GetThreadUICultureNames(__inout StringArrayList* pCultureNames) { CONTRACTL @@ -2945,7 +2946,23 @@ static HRESULT GetThreadUICultureNames(__inout StringArrayList* pCultureNames) Thread * pThread = GetThread(); - if (pThread != NULL) { + // When fatal errors have occured our invariants around GC modes may be broken and attempting to transition to co-op may hang + // indefinately. We want to ensure a clean exit so rather than take the risk of hang we take a risk of the error resource not + // getting localized with a non-default thread-specific culture. + // A canonical stack trace that gets here is a fatal error in the GC that comes through: + // coreclr.dll!GetThreadUICultureNames + // coreclr.dll!CCompRC::LoadLibraryHelper + // coreclr.dll!CCompRC::LoadLibrary + // coreclr.dll!CCompRC::GetLibrary + // coreclr.dll!CCompRC::LoadString + // coreclr.dll!CCompRC::LoadString + // coreclr.dll!SString::LoadResourceAndReturnHR + // coreclr.dll!SString::LoadResourceAndReturnHR + // coreclr.dll!SString::LoadResource + // coreclr.dll!EventReporter::EventReporter + // coreclr.dll!EEPolicy::LogFatalError + // coreclr.dll!EEPolicy::HandleFatalError + if (pThread != NULL && !g_fFatalErrorOccuredOnGCThread) { // Switch to cooperative mode, since we'll be looking at managed objects // and we don't want them moving on us. @@ -3075,8 +3092,24 @@ static int GetThreadUICultureId(__out LocaleIDValue* pLocale) Thread * pThread = GetThread(); - if (pThread != NULL) { - + // When fatal errors have occured our invariants around GC modes may be broken and attempting to transition to co-op may hang + // indefinately. We want to ensure a clean exit so rather than take the risk of hang we take a risk of the error resource not + // getting localized with a non-default thread-specific culture. + // A canonical stack trace that gets here is a fatal error in the GC that comes through: + // coreclr.dll!GetThreadUICultureNames + // coreclr.dll!CCompRC::LoadLibraryHelper + // coreclr.dll!CCompRC::LoadLibrary + // coreclr.dll!CCompRC::GetLibrary + // coreclr.dll!CCompRC::LoadString + // coreclr.dll!CCompRC::LoadString + // coreclr.dll!SString::LoadResourceAndReturnHR + // coreclr.dll!SString::LoadResourceAndReturnHR + // coreclr.dll!SString::LoadResource + // coreclr.dll!EventReporter::EventReporter + // coreclr.dll!EEPolicy::LogFatalError + // coreclr.dll!EEPolicy::HandleFatalError + if (pThread != NULL && !g_fFatalErrorOccuredOnGCThread) + { // Switch to cooperative mode, since we'll be looking at managed objects // and we don't want them moving on us. GCX_COOP(); @@ -3134,7 +3167,24 @@ static int GetThreadUICultureId(__out LocaleIDValue* pLocale) Thread * pThread = GetThread(); - if (pThread != NULL) { + // When fatal errors have occured our invariants around GC modes may be broken and attempting to transition to co-op may hang + // indefinately. We want to ensure a clean exit so rather than take the risk of hang we take a risk of the error resource not + // getting localized with a non-default thread-specific culture. + // A canonical stack trace that gets here is a fatal error in the GC that comes through: + // coreclr.dll!GetThreadUICultureNames + // coreclr.dll!CCompRC::LoadLibraryHelper + // coreclr.dll!CCompRC::LoadLibrary + // coreclr.dll!CCompRC::GetLibrary + // coreclr.dll!CCompRC::LoadString + // coreclr.dll!CCompRC::LoadString + // coreclr.dll!SString::LoadResourceAndReturnHR + // coreclr.dll!SString::LoadResourceAndReturnHR + // coreclr.dll!SString::LoadResource + // coreclr.dll!EventReporter::EventReporter + // coreclr.dll!EEPolicy::LogFatalError + // coreclr.dll!EEPolicy::HandleFatalError + if (pThread != NULL && !g_fFatalErrorOccuredOnGCThread) + { // Switch to cooperative mode, since we'll be looking at managed objects // and we don't want them moving on us. diff --git a/src/vm/eepolicy.cpp b/src/vm/eepolicy.cpp index 574943a..efa5fc0 100644 --- a/src/vm/eepolicy.cpp +++ b/src/vm/eepolicy.cpp @@ -1216,6 +1216,8 @@ inline void DoLogForFailFastException(LPCWSTR pszMessage, PEXCEPTION_POINTERS pE EX_END_CATCH(SwallowAllExceptions) } +//This starts FALSE and then converts to true if HandleFatalError has ever been called by a GC thread +BOOL g_fFatalErrorOccuredOnGCThread = FALSE; // // Log an error to the event log if possible, then throw up a dialog box. // @@ -1349,7 +1351,7 @@ void EEPolicy::LogFatalError(UINT exitCode, UINT_PTR address, LPCWSTR pszMessage //Give a managed debugger a chance if this fatal error is on a managed thread. Thread *pThread = GetThread(); - if (pThread) + if (pThread && !g_fFatalErrorOccuredOnGCThread) { GCX_COOP(); @@ -1491,6 +1493,9 @@ void DECLSPEC_NORETURN EEPolicy::HandleFatalStackOverflow(EXCEPTION_POINTERS *pE UNREACHABLE(); } + + + void DECLSPEC_NORETURN EEPolicy::HandleFatalError(UINT exitCode, UINT_PTR address, LPCWSTR pszMessage /* = NULL */, PEXCEPTION_POINTERS pExceptionInfo /* = NULL */, LPCWSTR errorSource /* = NULL */, LPCWSTR argExceptionString /* = NULL */) { WRAPPER_NO_CONTRACT; @@ -1526,6 +1531,14 @@ void DECLSPEC_NORETURN EEPolicy::HandleFatalError(UINT exitCode, UINT_PTR addres // All of the code from here on out is robust to any failures in any API's that are called. CONTRACT_VIOLATION(GCViolation | ModeViolation | SOToleranceViolation | FaultNotFatal | TakesLockViolation); + + // Setting g_fFatalErrorOccuredOnGCThread allows code to avoid attempting to make GC mode transitions which could + // block indefinately if the fatal error occured during the GC. + if (IsGCSpecialThread() && GCHeapUtilities::IsGCInProgress()) + { + g_fFatalErrorOccuredOnGCThread = TRUE; + } + // ThreadStore lock needs to be released before continuing with the FatalError handling should // because debugger is going to take CrstDebuggerMutex, whose lock level is higher than that of // CrstThreadStore. It should be safe to release the lock since execution will not be resumed diff --git a/src/vm/vars.hpp b/src/vm/vars.hpp index 6314355..50fbac0 100644 --- a/src/vm/vars.hpp +++ b/src/vm/vars.hpp @@ -524,6 +524,7 @@ EXTERN BOOL g_fComStarted; // GVAL_DECL(DWORD, g_fEEShutDown); EXTERN DWORD g_fFastExitProcess; +EXTERN BOOL g_fFatalErrorOccurredOnGCThread; #ifndef DACCESS_COMPILE EXTERN BOOL g_fSuspendOnShutdown; EXTERN BOOL g_fSuspendFinalizerOnShutdown; -- 2.7.4