This is a follow-up for D70378 (Cover usage of LLD as a library).
While debugging an intermittent failure on a bot, I recalled this scenario which
causes the issue:
1. When executing lld/test/ELF/invalid/symtab-sh-info.s L45, we reach
lld::elf::ObjFile::ObjFile(), which goes straight into its base ELFFileBase(),
then ELFFileBase::init().
2. At that point fatal() is thrown in lld/ELF/InputFiles.cpp L381, leaving a
half-initialized ObjFile instance.
3. We then end up in lld::exitLld() and since we are running with LLD_IN_TEST, we
happily restore the control flow to CrashRecoveryContext::RunSafely() then back
in lld::safeLldMain().
4. Before this patch, we called errorHandler().reset() just after, and this
attempted to reset the associated SpecificAlloc<ObjFile<ELF64LE>>. That tried
to free the half-initialized ObjFile instance, and more precisely its
ObjFile::dwarf member.
Sometimes that worked, sometimes it failed and was caught by the
CrashRecoveryContext. This scenario was the reason we called
errorHandler().reset() through a CrashRecoveryContext.
But in some rare cases, the above repro somehow corrupted the heap, creating a
stack overflow. When the CrashRecoveryContext's filter (that is,
__except (ExceptionFilter(GetExceptionInformation()))) tried to handle the
exception, it crashed again since the stack was exhausted -- and that took the
whole application down. That is the issue seen on the bot. Locally it happens
about 1 time out of 15.
Now this situation can happen anywhere in LLD. Since catching stack overflows is
not a reliable scenario ATM when using CrashRecoveryContext, we're now
preventing further re-entrance when such failures occur, by signaling
lld::SafeReturn::canRunAgain=false. When running with LLD_IN_TEST=2 (or above),
only one iteration will be executed, instead of two.
Differential Revision: https://reviews.llvm.org/D88348
if (errorHandler().outputBuffer)
errorHandler().outputBuffer->discard();
+ // Re-throw a possible signal or exception once/if it was caught by
+ // safeLldMain().
+ CrashRecoveryContext::throwIfCrash(val);
+
// Dealloc/destroy ManagedStatic variables before calling _exit().
// In an LTO build, allows us to get the output of -time-passes.
// Ensures that the thread pool for the parallel algorithms is stopped to
lld::outs().flush();
lld::errs().flush();
}
- llvm::sys::Process::Exit(val);
+ // When running inside safeLldMain(), restore the control flow back to the
+ // CrashRecoveryContext. Otherwise simply use _exit(), meaning no cleanup,
+ // since we want to avoid further crashes on shutdown.
+ llvm::sys::Process::Exit(val, /*NoCleanup=*/true);
}
void lld::diagnosticHandler(const DiagnosticInfo &di) {
// Generic entry point when using LLD as a library, safe for re-entry, supports
// crash recovery. Returns a general completion code and a boolean telling
// whether it can be called again. In some cases, a crash could corrupt memory
-// and re-entry would not be possible anymore.
+// and re-entry would not be possible anymore. Use exitLld() in that case to
+// properly exit your application and avoid intermittent crashes on exit caused
+// by cleanup.
SafeReturn safeLldMain(int argc, const char **argv, llvm::raw_ostream &stdoutOS,
llvm::raw_ostream &stderrOS);
// REQUIRES: arm
// RUN: llvm-mc -filetype=obj -triple=thumbv7a-windows-gnu %s -o %t.obj
-// RUN: not lld-link -entry:_start -subsystem:console %t.obj -out:%t.exe 2>&1 | FileCheck %s
+// RUN: env LLD_IN_TEST=1 not lld-link -entry:_start -subsystem:console %t.obj -out:%t.exe 2>&1 | FileCheck %s
.syntax unified
.globl _start
_start:
# RUN: cp %t.same_size.obj %t.obj && lld-link /dll /noentry /nodefaultlib %t.same_size.obj %t.obj
# RUN: cp %t.same_contents.obj %t.obj && lld-link /dll /noentry /nodefaultlib %t.same_contents.obj %t.obj
# RUN: cp %t.largest.obj %t.obj && lld-link /dll /noentry /nodefaultlib %t.largest.obj %t.obj
-# RUN: cp %t.newest.obj %t.obj && not lld-link /dll /noentry /nodefaultlib %t.newest.obj %t.obj 2>&1 | FileCheck --check-prefix=NEWNEW %s
+# RUN: cp %t.newest.obj %t.obj && env LLD_IN_TEST=1 not lld-link /dll /noentry /nodefaultlib %t.newest.obj %t.obj 2>&1 | FileCheck --check-prefix=NEWNEW %s
# NEWNEW: lld-link: error: unknown comdat type 7 for symbol
# /force doesn't affect errors about unknown comdat types.
-# RUN: cp %t.newest.obj %t.obj && not lld-link /force /dll /noentry /nodefaultlib %t.newest.obj %t.obj 2>&1 | FileCheck --check-prefix=NEWNEWFORCE %s
+# RUN: cp %t.newest.obj %t.obj && env LLD_IN_TEST=1 not lld-link /force /dll /noentry /nodefaultlib %t.newest.obj %t.obj 2>&1 | FileCheck --check-prefix=NEWNEWFORCE %s
# NEWNEWFORCE: lld-link: error: unknown comdat type 7 for symbol
# Check that same_size, same_contents, largest do what they're supposed to.
# RUN: lld-link /out:%t.dir/foo.dll /dll %t1.obj /export:datasym,DATA /noentry
# RUN: yaml2obj %s -o %t2.obj
-# RUN: not lld-link /out:%t.exe /entry:main %t2.obj %t.dir/foo.lib /delayload:foo.dll \
+# RUN: env LLD_IN_TEST=1 not lld-link /out:%t.exe /entry:main %t2.obj \
+# RUN: %t.dir/foo.lib /delayload:foo.dll \
# RUN: /alternatename:__delayLoadHelper2=main /opt:noref >& %t.log
# RUN: FileCheck %s < %t.log
# REQUIRES: system-windows
-# RUN: not LLD-LINK 2>&1 | FileCheck %s
+# RUN: env LLD_IN_TEST=1 not LLD-LINK 2>&1 | FileCheck %s
CHECK: no input files
# RUN: lld-link /lib /help | FileCheck -check-prefix=LIBHELP %s
LIBHELP: OVERVIEW: LLVM Lib
-# RUN: not lld-link /WX /lib 2>&1 | FileCheck -check-prefix=LIBBAD %s
+# RUN: env LLD_IN_TEST=1 not lld-link /WX /lib 2>&1 | FileCheck -check-prefix=LIBBAD %s
LIBBAD: ignoring /lib since it's not the first argument
# RUN: yaml2obj %p/Inputs/hello32.yaml -o %t.obj
# RUN: not lld-link /out:/ %t.obj 2>&1 | FileCheck -check-prefix=DIR %s
DIR: cannot open output file
-# RUN: not lld-link -version 2>&1 | FileCheck -check-prefix=SPELLVERSION %s
+# RUN: env LLD_IN_TEST=1 not lld-link -version 2>&1 | FileCheck -check-prefix=SPELLVERSION %s
SPELLVERSION: ignoring unknown argument '-version', did you mean '--version'
SPELLVERSION: no input files
-# RUN: not lld-link -nodefaultlibs 2>&1 | FileCheck -check-prefix=SPELLNODEFAULTLIB %s
+# RUN: env LLD_IN_TEST=1 not lld-link -nodefaultlibs 2>&1 | FileCheck -check-prefix=SPELLNODEFAULTLIB %s
SPELLNODEFAULTLIB: ignoring unknown argument '-nodefaultlibs', did you mean '-nodefaultlib'
SPELLNODEFAULTLIB: no input files
# RUN: %python %p/Inputs/def-many.py 65536 > %t-65536.def
# RUN: llvm-mc -triple x86_64-win32 %s -filetype=obj -o %t.obj
# RUN: lld-link -dll -noentry %t.obj -out:%t.dll -def:%t-65535.def
-# RUN: not lld-link -dll -noentry %t.obj -out:%t.dll -def:%t-65536.def 2>&1 | FileCheck %s
+# RUN: env LLD_IN_TEST=1 not lld-link -dll -noentry %t.obj -out:%t.dll -def:%t-65536.def 2>&1 | FileCheck %s
# CHECK: error: too many exported symbols
RUN: lld-link /entry:main /subsystem:console /out:%t.exe \
RUN: %p/Inputs/ret42.obj /failifmismatch:k1=v1 /failifmismatch:k1=v1
-RUN: not lld-link /entry:main /subsystem:console /out:%t.exe \
+RUN: env LLD_IN_TEST=1 not lld-link /entry:main /subsystem:console /out:%t.exe \
RUN: %p/Inputs/ret42.obj /failifmismatch:k1=v1 /failifmismatch:k1=v2 2>&1 | FileCheck %s
RUN: llc < %p/Inputs/failmismatch1.ll -mtriple x86_64-windows-msvc -filetype obj -o %t1.obj
RUN: llc < %p/Inputs/failmismatch2.ll -mtriple x86_64-windows-msvc -filetype obj -o %t2.obj
-RUN: not lld-link %t1.obj %t2.obj 2>&1 | FileCheck %s -check-prefix OBJ
+RUN: env LLD_IN_TEST=1 not lld-link %t1.obj %t2.obj 2>&1 | FileCheck %s -check-prefix OBJ
RUN: llvm-lib %t1.obj /out:%t.lib
-RUN: not lld-link %t.lib %t2.obj 2>&1 | FileCheck %s -check-prefix LIB
+RUN: env LLD_IN_TEST=1 not lld-link %t.lib %t2.obj 2>&1 | FileCheck %s -check-prefix LIB
CHECK: lld-link: error: /failifmismatch: mismatch detected for 'k1':
CHECK-NEXT: >>> cmd-line has value v1
# RUN: yaml2obj %s -o %t.obj
-# RUN: not lld-link %t.obj 2>&1 | FileCheck %s
+# RUN: env LLD_IN_TEST=1 not lld-link %t.obj 2>&1 | FileCheck %s
# CHECK: getSectionName failed: #1:
# RUN: yaml2obj %s -o %t.obj
-# RUN: not lld-link %t.obj 2>&1 | FileCheck %s
+# RUN: env LLD_IN_TEST=1 not lld-link %t.obj 2>&1 | FileCheck %s
# CHECK: foo should not refer to special section -10
CHECK: OVERVIEW: LLVM Linker
-# RUN: env LINK=-help not lld-link /lldignoreenv 2>&1 | \
+# RUN: env LLD_IN_TEST=1 LINK=-help not lld-link /lldignoreenv 2>&1 | \
# RUN: FileCheck --check-prefix=ERR %s
ERR: error: no input files
# UNSUPPORTED: libxml2
# RUN: yaml2obj %p/Inputs/ret42.yaml -o %t.obj
-# RUN: not lld-link /out:%t.exe /entry:main \
+# RUN: env LLD_IN_TEST=1 not lld-link /out:%t.exe /entry:main \
# RUN: /manifest:embed \
# RUN: /manifestuac:"level='requireAdministrator'" \
# RUN: /manifestinput:%p/Inputs/manifestinput.test %t.obj 2>&1 | FileCheck %s
# RUN: /merge:.foo=.bar /merge:.bar=.abc %t.obj /debug
# RUN: llvm-readobj --sections %t.exe | FileCheck --check-prefix=CHECK2 %s
-# RUN: not lld-link /out:%t.exe /entry:main /subsystem:console /force \
+# RUN: env LLD_IN_TEST=1 not lld-link /out:%t.exe /entry:main /subsystem:console /force \
# RUN: /merge:.rsrc=.foo %t.obj /debug 2>&1 | FileCheck --check-prefix=NO-RSRC %s
-# RUN: not lld-link /out:%t.exe /entry:main /subsystem:console /force \
+# RUN: env LLD_IN_TEST=1 not lld-link /out:%t.exe /entry:main /subsystem:console /force \
# RUN: /merge:.foo=.rsrc %t.obj /debug 2>&1 | FileCheck --check-prefix=NO-RSRC %s
-# RUN: not lld-link /out:%t.exe /entry:main /subsystem:console /force \
+# RUN: env LLD_IN_TEST=1 not lld-link /out:%t.exe /entry:main /subsystem:console /force \
# RUN: /merge:.reloc=.foo %t.obj /debug 2>&1 | FileCheck --check-prefix=NO-RELOC %s
-# RUN: not lld-link /out:%t.exe /entry:main /subsystem:console /force \
+# RUN: env LLD_IN_TEST=1 not lld-link /out:%t.exe /entry:main /subsystem:console /force \
# RUN: /merge:.foo=.reloc %t.obj /debug 2>&1 | FileCheck --check-prefix=NO-RELOC %s
-# RUN: not lld-link /out:%t.exe /entry:main /subsystem:console /force \
+# RUN: env LLD_IN_TEST=1 not lld-link /out:%t.exe /entry:main /subsystem:console /force \
# RUN: /merge:.foo=.foo1 /merge:.foo1=.foo %t.obj /debug 2>&1 | FileCheck --check-prefix=NO-CYCLE %s
-# RUN: not lld-link /out:%t.exe /entry:main /subsystem:console /force \
+# RUN: env LLD_IN_TEST=1 not lld-link /out:%t.exe /entry:main /subsystem:console /force \
# RUN: /merge:.foo=.foo1 /merge:.foo1=.foo2 /merge:.foo2=.foo1 %t.obj /debug 2>&1 | FileCheck --check-prefix=NO-CYCLE %s
# CHECK: Name: .def
# RUN: yaml2obj %s -o %t.obj
-# RUN: not lld-link /out:%t.exe /entry:func1 /subsystem:console %t.obj 2>&1 | FileCheck %s
+# RUN: env LLD_IN_TEST=1 not lld-link /out:%t.exe /entry:func1 /subsystem:console %t.obj 2>&1 | FileCheck %s
# This file is like pdata-arm64.yaml, except that .pdata has been extended with
# 4 bytes. This can happen due to for example bad assembler input. Check that
RUN: yaml2obj precomp-no-objname.yaml -o %t.precomp-no-objname.obj
RUN: yaml2obj precomp-zero-sig.yaml -o %t.precomp-zero-sig.obj
-RUN: not lld-link %t.precomp-no-objname.obj %S/Inputs/precomp-a.obj %S/Inputs/precomp-b.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe 2>&1 | FileCheck %s -check-prefix FAILURE-NO-SIGNATURE
+RUN: env LLD_IN_TEST=1 not lld-link %t.precomp-no-objname.obj %S/Inputs/precomp-a.obj %S/Inputs/precomp-b.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe 2>&1 | FileCheck %s -check-prefix FAILURE-NO-SIGNATURE
-RUN: not lld-link %t.precomp-zero-sig.obj %S/Inputs/precomp-a.obj %S/Inputs/precomp-b.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe 2>&1 | FileCheck %s -check-prefix FAILURE-NO-SIGNATURE
+RUN: env LLD_IN_TEST=1 not lld-link %t.precomp-zero-sig.obj %S/Inputs/precomp-a.obj %S/Inputs/precomp-b.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe 2>&1 | FileCheck %s -check-prefix FAILURE-NO-SIGNATURE
FAILURE-NO-SIGNATURE: error: {{.*}}.obj claims to be a PCH object, but does not have a valid signature
RUN: cp %S/Inputs/precomp.obj %t.precomp-dup.obj
-RUN: not lld-link %S/Inputs/precomp.obj %t.precomp-dup.obj %S/Inputs/precomp-a.obj %S/Inputs/precomp-b.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe 2>&1 | FileCheck %s -check-prefix FAILURE-DUP-SIGNATURE
+RUN: env LLD_IN_TEST=1 not lld-link %S/Inputs/precomp.obj %t.precomp-dup.obj %S/Inputs/precomp-a.obj %S/Inputs/precomp-b.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe 2>&1 | FileCheck %s -check-prefix FAILURE-DUP-SIGNATURE
FAILURE-DUP-SIGNATURE: error: a PCH object with the same signature has already been provided ({{.*precomp.obj and .*precomp-dup.obj.*}})
# RUN: rm %t.lib.obj
# RUN: lld-link /entry:main %t.main.obj %t.lib /out:%t.exe 2>&1 | \
# RUN: FileCheck --allow-empty %s
-# RUN: not lld-link /entry:main %t.main.obj %t_thin.lib /out:%t.exe 2>&1 | \
-# RUN: FileCheck --check-prefix=NOOBJ %s
-# RUN: not lld-link /entry:main %t.main.obj %t_thin.lib /out:%t.exe \
+# RUN: env LLD_IN_TEST=1 not lld-link /entry:main %t.main.obj %t_thin.lib \
+# RUN: /out:%t.exe 2>&1 | FileCheck --check-prefix=NOOBJ %s
+# RUN: env LLD_IN_TEST=1 not lld-link /entry:main %t.main.obj %t_thin.lib /out:%t.exe \
# RUN: /demangle:no 2>&1 | FileCheck --check-prefix=NOOBJNODEMANGLE %s
# CHECK-NOT: error: could not get the buffer for the member defining
# RUN: llvm-mc -triple=x86_64-win32 %s -filetype=obj -o %t.main.obj
# RUN: llvm-mc -triple=x86_64-win32 %p/Inputs/otherFunc.s -filetype=obj -o %t.other.obj
# RUN: llvm-ar rcs %t.other.lib %t.other.obj
-# RUN: not lld-link -out:%t.exe -entry:main %t.main.obj %p/Inputs/std64.lib %t.other.lib -opt:noref 2>&1 | FileCheck %s
+# RUN: env LLD_IN_TEST=1 not lld-link -out:%t.exe -entry:main %t.main.obj \
+# RUN: %p/Inputs/std64.lib %t.other.lib -opt:noref 2>&1 | FileCheck %s
# CHECK: MessageBoxA was replaced
.global main
if (!crc.RunSafely([&]() {
r = lldMain(argc, argv, stdoutOS, stderrOS, /*exitEarly=*/false);
}))
- r = crc.RetCode;
+ return {crc.RetCode, /*canRunAgain=*/false};
}
// Cleanup memory and reset everything back in pristine condition. This path
// Execute one iteration.
auto r = safeLldMain(argc, argv, llvm::outs(), llvm::errs());
if (!r.canRunAgain)
- _exit(r.ret); // Exit now, can't re-execute again.
+ exitLld(r.ret); // Exit now, can't re-execute again.
if (!mainRet) {
mainRet = r.ret;
return r.ret;
}
}
-#if LLVM_ON_UNIX
- // Re-throw the signal so it can be caught by WIFSIGNALED in
- // llvm/lib/Support/Unix/Program.inc. This is required to correctly handle
- // usages of `not --crash`.
- if (*mainRet > 128) {
- llvm::sys::unregisterHandlers();
- raise(*mainRet - 128);
- }
-#endif
return *mainRet;
}
LLVM_ATTRIBUTE_NORETURN
void HandleExit(int RetCode);
+ /// Throw again a signal or an exception, after it was caught once by a
+ /// CrashRecoveryContext.
+ static bool throwIfCrash(int RetCode);
+
/// In case of a crash, this is the crash identifier.
int RetCode = 0;
/// Equivalent to ::exit(), except when running inside a CrashRecoveryContext.
/// In that case, the control flow will resume after RunSafely(), like for a
/// crash, rather than exiting the current process.
+ /// Use \arg NoCleanup for calling _exit() instead of exit().
LLVM_ATTRIBUTE_NORETURN
- static void Exit(int RetCode);
+ static void Exit(int RetCode, bool NoCleanup = false);
};
}
llvm_unreachable("Most likely setjmp wasn't called!");
}
+bool CrashRecoveryContext::throwIfCrash(int RetCode) {
+#if defined(_WIN32)
+ // On Windows, the high bits are reserved for kernel return codes. Values
+ // starting with 0x80000000 are reserved for "warnings"; values starting with
+ // 0xC0000000 are for "errors". Only codes whose top four bits are 0x8 or 0xC
+ // are re-raised here; in practice both act as a non-continuable signal.
+ unsigned Code = ((unsigned)RetCode & 0xF0000000) >> 28;
+ if (Code != 0xC && Code != 8)
+ return false;
+ ::RaiseException(RetCode, 0, 0, NULL);
+#else
+ // On Unix, a return code above 128 encodes a signal as 128 + signal number.
+ if (RetCode <= 128)
+ return false;
+ llvm::sys::unregisterHandlers();
+ raise(RetCode - 128);
+#endif
+ return true;
+}
+
// FIXME: Portability.
static void setThreadBackgroundPriority() {
#ifdef __APPLE__
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
+#include <stddef.h>
+#include <stdlib.h> // for _Exit
+
using namespace llvm;
using namespace sys;
bool Process::AreCoreFilesPrevented() { return coreFilesPrevented; }
LLVM_ATTRIBUTE_NORETURN
-void Process::Exit(int RetCode) {
+void Process::Exit(int RetCode, bool NoCleanup) {
if (CrashRecoveryContext *CRC = CrashRecoveryContext::GetCurrent())
CRC->HandleExit(RetCode);
- ::exit(RetCode);
+
+ if (NoCleanup)
+ _Exit(RetCode);
+ else
+ ::exit(RetCode);
}
// Include the platform-specific parts of this class.