From cc5b509b9edacf7fa9af455b69af1f4357a668db Mon Sep 17 00:00:00 2001 From: Frederic Riss Date: Wed, 4 Sep 2019 16:13:12 +0000 Subject: [PATCH] Workaround TestConcurrentMany* flakiness in a more principled way The flakiness on our local machines seems to come from a race in the kernel between task_suspend and the creation of the Mach exceptions for the threads that hit breakpoints. The debugserver code is written with the assumption that the kernel will be able to provide us with all the exceptions for a given task once task_suspend returns. On machines with higher core counts, this seems not to be the case. The first batch of exceptions we get after task_suspend does not contain exceptions for all the threads that have hit a breakpoint, thus they get misreported in the first stop packet. Adding a 1ms timeout to the call that retrieves the batch of exceptions seems to work around the issue reliably on our machines, and it shouldn't impact standard debugging scenarios too much (a stop will incur an additional 1ms delay). We'll be talking to the kernel team to figure out the right contract for those APIs. This patch also reverts part of Jonas' previous workaround for the issue (r370785). 
llvm-svn: 370916 --- lldb/packages/Python/lldbsuite/test/make/pseudo_barrier.h | 1 - lldb/tools/debugserver/source/MacOSX/MachTask.mm | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/make/pseudo_barrier.h b/lldb/packages/Python/lldbsuite/test/make/pseudo_barrier.h index 8a54334..5a6b045 100644 --- a/lldb/packages/Python/lldbsuite/test/make/pseudo_barrier.h +++ b/lldb/packages/Python/lldbsuite/test/make/pseudo_barrier.h @@ -7,7 +7,6 @@ static inline void pseudo_barrier_wait(pseudo_barrier_t &barrier) { --barrier; while (barrier > 0) std::this_thread::yield(); - std::this_thread::sleep_for(std::chrono::milliseconds(100)); } static inline void pseudo_barrier_init(pseudo_barrier_t &barrier, int count) { diff --git a/lldb/tools/debugserver/source/MacOSX/MachTask.mm b/lldb/tools/debugserver/source/MacOSX/MachTask.mm index 6aa4fb2..0d5a63a 100644 --- a/lldb/tools/debugserver/source/MacOSX/MachTask.mm +++ b/lldb/tools/debugserver/source/MacOSX/MachTask.mm @@ -754,7 +754,7 @@ void *MachTask::ExceptionThread(void *arg) { // to get all currently available exceptions for this task err = exception_message.Receive( mach_task->ExceptionPort(), - MACH_RCV_MSG | MACH_RCV_INTERRUPT | MACH_RCV_TIMEOUT, 0); + MACH_RCV_MSG | MACH_RCV_INTERRUPT | MACH_RCV_TIMEOUT, 1); } else if (periodic_timeout > 0) { // We need to stop periodically in this loop, so try and get a mach // message with a valid timeout (ms) -- 2.7.4