From fa6f37c04391d0103c95e24813ad345c2d5c4b67 Mon Sep 17 00:00:00 2001 From: Frantisek Sumsal Date: Tue, 21 Nov 2023 09:37:21 +0100 Subject: [PATCH] test: don't redirect stdout/stderr to console Turns out that redirecting a lot of output to the console can have some funny effects, like random kernel soft lockups. I spotted this in various CIs, but it remained almost entirely hidden thanks to `softlockup_panic=1`, until 1a36d2672f which introduced a couple of tests that log quite a lot in a short amount of time. This, in combination with newer kernel version, which, for some reason, seem to be more susceptible to such soft lockups, made the Arch Linux jobs soft lockup quite a lot, see [0]. While debugging this I also noticed that runs which don't redirect stdout/stderr to the console are noticeably faster, e.g.: # TEST-71 nspawn + QEMU (KVM), StandardOutput=journal+console Elapsed (wall clock) time (h:mm:ss or m:ss): 0:24.64 # TEST-71 nspawn + QEMU (KVM), StandardOutput=journal Elapsed (wall clock) time (h:mm:ss or m:ss): 0:17.95 # TEST-71 nspawn + QEMU, StandardOutput=journal+console Elapsed (wall clock) time (h:mm:ss or m:ss): 2:04.70 # TEST-71 nspawn + QEMU, StandardOutput=journal Elapsed (wall clock) time (h:mm:ss or m:ss): 1:44.48 # TEST-04 QEMU, StandardOutput=journal+console Elapsed (wall clock) time (h:mm:ss or m:ss): 4:22.70 # TEST-04 QEMU, StandardOutput=console Elapsed (wall clock) time (h:mm:ss or m:ss): 5:04.67 Given all this, let's effectively revert ba7abf79a5, and dump the testsuite-related journal messages only after the test finishes, so they don't go through the slow console. Resolves: systemd/systemd-centos-ci#660 [0] https://github.com/systemd/systemd-centos-ci/issues/660 --- test/test-functions | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/test/test-functions b/test/test-functions index 674ba1c..e2d3f10 100644 --- a/test/test-functions +++ b/test/test-functions @@ -1032,7 +1032,6 @@ EOF DefaultEnvironment=${default_environment[*]} ManagerEnvironment=${manager_environment[*]} DefaultTimeoutStartSec=180s -DefaultStandardOutput=journal+console EOF # ASAN and syscall filters aren't compatible with each other. @@ -1785,10 +1784,10 @@ save_journal() { dest_name="system.journal" fi - if [[ -n "$TEST_SHOW_JOURNAL" ]]; then - echo "---- $source_dir ----" - "$JOURNALCTL" --no-pager -o short-monotonic --no-hostname --priority="$TEST_SHOW_JOURNAL" -D "$source_dir" - fi + # Show messages from the testsuite-XX.service or messages with priority "warning" and higher + echo " --- $source_dir ---" + "$JOURNALCTL" --no-pager --no-hostname -o short-monotonic -D "$source_dir" \ + _SYSTEMD_UNIT="testsuite-${TESTID:?}.service" + PRIORITY=4 + PRIORITY=3 + PRIORITY=2 + PRIORITY=1 + PRIORITY=0 if get_bool "$save"; then # If we don't have systemd-journal-remote copy all journals from /var/log/journal/ @@ -3339,18 +3338,6 @@ test_setup() { mask_supporting_services fi - # Send stdout/stderr of testsuite-*.service units to both journal and - # console to make debugging in CIs easier - # Note: we can't use a dropin for `testsuite-.service`, since that also - # overrides 'sub-units' of some tests that already use a specific - # value for Standard(Output|Error)= - # (e.g. test/units/testsuite-66-deviceisolation.service) - if ! get_bool "$INTERACTIVE_DEBUG"; then - local dropin_dir="${initdir:?}/etc/systemd/system/testsuite-${TESTID:?}.service.d" - mkdir -p "$dropin_dir" - printf '[Service]\nStandardOutput=journal+console\nStandardError=journal+console' >"$dropin_dir/99-stdout.conf" - fi - if get_bool "$IS_BUILT_WITH_COVERAGE"; then # Do an initial coverage capture, to make sure the final report includes # files that the tests didn't touch at all -- 2.7.4