From 7af6616030d089010789758a004f712efb59e2fc Mon Sep 17 00:00:00 2001 From: =?utf8?q?Martin=20Roukala=20=28n=C3=A9=20Peres=29?= Date: Thu, 20 Apr 2023 15:21:02 +0300 Subject: [PATCH] radv/ci: only reboot on hangs for vkcts-navi10-valve MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit vkcts-navi10-valve has the nasty habit of hanging the GPU, so we introduced an auto-retry... but for every radv job. Let's stop doing that, and instead limit the auto-retry to vkcts-navi10-valve only. Additionally, let's increase the number of attempts to 3 (2 retries), as sometimes, it may still fail and we don't want to flag it as a fail in nightly runs. Let's hope we'll get to the bottom of this hang sooner rather than later, so that we can remove this hack! Reviewed-by: Samuel Pitoiset Signed-off-by: Martin Roukala (né Peres) Part-of: --- .gitlab-ci/test/gitlab-ci.yml | 4 ++-- src/amd/ci/gitlab-ci.yml | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci/test/gitlab-ci.yml b/.gitlab-ci/test/gitlab-ci.yml index 21cbdf4..9c4cb17 100644 --- a/.gitlab-ci/test/gitlab-ci.yml +++ b/.gitlab-ci/test/gitlab-ci.yml @@ -248,9 +248,9 @@ clang-format: B2C_LOG_LEVEL: 6 B2C_POWEROFF_DELAY: 15 B2C_SESSION_END_REGEX: '^.*It''s now safe to turn off your computer\r$' - B2C_SESSION_REBOOT_REGEX: '(GPU hang detected!|\*ERROR\* ring [^\s]+ timeout(?!, but soft recovered)|The CS has been cancelled because the context is lost)' + B2C_SESSION_REBOOT_REGEX: '' B2C_TIMEOUT_BOOT_MINUTES: 45 - B2C_TIMEOUT_BOOT_RETRIES: 1 + B2C_TIMEOUT_BOOT_RETRIES: 0 B2C_TIMEOUT_FIRST_MINUTES: 5 B2C_TIMEOUT_FIRST_RETRIES: 3 B2C_TIMEOUT_MINUTES: 4 diff --git a/src/amd/ci/gitlab-ci.yml b/src/amd/ci/gitlab-ci.yml index 5fc3a17..10ecd24 100644 --- a/src/amd/ci/gitlab-ci.yml +++ b/src/amd/ci/gitlab-ci.yml @@ -355,6 +355,10 @@ vkcts-navi10-valve: # Use Linux 5.17 to work around hitting __vm_enough_memory then hanging on Linux 6.1 # See 
https://gitlab.freedesktop.org/mesa/mesa/-/issues/7888 for details B2C_KERNEL_URL: 'https://gitlab.freedesktop.org/mupuf/valve-infra/-/package_files/144/download' # 5.17.1 + # NAVI10 hangs quite often while running vkcts. This is being investigated, + # but in the mean time, retry up to 2 times. + B2C_SESSION_REBOOT_REGEX: '(GPU hang detected!|\*ERROR\* ring [^\s]+ timeout(?!, but soft recovered)|The CS has been cancelled because the context is lost)' + B2C_TIMEOUT_BOOT_RETRIES: 2 vkcts-navi21-valve: parallel: 3 -- 2.7.4