drm/amdgpu: extend halt_if_hws_hang to MES
author: Graham Sider <Graham.Sider@amd.com>
Thu, 29 Sep 2022 15:57:34 +0000 (11:57 -0400)
committer: Alex Deucher <alexander.deucher@amd.com>
Fri, 4 Nov 2022 20:05:53 +0000 (16:05 -0400)
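Hang on MES timeout if halt_if_hws_hang is set to 1: when an MES packet
submission times out, the submitting path loops on schedule() for as long as
halt_if_hws_hang is nonzero instead of returning -ETIMEDOUT right away.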

Signed-off-by: Graham Sider <Graham.Sider@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
drivers/gpu/drm/amd/amdgpu/mes_v11_0.c

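diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0e6ddf0..9999c18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h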
@@ -219,10 +219,12 @@ extern int amdgpu_use_xgmi_p2p;
 extern int sched_policy;
 extern bool debug_evictions;
 extern bool no_system_mem_limit;
+extern int halt_if_hws_hang;
 #else
 static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS;
 static const bool __maybe_unused debug_evictions; /* = false */
 static const bool __maybe_unused no_system_mem_limit;
+static const int __maybe_unused halt_if_hws_hang;
 #endif
 #ifdef CONFIG_HSA_AMD_P2P
 extern bool pcie_p2p;
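diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
index 1abdf8b..6143941 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c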
@@ -121,6 +121,10 @@ static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
        if (r < 1) {
                DRM_ERROR("MES failed to response msg=%d\n",
                          x_pkt->header.opcode);
+
+               while (halt_if_hws_hang)
+                       schedule();
+
                return -ETIMEDOUT;
        }
 
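diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 8ca3417..1395453 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c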
@@ -129,6 +129,10 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
        if (r < 1) {
                DRM_ERROR("MES failed to response msg=%d\n",
                          x_pkt->header.opcode);
+
+               while (halt_if_hws_hang)
+                       schedule();
+
                return -ETIMEDOUT;
        }