drm/amdkfd: fix and enable debugging for gfx11
author: Jonathan Kim <jonathan.kim@amd.com>
Tue, 23 May 2023 15:57:27 +0000 (11:57 -0400)
committer: Alex Deucher <alexander.deucher@amd.com>
Fri, 9 Jun 2023 16:48:19 +0000 (12:48 -0400)
There are a couple of fixes required to enable gfx11 debugging.

First, ADD_QUEUE.trap_en is an inappropriate place to toggle
a per-process register so move it to SET_SHADER_DEBUGGER.trap_en.
When ADD_QUEUE.skip_process_ctx_clear is set, MES will prioritize
the SET_SHADER_DEBUGGER.trap_en setting.

Second, to preserve correct save/restore privileged wave states
in coordination with the trap enablement setting, resume suspended
waves early in the disable call.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
drivers/gpu/drm/amd/amdkfd/kfd_debug.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_topology.c
drivers/gpu/drm/amd/include/mes_v11_api_def.h

index 20cc3fffe921a71aefe8f8653255ed6c494dfc71..e9091ebfe230db24072a7abc0dc9ff1effd9267f 100644 (file)
@@ -928,7 +928,8 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
                                uint64_t process_context_addr,
                                uint32_t spi_gdbg_per_vmid_cntl,
                                const uint32_t *tcp_watch_cntl,
-                               uint32_t flags)
+                               uint32_t flags,
+                               bool trap_en)
 {
        struct mes_misc_op_input op_input = {0};
        int r;
@@ -945,6 +946,10 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
        memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
                        sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
 
+       if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
+                       AMDGPU_MES_API_VERSION_SHIFT) >= 14)
+               op_input.set_shader_debugger.trap_en = trap_en;
+
        amdgpu_mes_lock(&adev->mes);
 
        r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
index b5f5eed2b5efe663126e187060886fad94a2a003..2d6ac30b7135b894674224b826356c30ec3c8f3d 100644 (file)
@@ -294,6 +294,7 @@ struct mes_misc_op_input {
                        } flags;
                        uint32_t spi_gdbg_per_vmid_cntl;
                        uint32_t tcp_watch_cntl[4];
+                       uint32_t trap_en;
                } set_shader_debugger;
        };
 };
@@ -361,7 +362,8 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
                                uint64_t process_context_addr,
                                uint32_t spi_gdbg_per_vmid_cntl,
                                const uint32_t *tcp_watch_cntl,
-                               uint32_t flags);
+                               uint32_t flags,
+                               bool trap_en);
 
 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
                        int queue_type, int idx,
index c4e3cb8d44de3f2950e0457d84d1a45dc3a684de..1bdaa00c0b466ff2aabfcf77ae41f781ad4a69ef 100644 (file)
@@ -347,6 +347,7 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
                memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
                                input->set_shader_debugger.tcp_watch_cntl,
                                sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
+               misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en;
                break;
        default:
                DRM_ERROR("unsupported misc op (%d) \n", input->op);
index 125274445f4398da4bd16d8a82cb3cb4961c055b..cd34e7aaead490aeb48baeca185f248b28306dda 100644 (file)
@@ -349,12 +349,13 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd)
 {
        uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
        uint32_t flags = pdd->process->dbg_flags;
+       bool sq_trap_en = !!spi_dbg_cntl;
 
        if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
                return 0;
 
        return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl,
-                                               pdd->watch_points, flags);
+                                               pdd->watch_points, flags, sq_trap_en);
 }
 
 #define KFD_DEBUGGER_INVALID_WATCH_POINT_ID -1
@@ -557,6 +558,10 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
 
        if (!unwind) {
                uint32_t flags = 0;
+               int resume_count = resume_queues(target, 0, NULL);
+
+               if (resume_count)
+                       pr_debug("Resumed %d queues\n", resume_count);
 
                cancel_work_sync(&target->debug_event_workarea);
                kfd_dbg_clear_process_address_watch(target);
@@ -598,13 +603,6 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
        }
 
        kfd_dbg_set_workaround(target, false);
-
-       if (!unwind) {
-               int resume_count = resume_queues(target, 0, NULL);
-
-               if (resume_count)
-                       pr_debug("Resumed %d queues\n", resume_count);
-       }
 }
 
 static void kfd_dbg_clean_exception_status(struct kfd_process *target)
index 498ad7d4e7d906bb23ca4d1e75ab290cccdf1366..d6b15493fffdb4dda8890778ed12589b5fd64f63 100644 (file)
@@ -227,8 +227,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
        queue_input.tba_addr = qpd->tba_addr;
        queue_input.tma_addr = qpd->tma_addr;
        queue_input.trap_en = KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) ||
-                             KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0) ||
-                             q->properties.is_dbg_wa;
+                             KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0);
        queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled;
 
        queue_type = convert_to_mes_queue_type(q->properties.type);
index faa7939f35bde5ecf1a651378dbd8ed132fa313a..90b86a6ac7bd63f030a1ccef73fa0e582cb19ea3 100644 (file)
@@ -1863,13 +1863,15 @@ static void kfd_topology_set_dbg_firmware_support(struct kfd_topology_device *de
 {
        bool firmware_supported = true;
 
-       /*
-        * FIXME: GFX11 FW currently not sufficient to deal with CWSR WA.
-        * Updated FW with API changes coming soon.
-        */
        if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) &&
                        KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) {
-               firmware_supported = false;
+               uint32_t mes_api_rev = (dev->gpu->adev->mes.sched_version &
+                                               AMDGPU_MES_API_VERSION_MASK) >>
+                                               AMDGPU_MES_API_VERSION_SHIFT;
+               uint32_t mes_rev = dev->gpu->adev->mes.sched_version &
+                                               AMDGPU_MES_VERSION_MASK;
+
+               firmware_supported = (mes_api_rev >= 14) && (mes_rev >= 64);
                goto out;
        }
 
index f3c15f18ddb56b1110d7012846767e23d64a6059..0997e999416a48ec905379083fb4b2b0ba7b7f76 100644 (file)
@@ -575,6 +575,7 @@ struct SET_SHADER_DEBUGGER {
        } flags;
        uint32_t spi_gdbg_per_vmid_cntl;
        uint32_t tcp_watch_cntl[4]; /* TCP_WATCHx_CNTL */
+       uint32_t trap_en;
 };
 
 union MESAPI__MISC {