{
uint32_t data = 0;
- data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
if (pdd->qpd.queue_count)
return -EEXIST;
+
+ /*
+ * Set up TTMPs by default.
+ * Note that this call must remain here so that MES ADD QUEUE can
+ * skip the process context clear (skip_process_ctx_clear)
+ * unconditionally: the first call to SET_SHADER_DEBUGGER clears
+ * any stale process context data saved in MES.
+ */
+ if (pdd->dev->kfd->shared_resources.enable_mes)
+ kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
}
p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
if (!pdd->dev->kfd->shared_resources.enable_mes)
debug_refresh_runlist(pdd->dev->dqm);
else
- kfd_dbg_set_mes_debug_mode(pdd);
+ kfd_dbg_set_mes_debug_mode(pdd,
+ !kfd_dbg_has_cwsr_workaround(pdd->dev));
}
}
return r;
}
-int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd)
+int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
{
uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
uint32_t flags = pdd->process->dbg_flags;
- bool sq_trap_en = !!spi_dbg_cntl || !kfd_dbg_has_cwsr_workaround(pdd->dev);
if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
return 0;
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_map_and_unlock(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
kfd_dbg_clear_dev_watch_id(pdd, watch_id);
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_map_and_unlock(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
/* HWS is broken so no point in HW rollback but release the watchpoint anyways */
if (r)
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r) {
target->dbg_flags = prev_flags;
if (!pdd->dev->kfd->shared_resources.enable_mes)
debug_refresh_runlist(pdd->dev->dqm);
else
- kfd_dbg_set_mes_debug_mode(pdd);
+ kfd_dbg_set_mes_debug_mode(pdd, true);
}
}
if (!pdd->dev->kfd->shared_resources.enable_mes)
debug_refresh_runlist(pdd->dev->dqm);
else
- kfd_dbg_set_mes_debug_mode(pdd);
+ kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
}
kfd_dbg_set_workaround(target, false);
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r) {
target->runtime_info.runtime_state =
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r)
break;
if (!pdd->dev->kfd->shared_resources.enable_mes)
r = debug_refresh_runlist(pdd->dev->dqm);
else
- r = kfd_dbg_set_mes_debug_mode(pdd);
+ r = kfd_dbg_set_mes_debug_mode(pdd, true);
if (r)
break;
return true;
}
-int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd);
+int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en);
+
+/*
+ * kfd_dbg_has_ttmps_always_setup - version gate for "TTMPs always set up".
+ * @dev: KFD node whose GC IP version and MES scheduler version are inspected.
+ *
+ * Return: true when either
+ *   - the GC version is below 11.0.0 and is not 9.4.2, or
+ *   - the GC version is in [11.0.0, 12.0.0) and the MES scheduler version
+ *     (sched_version masked with AMDGPU_MES_VERSION_MASK) is at least 70;
+ * false otherwise.
+ */
+static inline bool kfd_dbg_has_ttmps_always_setup(struct kfd_node *dev)
+{
+	/* GC versions below 11.0.0: everything qualifies except 9.4.2. */
+	if (KFD_GC_VERSION(dev) < IP_VERSION(11, 0, 0))
+		return KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 2);
+
+	/* GC 12.0.0 and newer never qualify. */
+	if (KFD_GC_VERSION(dev) >= IP_VERSION(12, 0, 0))
+		return false;
+
+	/* GC 11.x: decided by the masked MES scheduler version. */
+	return (dev->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 70;
+}
#endif
queue_input.tma_addr = qpd->tma_addr;
queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled;
+ queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled ||
+ kfd_dbg_has_ttmps_always_setup(q->device);
queue_type = convert_to_mes_queue_type(q->properties.type);
if (queue_type < 0) {
#include "kfd_device_queue_manager.h"
#include "kfd_iommu.h"
#include "kfd_svm.h"
+#include "kfd_debug.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ras.h"
#include "amdgpu.h"
HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED |
HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED;
+ if (kfd_dbg_has_ttmps_always_setup(dev->gpu))
+ dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
+
if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3))
dev->node_props.debug_prop |=
HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 |
HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
- if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 4, 2))
- dev->node_props.debug_prop |=
- HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
-
if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2))
dev->node_props.capability |=
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
- if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(11, 0, 0))
- dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
- else
+ if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0))
dev->node_props.capability |=
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
}