Use WARN to print messages with backtrace when evictions are triggered.
This can help determine the root cause of evictions and help spot driver
bugs triggering evictions unintentionally, or help with performance tuning
by avoiding conditions that cause evictions in a specific workload.
The messages are controlled by a new module parameter, debug_evictions,
that defaults to off and can be changed at runtime:
echo Y > /sys/module/amdgpu/parameters/debug_evictions
echo N > /sys/module/amdgpu/parameters/debug_evictions
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
extern int amdgpu_force_asic_type;
#ifdef CONFIG_HSA_AMD
extern int sched_policy;
+extern bool debug_evictions;
#else
static const int sched_policy = KFD_SCHED_POLICY_HWS;
+static const bool debug_evictions; /* = false */
#endif
extern int amdgpu_tmz;
int queue_preemption_timeout_ms = 9000;
module_param(queue_preemption_timeout_ms, int, 0644);
MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1 = Minimum, 9000 = default)");
+
+/**
+ * DOC: debug_evictions(bool)
+ * Enable extra debug messages to help determine the cause of evictions.
+ * When set, the driver emits WARN messages (which include a backtrace) at
+ * the points where an eviction is triggered or scheduled. The default is
+ * false. The parameter is registered with mode 0644, so it can be toggled
+ * at runtime through /sys/module/amdgpu/parameters/debug_evictions.
+ */
+bool debug_evictions;
+module_param(debug_evictions, bool, 0644);
+MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = default)");
#endif
/**
continue;
}
+ WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
+ "Adding eviction fence to sync obj");
r = amdgpu_sync_fence(sync, f, false);
if (r)
break;
if (!p)
return -ESRCH;
+ WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
r = kfd_process_evict_queues(p);
kfd_unref_process(p);
/* During process initialization eviction_work.dwork is initialized
* to kfd_evict_bo_worker
*/
+ WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies",
+ p->lead_thread->pid, delay_jiffies);
schedule_delayed_work(&p->eviction_work, delay_jiffies);
out:
kfd_unref_process(p);
*/
extern int queue_preemption_timeout_ms;
+/*
+ * Enable eviction debug messages (WARN with backtrace); set through the
+ * debug_evictions module parameter
+ */
+extern bool debug_evictions;
+
enum cache_policy {
cache_policy_coherent,
cache_policy_noncoherent