drm/amdgpu: add amdgpu_error_* debugfs file
author Christian König <christian.koenig@amd.com>
Wed, 19 Apr 2023 10:51:41 +0000 (12:51 +0200)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 15 Jun 2023 15:37:54 +0000 (11:37 -0400)
This allows us to insert some error codes into the bottom of the pipeline
on an engine.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

index 876ec35..0c6ec9c 100644 (file)
@@ -692,6 +692,30 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
 }
 
 /**
+ * amdgpu_fence_driver_set_error - set error code on fences
+ * @ring: the ring which contains the fences
+ * @error: the error code to set
+ *
+ * Set an error code on every fence in the ring's fence array that has not
+ * signaled yet. The fence driver lock is taken with interrupts disabled
+ * and held for the whole walk over the array.
+ */
+void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error)
+{
+       struct amdgpu_fence_driver *drv = &ring->fence_drv;
+       unsigned long flags;
+
+       spin_lock_irqsave(&drv->lock, flags);
+       /* Inclusive bound: slots 0..num_fences_mask are all visited. */
+       for (unsigned int i = 0; i <= drv->num_fences_mask; ++i) {
+               struct dma_fence *fence;
+
+               /* Holding drv->lock is what justifies this dereference. */
+               fence = rcu_dereference_protected(drv->fences[i],
+                                                 lockdep_is_held(&drv->lock));
+               /* Skip empty slots and fences that have already signaled. */
+               if (fence && !dma_fence_is_signaled_locked(fence))
+                       dma_fence_set_error(fence, error);
+       }
+       spin_unlock_irqrestore(&drv->lock, flags);
+}
+
+/**
  * amdgpu_fence_driver_force_completion - force signal latest fence of ring
  *
  * @ring: fence of the ring to signal
index da26c55..9fd55a1 100644 (file)
@@ -561,6 +561,17 @@ static const struct file_operations amdgpu_debugfs_mqd_fops = {
        .llseek = default_llseek
 };
 
+/*
+ * Callback used by amdgpu_debugfs_error_fops: sets @val as the error code
+ * on the fences of the ring passed in @data. Always returns 0.
+ */
+static int amdgpu_debugfs_ring_error(void *data, u64 val)
+{
+       struct amdgpu_ring *ring = data;
+
+       /*
+        * val is a u64 but amdgpu_fence_driver_set_error() takes an int, so
+        * the value is implicitly narrowed here.
+        * NOTE(review): no range check is done; presumably the writer is
+        * expected to supply a negative errno value - confirm.
+        */
+       amdgpu_fence_driver_set_error(ring, val);
+       return 0;
+}
+
+/*
+ * File operations for the per-ring "amdgpu_error_<ring>" debugfs file.
+ * The second argument (presumably the getter - confirm against the macro
+ * definition) is NULL; amdgpu_debugfs_ring_error handles the value, which
+ * is formatted as a signed "%lld".
+ */
+DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(amdgpu_debugfs_error_fops, NULL,
+                               amdgpu_debugfs_ring_error, "%lld\n");
+
 #endif
 
 void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
@@ -582,6 +593,11 @@ void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
                                         &amdgpu_debugfs_mqd_fops,
                                         ring->mqd_size);
        }
+
+       sprintf(name, "amdgpu_error_%s", ring->name);
+       debugfs_create_file(name, 0200, root, ring,
+                           &amdgpu_debugfs_error_fops);
+
 #endif
 }
 
index 21ffb9c..028ff07 100644 (file)
@@ -126,6 +126,7 @@ struct amdgpu_fence_driver {
 extern const struct drm_sched_backend_ops amdgpu_sched_ops;
 
 void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
+void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
 
 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);