drm/amdgpu: Move scheduler init to after XGMI is ready
author Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Mon, 6 Dec 2021 19:59:35 +0000 (14:59 -0500)
committer Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Wed, 9 Feb 2022 17:15:04 +0000 (12:15 -0500)
Before we initialize schedulers we must know which reset
domain we are in - for a single device there is a single
domain per device and so a single wq per device. For XGMI
the reset domain spans the entire XGMI hive and so the
reset wq is per hive.
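
As an illustration only, here is a minimal standalone sketch of that
ordering constraint (every type and name below - struct hive,
pick_reset_domain(), wq_name - is a hypothetical stand-in, not the
driver's real API): the device must first resolve which reset domain it
belongs to, per device or per hive, and only then can scheduler init be
handed the matching workqueue.

    /* Hypothetical sketch of per-device vs. per-hive reset wq selection.
     * Stand-in types only; not the amdgpu driver's actual structures. */
    #include <stdbool.h>
    #include <stdio.h>

    struct reset_domain {
            const char *wq_name;   /* stands in for an ordered reset workqueue */
    };

    struct hive {
            struct reset_domain reset_domain;  /* shared by every hive member */
    };

    struct device {
            bool in_xgmi_hive;
            struct hive *hive;
            struct reset_domain local_domain;  /* used when the device stands alone */
            struct reset_domain *reset_domain; /* what scheduler init will consume */
    };

    /* Must run before any scheduler is created, so scheduler init can be
     * given the right workqueue. */
    static void pick_reset_domain(struct device *dev)
    {
            if (dev->in_xgmi_hive && dev->hive)
                    dev->reset_domain = &dev->hive->reset_domain; /* wq per hive */
            else
                    dev->reset_domain = &dev->local_domain;       /* wq per device */
    }

    int main(void)
    {
            struct hive h = { .reset_domain = { .wq_name = "hive-reset-wq" } };
            struct device solo = { .local_domain = { .wq_name = "dev-reset-wq" } };
            struct device member = { .in_xgmi_hive = true, .hive = &h };

            pick_reset_domain(&solo);
            pick_reset_domain(&member);

            printf("solo uses %s, hive member uses %s\n",
                   solo.reset_domain->wq_name, member.reset_domain->wq_name);
            return 0;
    }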

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://www.spinics.net/lists/amd-gfx/msg74112.html
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

index 9704b0e..00123b0 100644
@@ -2287,6 +2287,47 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
        return r;
 }
 
+static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
+{
+       long timeout;
+       int r, i;
+
+       for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+               struct amdgpu_ring *ring = adev->rings[i];
+
+               /* No need to setup the GPU scheduler for rings that don't need it */
+               if (!ring || ring->no_scheduler)
+                       continue;
+
+               switch (ring->funcs->type) {
+               case AMDGPU_RING_TYPE_GFX:
+                       timeout = adev->gfx_timeout;
+                       break;
+               case AMDGPU_RING_TYPE_COMPUTE:
+                       timeout = adev->compute_timeout;
+                       break;
+               case AMDGPU_RING_TYPE_SDMA:
+                       timeout = adev->sdma_timeout;
+                       break;
+               default:
+                       timeout = adev->video_timeout;
+                       break;
+               }
+
+               r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
+                                  ring->num_hw_submission, amdgpu_job_hang_limit,
+                                  timeout, adev->reset_domain.wq, ring->sched_score, ring->name);
+               if (r) {
+                       DRM_ERROR("Failed to create scheduler on ring %s.\n",
+                                 ring->name);
+                       return r;
+               }
+       }
+
+       return 0;
+}
+
+
 /**
  * amdgpu_device_ip_init - run init for hardware IPs
  *
@@ -2419,6 +2460,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
                }
        }
 
+       r = amdgpu_device_init_schedulers(adev);
+       if (r)
+               goto init_failed;
+
        /* Don't init kfd if whole hive need to be reset during init */
        if (!adev->gmc.xgmi.pending_reset)
                amdgpu_amdkfd_device_init(adev);
index 45977a7..5d13ed3 100644
@@ -446,24 +446,18 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
  * for the requested ring.
  *
  * @ring: ring to init the fence driver on
- * @num_hw_submission: number of entries on the hardware queue
- * @sched_score: optional score atomic shared with other schedulers
  *
  * Init the fence driver for the requested ring (all asics).
  * Helper function for amdgpu_fence_driver_init().
  */
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
-                                 unsigned num_hw_submission,
-                                 atomic_t *sched_score)
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
 {
        struct amdgpu_device *adev = ring->adev;
-       long timeout;
-       int r;
 
        if (!adev)
                return -EINVAL;
 
-       if (!is_power_of_2(num_hw_submission))
+       if (!is_power_of_2(ring->num_hw_submission))
                return -EINVAL;
 
        ring->fence_drv.cpu_addr = NULL;
@@ -474,41 +468,14 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 
        timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0);
 
-       ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1;
+       ring->fence_drv.num_fences_mask = ring->num_hw_submission * 2 - 1;
        spin_lock_init(&ring->fence_drv.lock);
-       ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *),
+       ring->fence_drv.fences = kcalloc(ring->num_hw_submission * 2, sizeof(void *),
                                         GFP_KERNEL);
+
        if (!ring->fence_drv.fences)
                return -ENOMEM;
 
-       /* No need to setup the GPU scheduler for rings that don't need it */
-       if (ring->no_scheduler)
-               return 0;
-
-       switch (ring->funcs->type) {
-       case AMDGPU_RING_TYPE_GFX:
-               timeout = adev->gfx_timeout;
-               break;
-       case AMDGPU_RING_TYPE_COMPUTE:
-               timeout = adev->compute_timeout;
-               break;
-       case AMDGPU_RING_TYPE_SDMA:
-               timeout = adev->sdma_timeout;
-               break;
-       default:
-               timeout = adev->video_timeout;
-               break;
-       }
-
-       r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
-                          num_hw_submission, amdgpu_job_hang_limit,
-                          timeout, NULL, sched_score, ring->name);
-       if (r) {
-               DRM_ERROR("Failed to create scheduler on ring %s.\n",
-                         ring->name);
-               return r;
-       }
-
        return 0;
 }
 
index ab2351b..35bcb6d 100644
@@ -191,8 +191,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
                ring->adev = adev;
                ring->idx = adev->num_rings++;
                adev->rings[ring->idx] = ring;
-               r = amdgpu_fence_driver_init_ring(ring, sched_hw_submission,
-                                                 sched_score);
+               ring->num_hw_submission = sched_hw_submission;
+               ring->sched_score = sched_score;
+               r = amdgpu_fence_driver_init_ring(ring);
                if (r)
                        return r;
        }
index fae7d18..48365da 100644
@@ -114,9 +114,7 @@ struct amdgpu_fence_driver {
 void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
 
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
-                                 unsigned num_hw_submission,
-                                 atomic_t *sched_score);
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
                                   struct amdgpu_irq_src *irq_src,
                                   unsigned irq_type);
@@ -251,6 +249,8 @@ struct amdgpu_ring {
        bool                    has_compute_vm_bug;
        bool                    no_scheduler;
        int                     hw_prio;
+       unsigned                num_hw_submission;
+       atomic_t                *sched_score;
 };
 
 #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))