drm/amdgpu: Add a low priority scheduler for VRAM clearing
author Mukul Joshi <mukul.joshi@amd.com>
Wed, 17 May 2023 19:53:50 +0000 (15:53 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 9 Jun 2023 14:54:40 +0000 (10:54 -0400)
Add a low priority DRM scheduler for VRAM clearing instead of using
the existing high priority scheduler. Use the high priority scheduler
for migrations and evictions.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
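
For context, a minimal sketch of the scheduler entity setup this patch results in. It assumes the pre-existing move entity is initialized at DRM_SCHED_PRIORITY_KERNEL (that init sits in unchanged context above the hunk below and is not part of the diff); the new "delayed" entity targets the same scheduler at normal priority:

    /* Sketch only: the existing entity's priority is an assumption, as
     * its init is not shown in the hunks below. Both entities feed the
     * scheduler of the same buffer-funcs (SDMA) ring, so priority alone
     * decides which jobs get picked first. */
    struct drm_gpu_scheduler *sched = &adev->mman.buffer_funcs_ring->sched;

    /* High priority: buffer migrations and evictions. */
    r = drm_sched_entity_init(&adev->mman.entity,
                              DRM_SCHED_PRIORITY_KERNEL, &sched, 1, NULL);

    /* Low priority: VRAM clearing and poison-on-release fills. */
    r = drm_sched_entity_init(&adev->mman.delayed,
                              DRM_SCHED_PRIORITY_NORMAL, &sched, 1, NULL);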

drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 42c02f4..d9e3315 100644
@@ -627,7 +627,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
            bo->tbo.resource->mem_type == TTM_PL_VRAM) {
                struct dma_fence *fence;
 
-               r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence);
+               r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
                if (unlikely(r))
                        goto fail_unreserve;
 
@@ -1354,7 +1354,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
        if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
                return;
 
-       r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
+       r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
        if (!WARN_ON(r)) {
                amdgpu_bo_fence(abo, fence, false);
                dma_fence_put(fence);
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 0abad5f..473eeac 100644
@@ -383,7 +383,8 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
            (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
                struct dma_fence *wipe_fence = NULL;
 
-               r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence);
+               r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
+                                       false);
                if (r) {
                        goto error;
                } else if (wipe_fence) {
@@ -2036,8 +2037,18 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
                                  r);
                        return;
                }
+
+               r = drm_sched_entity_init(&adev->mman.delayed,
+                                         DRM_SCHED_PRIORITY_NORMAL, &sched,
+                                         1, NULL);
+               if (r) {
+                       DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
+                                 r);
+                       goto error_free_entity;
+               }
        } else {
                drm_sched_entity_destroy(&adev->mman.entity);
+               drm_sched_entity_destroy(&adev->mman.delayed);
                dma_fence_put(man->move);
                man->move = NULL;
        }
@@ -2049,6 +2060,11 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
                size = adev->gmc.visible_vram_size;
        man->size = size;
        adev->mman.buffer_funcs_enabled = enable;
+
+       return;
+
+error_free_entity:
+       drm_sched_entity_destroy(&adev->mman.entity);
 }
 
 static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
@@ -2056,14 +2072,16 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
                                  unsigned int num_dw,
                                  struct dma_resv *resv,
                                  bool vm_needs_flush,
-                                 struct amdgpu_job **job)
+                                 struct amdgpu_job **job,
+                                 bool delayed)
 {
        enum amdgpu_ib_pool_type pool = direct_submit ?
                AMDGPU_IB_POOL_DIRECT :
                AMDGPU_IB_POOL_DELAYED;
        int r;
-
-       r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+       struct drm_sched_entity *entity = delayed ? &adev->mman.delayed :
+                                                   &adev->mman.entity;
+       r = amdgpu_job_alloc_with_ib(adev, entity,
                                     AMDGPU_FENCE_OWNER_UNDEFINED,
                                     num_dw * 4, pool, job);
        if (r)
@@ -2104,7 +2122,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
        num_loops = DIV_ROUND_UP(byte_count, max_bytes);
        num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
        r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
-                                  resv, vm_needs_flush, &job);
+                                  resv, vm_needs_flush, &job, false);
        if (r)
                return r;
 
@@ -2140,7 +2158,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
                               uint64_t dst_addr, uint32_t byte_count,
                               struct dma_resv *resv,
                               struct dma_fence **fence,
-                              bool vm_needs_flush)
+                              bool vm_needs_flush, bool delayed)
 {
        struct amdgpu_device *adev = ring->adev;
        unsigned int num_loops, num_dw;
@@ -2153,7 +2171,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
        num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
        num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
        r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
-                                  &job);
+                                  &job, delayed);
        if (r)
                return r;
 
@@ -2176,7 +2194,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
                        uint32_t src_data,
                        struct dma_resv *resv,
-                       struct dma_fence **f)
+                       struct dma_fence **f,
+                       bool delayed)
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
        struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
@@ -2205,7 +2224,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
                        goto error;
 
                r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
-                                       &next, true);
+                                       &next, true, delayed);
                if (r)
                        goto error;
 
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 8ef048a..e82b1ed 100644
@@ -61,6 +61,8 @@ struct amdgpu_mman {
        struct mutex                            gtt_window_lock;
        /* Scheduler entity for buffer moves */
        struct drm_sched_entity                 entity;
+       /* Scheduler entity for VRAM clearing */
+       struct drm_sched_entity                 delayed;
 
        struct amdgpu_vram_mgr vram_mgr;
        struct amdgpu_gtt_mgr gtt_mgr;
@@ -152,7 +154,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
                        uint32_t src_data,
                        struct dma_resv *resv,
-                       struct dma_fence **fence);
+                       struct dma_fence **fence,
+                       bool delayed);
 
 int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
 void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
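
Taken together, the call-site changes above split the fill paths as follows; this is only a summary of calls already present in the patch, gathered in one place:

    /* delayed = true, low priority entity: clearing a newly created
     * VRAM BO and poisoning a BO on release, neither of which is on a
     * latency-sensitive path. */
    r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);           /* amdgpu_bo_create() */
    r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);  /* amdgpu_bo_release_notify() */

    /* delayed = false, high priority entity: the wipe issued during a
     * buffer move, which stays on the migration/eviction path. */
    r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence, false);     /* amdgpu_move_blit() */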