drm/amdgpu: change reset lock from mutex to rw_semaphore

author Dennis Li <Dennis.Li@amd.com>

Thu, 20 Aug 2020 02:06:32 +0000 (10:06 +0800)

committer Alex Deucher <alexander.deucher@amd.com>

Mon, 24 Aug 2020 16:23:48 +0000 (12:23 -0400)
author Dennis Li <Dennis.Li@amd.com>
Thu, 20 Aug 2020 02:06:32 +0000 (10:06 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 24 Aug 2020 16:23:48 +0000 (12:23 -0400)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 72f2a9aeaf0e5452a2e0326965101283251c7f8b..ba5e8635ca5fe3d116eb220c8edb100fd7fe3756 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -951,7 +951,7 @@ struct amdgpu_device {
  
         atomic_t                        in_gpu_reset;
         enum pp_mp1_state               mp1_state;
-       struct mutex  lock_reset;
+       struct rw_semaphore reset_sem;
         struct amdgpu_doorbell_index doorbell_index;
  
         struct mutex                    notifier_lock;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c

index 79b397800cbc1e450b5acfbdf711ae7aa93fe17b..cc5c7f81c540460ae84a20f0764d80f47de42175 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -101,14 +101,18 @@ static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
  
         file->private_data = adev;
  
-       mutex_lock(&adev->lock_reset);
+       ret = down_read_killable(&adev->reset_sem);
+       if (ret)
+               return ret;
+
         if (adev->autodump.dumping.done) {
                 reinit_completion(&adev->autodump.dumping);
                 ret = 0;
         } else {
                 ret = -EBUSY;
         }
-       mutex_unlock(&adev->lock_reset);
+
+       up_read(&adev->reset_sem);
  
         return ret;
  }
@@ -1242,7 +1246,9 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
         }
  
         /* Avoid accidently unparking the sched thread during GPU reset */
-       mutex_lock(&adev->lock_reset);
+       r = down_read_killable(&adev->reset_sem);
+       if (r)
+               return r;
  
         /* hold on the scheduler */
         for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
@@ -1269,7 +1275,7 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
                 kthread_unpark(ring->sched.thread);
         }
  
-       mutex_unlock(&adev->lock_reset);
+       up_read(&adev->reset_sem);
  
         pm_runtime_mark_last_busy(dev->dev);
         pm_runtime_put_autosuspend(dev->dev);
@@ -1459,7 +1465,9 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
                 return -ENOMEM;
  
         /* Avoid accidently unparking the sched thread during GPU reset */
-       mutex_lock(&adev->lock_reset);
+       r = down_read_killable(&adev->reset_sem);
+       if (r)
+               goto pro_end;
  
         /* stop the scheduler */
         kthread_park(ring->sched.thread);
@@ -1500,13 +1508,14 @@ failure:
         /* restart the scheduler */
         kthread_unpark(ring->sched.thread);
  
-       mutex_unlock(&adev->lock_reset);
+       up_read(&adev->reset_sem);
  
         ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
  
+pro_end:
         kfree(fences);
  
-       return 0;
+       return r;
  }
  
  static int amdgpu_debugfs_sclk_set(void *data, u64 val)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 78fd2c9a7b7d8b4009712c5db1292f3a9c0f4b80..82242e2f56588b3ee576bb010bc28c96dc26d24c 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3054,7 +3054,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
         mutex_init(&adev->virt.vf_errors.lock);
         hash_init(adev->mn_hash);
         atomic_set(&adev->in_gpu_reset, 0);
-       mutex_init(&adev->lock_reset);
+       init_rwsem(&adev->reset_sem);
         mutex_init(&adev->psp.mutex);
         mutex_init(&adev->notifier_lock);
  
@@ -4206,7 +4206,7 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev)
         if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
                 return false;
  
-       mutex_lock(&adev->lock_reset);
+       down_write(&adev->reset_sem);
  
         atomic_inc(&adev->gpu_reset_counter);
         switch (amdgpu_asic_reset_method(adev)) {
@@ -4229,7 +4229,7 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
         amdgpu_vf_error_trans_all(adev);
         adev->mp1_state = PP_MP1_STATE_NONE;
         atomic_set(&adev->in_gpu_reset, 0);
-       mutex_unlock(&adev->lock_reset);
+       up_write(&adev->reset_sem);
  }
  
  static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c

index f27d83f2de78920276e1eb2c82a9ac949d72619d..9c07014d9bd6ce4ba1e03568e349b37b297f4e52 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -238,19 +238,15 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
         struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
         struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
         int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
-       int locked;
  
         /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
          * otherwise the mailbox msg will be ruined/reseted by
          * the VF FLR.
-        *
-        * we can unlock the lock_reset to allow "amdgpu_job_timedout"
-        * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
-        * which means host side had finished this VF's FLR.
          */
-       locked = mutex_trylock(&adev->lock_reset);
-       if (locked)
-               atomic_set(&adev->in_gpu_reset, 1);
+       if (!down_read_trylock(&adev->reset_sem))
+               return;
+
+       atomic_set(&adev->in_gpu_reset, 1);
  
         do {
                 if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
@@ -261,10 +257,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
         } while (timeout > 1);
  
  flr_done:
-       if (locked) {
-               atomic_set(&adev->in_gpu_reset, 0);
-               mutex_unlock(&adev->lock_reset);
-       }
+       atomic_set(&adev->in_gpu_reset, 0);
+       up_read(&adev->reset_sem);
  
         /* Trigger recovery for world switch failure if no TDR */
         if (amdgpu_device_should_recover_gpu(adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c

index 3cb10ab943a6661f03484d538c33fcb3de54f8a4..9c23abf9b140d8f177ee1c2e27371ab77ebbc09c 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -259,19 +259,15 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
         struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
         struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
         int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
-       int locked;
  
         /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
          * otherwise the mailbox msg will be ruined/reseted by
          * the VF FLR.
-        *
-        * we can unlock the lock_reset to allow "amdgpu_job_timedout"
-        * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
-        * which means host side had finished this VF's FLR.
          */
-       locked = mutex_trylock(&adev->lock_reset);
-       if (locked)
-               atomic_set(&adev->in_gpu_reset, 1);
+       if (!down_read_trylock(&adev->reset_sem))
+               return;
+
+       atomic_set(&adev->in_gpu_reset, 1);
  
         do {
                 if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
@@ -282,10 +278,8 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
         } while (timeout > 1);
  
  flr_done:
-       if (locked) {
-               atomic_set(&adev->in_gpu_reset, 0);
-               mutex_unlock(&adev->lock_reset);
-       }
+       atomic_set(&adev->in_gpu_reset, 0);
+       up_read(&adev->reset_sem);
  
         /* Trigger recovery for world switch failure if no TDR */
         if (amdgpu_device_should_recover_gpu(adev)
author	Dennis Li <Dennis.Li@amd.com>
	Thu, 20 Aug 2020 02:06:32 +0000 (10:06 +0800)
committer	Alex Deucher <alexander.deucher@amd.com>
	Mon, 24 Aug 2020 16:23:48 +0000 (12:23 -0400)
drivers/gpu/drm/amd/amdgpu/amdgpu.h		patch | blob | history
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c		patch | blob | history
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c		patch | blob | history
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c		patch | blob | history
drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c		patch | blob | history