drm/amdkfd: svm range eviction and restore
author: Felix Kuehling <Felix.Kuehling@amd.com>
Wed, 24 Feb 2021 23:47:52 +0000 (18:47 -0500)
committer: Alex Deucher <alexander.deucher@amd.com>
Wed, 21 Apr 2021 01:47:27 +0000 (21:47 -0400)
The HMM interval notifier callback signals that the CPU page table is about
to be updated. Stop the process queues if the updated address belongs to an
svm range registered in the process svms object tree, then schedule restore
work to update the GPU page table using the new page addresses of that range.

The restore worker flushes any deferred work to make sure it restores
an up-to-date svm_range_list.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process.c
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
drivers/gpu/drm/amd/amdkfd/kfd_svm.h

index 6cb0c4168fa0ba2f912213891cb3f9dfad3e8d4f..81f71c4079a61435f487cf2f692a1ba9be63ed5f 100644 (file)
@@ -738,6 +738,8 @@ struct svm_range_list {
        struct work_struct              deferred_list_work;
        struct list_head                deferred_range_list;
        spinlock_t                      deferred_list_lock;
+       atomic_t                        evicted_ranges;
+       struct delayed_work             restore_work;
 };
 
 /* Process data */
index 5f1ec75535091a2893a0b551a8604dfabafa055c..3c72e9dc642247431d7070ce3da4c69cca05d1be 100644 (file)
@@ -1064,6 +1064,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 
        cancel_delayed_work_sync(&p->eviction_work);
        cancel_delayed_work_sync(&p->restore_work);
+       cancel_delayed_work_sync(&p->svms.restore_work);
 
        mutex_lock(&p->mutex);
 
index 3ccb75d45f13a39ff45d3c97d163d95966c4efe2..1fe6913242d77ac367b22d1aa602d89a367f0aa1 100644 (file)
@@ -22,6 +22,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/sched/task.h>
 #include "amdgpu_sync.h"
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
@@ -29,6 +30,8 @@
 #include "kfd_priv.h"
 #include "kfd_svm.h"
 
+#define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
+
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
                                    const struct mmu_notifier_range *range,
@@ -251,6 +254,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
        INIT_LIST_HEAD(&prange->insert_list);
        INIT_LIST_HEAD(&prange->deferred_list);
        INIT_LIST_HEAD(&prange->child_list);
+       atomic_set(&prange->invalid, 0);
        mutex_init(&prange->lock);
        svm_range_set_default_attributes(&prange->preferred_loc,
                                         &prange->prefetch_loc,
@@ -963,6 +967,129 @@ retry_flush_work:
        goto retry_flush_work;
 }
 
+static void svm_range_restore_work(struct work_struct *work)
+{
+       struct delayed_work *dwork = to_delayed_work(work);
+       struct amdkfd_process_info *process_info;
+       struct svm_range_list *svms;
+       struct svm_range *prange;
+       struct kfd_process *p;
+       struct mm_struct *mm;
+       int evicted_ranges;
+       int invalid;
+       int r;
+
+       svms = container_of(dwork, struct svm_range_list, restore_work);
+       evicted_ranges = atomic_read(&svms->evicted_ranges);
+       if (!evicted_ranges)
+               return;
+
+       pr_debug("restore svm ranges\n");
+
+       /* kfd_process_notifier_release cancels this delayed work synchronously,
+        * so kfd_process and mm stay valid for as long as this worker runs.
+        */
+       p = container_of(svms, struct kfd_process, svms);
+       process_info = p->kgd_process_info;
+       mm = p->mm;
+       if (!mm)
+               return;
+
+       mutex_lock(&process_info->lock);
+       svm_range_list_lock_and_flush_work(svms, mm);
+       mutex_lock(&svms->lock);
+
+       evicted_ranges = atomic_read(&svms->evicted_ranges);
+
+       list_for_each_entry(prange, &svms->list, list) {
+               invalid = atomic_read(&prange->invalid);
+               if (!invalid)
+                       continue;
+
+               pr_debug("restoring svms 0x%p prange 0x%p [0x%lx %lx] inv %d\n",
+                        prange->svms, prange, prange->start, prange->last,
+                        invalid);
+
+               r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+                                              false, true);
+               if (r) {
+                       pr_debug("failed %d to map 0x%lx to gpus\n", r,
+                                prange->start);
+                       goto unlock_out;
+               }
+
+               if (atomic_cmpxchg(&prange->invalid, invalid, 0) != invalid)
+                       goto unlock_out;
+       }
+
+       if (atomic_cmpxchg(&svms->evicted_ranges, evicted_ranges, 0) !=
+           evicted_ranges)
+               goto unlock_out;
+
+       evicted_ranges = 0;
+
+       r = kgd2kfd_resume_mm(mm);
+       if (r) {
+               /* No recovery from this failure, probably the CP is hanging.
+                * evicted_ranges is already 0 here, so no retry is scheduled.
+                */
+               pr_debug("failed %d to resume KFD\n", r);
+       }
+
+       pr_debug("restore svm ranges successfully\n");
+
+unlock_out:
+       mutex_unlock(&svms->lock);
+       mmap_write_unlock(mm);
+       mutex_unlock(&process_info->lock);
+
+       /* Mapping failed or a new eviction raced with us: retry after a delay */
+       if (evicted_ranges) {
+               pr_debug("reschedule to restore svm range\n");
+               schedule_delayed_work(&svms->restore_work,
+                       msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+       }
+}
+
+/**
+ * svm_range_evict - evict svm range
+ *
+ * Mark @prange invalid and count it in svms->evicted_ranges. Only the first
+ * eviction (count becomes 1) stops the process queues with kgd2kfd_quiesce_mm()
+ * and schedules restore_work. @start and @last are not used in the body.
+ *
+ * No lock is needed to sync CPU page table invalidation with GPU execution:
+ * if invalidation happens while restore work is running, restore work retries
+ * so the GPU gets the latest CPU pages. Returns 0 or the quiesce result.
+ */
+static int
+svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
+               unsigned long start, unsigned long last)
+{
+       struct svm_range_list *svms = prange->svms;
+       int evicted_ranges;
+       int r = 0;
+
+       atomic_inc(&prange->invalid);
+       evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
+       if (evicted_ranges != 1)
+               return r;
+
+       pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
+                prange->svms, prange->start, prange->last);
+
+       /* First eviction (counter just became 1): stop the queues */
+       r = kgd2kfd_quiesce_mm(mm);
+       if (r)
+               pr_debug("failed to quiesce KFD\n");
+
+       pr_debug("schedule to restore svm %p ranges\n", svms);
+       schedule_delayed_work(&svms->restore_work,
+               msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+
+       return r;
+}
+
 static struct svm_range *svm_range_clone(struct svm_range *old)
 {
        struct svm_range *new;
@@ -1331,6 +1458,11 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
  * svm_range_cpu_invalidate_pagetables - interval notifier callback
  *
  * MMU range unmap notifier to remove svm ranges
+ *
+ * If GPU vm fault retry is not enabled, evict the svm range, then restore
+ * work will update GPU mapping.
+ * If GPU vm fault retry is enabled, unmap the svm range from GPU, vm fault
+ * will update GPU mapping.
  */
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
@@ -1364,6 +1496,7 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
                svm_range_unmap_from_cpu(mni->mm, prange, start, last);
                break;
        default:
+               svm_range_evict(prange, mni->mm, start, last);
                break;
        }
 
@@ -1389,6 +1522,8 @@ int svm_range_list_init(struct kfd_process *p)
        svms->objects = RB_ROOT_CACHED;
        mutex_init(&svms->lock);
        INIT_LIST_HEAD(&svms->list);
+       atomic_set(&svms->evicted_ranges, 0);
+       INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
        INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
        INIT_LIST_HEAD(&svms->deferred_range_list);
        spin_lock_init(&svms->deferred_list_lock);
index 5949890bf48cbedd6b6c9ff927612119197e6d55..3c94899c5c4021769e3b7cafca718e778d968531 100644 (file)
@@ -67,6 +67,7 @@ struct svm_work_list_item {
  * @perfetch_loc: last prefetch location, 0 for CPU, or GPU id
  * @actual_loc: the actual location, 0 for CPU, or GPU id
  * @granularity:migration granularity, log2 num pages
+ * @invalid:    not 0 means cpu page table is invalidated
  * @notifier:   register mmu interval notifier
  * @work_item:  deferred work item information
  * @deferred_list: list header used to add range to deferred list
@@ -97,6 +98,7 @@ struct svm_range {
        uint32_t                        prefetch_loc;
        uint32_t                        actual_loc;
        uint8_t                         granularity;
+       atomic_t                        invalid;
        struct mmu_interval_notifier    notifier;
        struct svm_work_list_item       work_item;
        struct list_head                deferred_list;