INIT_LIST_HEAD(&prange->update_list);
INIT_LIST_HEAD(&prange->remove_list);
INIT_LIST_HEAD(&prange->insert_list);
+ INIT_LIST_HEAD(&prange->deferred_list);
+ INIT_LIST_HEAD(&prange->child_list);
mutex_init(&prange->lock);
svm_range_set_default_attributes(&prange->preferred_loc,
&prange->prefetch_loc,
return r;
}
+static void
+svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
+ struct svm_range *pchild, enum svm_work_list_ops op)
+{
+ pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n",
+ pchild, pchild->start, pchild->last, prange, op);
+
+ pchild->work_item.mm = mm;
+ pchild->work_item.op = op;
+ list_add_tail(&pchild->child_list, &prange->child_list);
+}
+
/*
* Validation+GPU mapping with concurrent invalidation (MMU notifiers)
*
return r;
}
+/**
+ * svm_range_list_lock_and_flush_work - flush pending deferred work
+ *
+ * @svms: the svm range list
+ * @mm: the mm structure
+ *
+ * Context: Returns with mmap write lock held, pending deferred work flushed
+ *
+ */
+static void
+svm_range_list_lock_and_flush_work(struct svm_range_list *svms,
+ struct mm_struct *mm)
+{
+retry_flush_work:
+ flush_work(&svms->deferred_list_work);
+ mmap_write_lock(mm);
+
+ if (list_empty(&svms->deferred_range_list))
+ return;
+ mmap_write_unlock(mm);
+ pr_debug("retry flush\n");
+ goto retry_flush_work;
+}
+
static struct svm_range *svm_range_clone(struct svm_range *old)
{
struct svm_range *new;
return r;
}
+static void
+svm_range_update_notifier_and_interval_tree(struct mm_struct *mm,
+ struct svm_range *prange)
+{
+ unsigned long start;
+ unsigned long last;
+
+ start = prange->notifier.interval_tree.start >> PAGE_SHIFT;
+ last = prange->notifier.interval_tree.last >> PAGE_SHIFT;
+
+ if (prange->start == start && prange->last == last)
+ return;
+
+ pr_debug("up notifier 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
+ prange->svms, prange, start, last, prange->start,
+ prange->last);
+
+ if (start != 0 && last != 0) {
+ interval_tree_remove(&prange->it_node, &prange->svms->objects);
+ svm_range_remove_notifier(prange);
+ }
+ prange->it_node.start = prange->start;
+ prange->it_node.last = prange->last;
+
+ interval_tree_insert(&prange->it_node, &prange->svms->objects);
+ svm_range_add_notifier_locked(mm, prange);
+}
+
+static void
+svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
+{
+ struct mm_struct *mm = prange->work_item.mm;
+
+ switch (prange->work_item.op) {
+ case SVM_OP_NULL:
+ pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+ svms, prange, prange->start, prange->last);
+ break;
+ case SVM_OP_UNMAP_RANGE:
+ pr_debug("remove 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+ svms, prange, prange->start, prange->last);
+ svm_range_unlink(prange);
+ svm_range_remove_notifier(prange);
+ svm_range_free(prange);
+ break;
+ case SVM_OP_UPDATE_RANGE_NOTIFIER:
+ pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+ svms, prange, prange->start, prange->last);
+ svm_range_update_notifier_and_interval_tree(mm, prange);
+ break;
+ case SVM_OP_ADD_RANGE:
+ pr_debug("add 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange,
+ prange->start, prange->last);
+ svm_range_add_to_svms(prange);
+ svm_range_add_notifier_locked(mm, prange);
+ break;
+ default:
+ WARN_ONCE(1, "Unknown prange 0x%p work op %d\n", prange,
+ prange->work_item.op);
+ }
+}
+
+static void svm_range_deferred_list_work(struct work_struct *work)
+{
+ struct svm_range_list *svms;
+ struct svm_range *prange;
+ struct mm_struct *mm;
+
+ svms = container_of(work, struct svm_range_list, deferred_list_work);
+ pr_debug("enter svms 0x%p\n", svms);
+
+ spin_lock(&svms->deferred_list_lock);
+ while (!list_empty(&svms->deferred_range_list)) {
+ prange = list_first_entry(&svms->deferred_range_list,
+ struct svm_range, deferred_list);
+ spin_unlock(&svms->deferred_list_lock);
+ pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
+ prange->start, prange->last, prange->work_item.op);
+
+ mm = prange->work_item.mm;
+ mmap_write_lock(mm);
+ mutex_lock(&svms->lock);
+
+ /* Remove from deferred_list must be inside mmap write lock,
+ * otherwise, svm_range_list_lock_and_flush_work may hold mmap
+ * write lock, and continue because deferred_list is empty, then
+ * deferred_list handle is blocked by mmap write lock.
+ */
+ spin_lock(&svms->deferred_list_lock);
+ list_del_init(&prange->deferred_list);
+ spin_unlock(&svms->deferred_list_lock);
+
+ while (!list_empty(&prange->child_list)) {
+ struct svm_range *pchild;
+
+ pchild = list_first_entry(&prange->child_list,
+ struct svm_range, child_list);
+ pr_debug("child prange 0x%p op %d\n", pchild,
+ pchild->work_item.op);
+ list_del_init(&pchild->child_list);
+ svm_range_handle_list_op(svms, pchild);
+ }
+
+ svm_range_handle_list_op(svms, prange);
+ mutex_unlock(&svms->lock);
+ mmap_write_unlock(mm);
+
+ spin_lock(&svms->deferred_list_lock);
+ }
+ spin_unlock(&svms->deferred_list_lock);
+
+ pr_debug("exit svms 0x%p\n", svms);
+}
+
+static void
+svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
+ struct mm_struct *mm, enum svm_work_list_ops op)
+{
+ spin_lock(&svms->deferred_list_lock);
+ /* if prange is on the deferred list */
+ if (!list_empty(&prange->deferred_list)) {
+ pr_debug("update exist prange 0x%p work op %d\n", prange, op);
+ WARN_ONCE(prange->work_item.mm != mm, "unmatch mm\n");
+ if (op != SVM_OP_NULL &&
+ prange->work_item.op != SVM_OP_UNMAP_RANGE)
+ prange->work_item.op = op;
+ } else {
+ prange->work_item.op = op;
+ prange->work_item.mm = mm;
+ list_add_tail(&prange->deferred_list,
+ &prange->svms->deferred_range_list);
+ pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n",
+ prange, prange->start, prange->last, op);
+ }
+ spin_unlock(&svms->deferred_list_lock);
+}
+
+static void schedule_deferred_list_work(struct svm_range_list *svms)
+{
+ spin_lock(&svms->deferred_list_lock);
+ if (!list_empty(&svms->deferred_range_list))
+ schedule_work(&svms->deferred_list_work);
+ spin_unlock(&svms->deferred_list_lock);
+}
+
+static void
+svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent,
+ struct svm_range *prange, unsigned long start,
+ unsigned long last)
+{
+ struct svm_range *head;
+ struct svm_range *tail;
+
+ if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
+ pr_debug("prange 0x%p [0x%lx 0x%lx] is already freed\n", prange,
+ prange->start, prange->last);
+ return;
+ }
+ if (start > prange->last || last < prange->start)
+ return;
+
+ head = tail = prange;
+ if (start > prange->start)
+ svm_range_split(prange, prange->start, start - 1, &tail);
+ if (last < tail->last)
+ svm_range_split(tail, last + 1, tail->last, &head);
+
+ if (head != prange && tail != prange) {
+ svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
+ } else if (tail != prange) {
+ svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE);
+ } else if (head != prange) {
+ svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
+ } else if (parent != prange) {
+ prange->work_item.op = SVM_OP_UNMAP_RANGE;
+ }
+}
+
+static void
+svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
+ unsigned long start, unsigned long last)
+{
+ struct svm_range_list *svms;
+ struct svm_range *pchild;
+ struct kfd_process *p;
+ bool unmap_parent;
+
+ p = kfd_lookup_process_by_mm(mm);
+ if (!p)
+ return;
+ svms = &p->svms;
+
+ pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
+ prange, prange->start, prange->last, start, last);
+
+ unmap_parent = start <= prange->start && last >= prange->last;
+
+ list_for_each_entry(pchild, &prange->child_list, child_list)
+ svm_range_unmap_split(mm, prange, pchild, start, last);
+ svm_range_unmap_split(mm, prange, prange, start, last);
+
+ if (unmap_parent)
+ svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE);
+ else
+ svm_range_add_list_work(svms, prange, mm,
+ SVM_OP_UPDATE_RANGE_NOTIFIER);
+ schedule_deferred_list_work(svms);
+
+ kfd_unref_process(p);
+}
+
/**
* svm_range_cpu_invalidate_pagetables - interval notifier callback
*
+ * MMU range unmap notifier to remove svm ranges
*/
static bool
svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
unsigned long cur_seq)
{
+ struct svm_range *prange;
+ unsigned long start;
+ unsigned long last;
+
+ if (range->event == MMU_NOTIFY_RELEASE)
+ return true;
+
+ start = mni->interval_tree.start;
+ last = mni->interval_tree.last;
+ start = (start > range->start ? start : range->start) >> PAGE_SHIFT;
+ last = (last < (range->end - 1) ? last : range->end - 1) >> PAGE_SHIFT;
+ pr_debug("[0x%lx 0x%lx] range[0x%lx 0x%lx] notifier[0x%lx 0x%lx] %d\n",
+ start, last, range->start >> PAGE_SHIFT,
+ (range->end - 1) >> PAGE_SHIFT,
+ mni->interval_tree.start >> PAGE_SHIFT,
+ mni->interval_tree.last >> PAGE_SHIFT, range->event);
+
+ prange = container_of(mni, struct svm_range, notifier);
+
+ svm_range_lock(prange);
+ mmu_interval_set_seq(mni, cur_seq);
+
+ switch (range->event) {
+ case MMU_NOTIFY_UNMAP:
+ svm_range_unmap_from_cpu(mni->mm, prange, start, last);
+ break;
+ default:
+ break;
+ }
+
+ svm_range_unlock(prange);
+
return true;
}
mutex_destroy(&p->svms.lock);
pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms);
+
+ /* Ensure list work is finished before process is destroyed */
+ flush_work(&p->svms.deferred_list_work);
}
int svm_range_list_init(struct kfd_process *p)
svms->objects = RB_ROOT_CACHED;
mutex_init(&svms->lock);
INIT_LIST_HEAD(&svms->list);
+ INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
+ INIT_LIST_HEAD(&svms->deferred_range_list);
+ spin_lock_init(&svms->deferred_list_lock);
return 0;
}
mutex_lock(&process_info->lock);
- mmap_write_lock(mm);
+ svm_range_list_lock_and_flush_work(svms, mm);
if (!svm_range_is_valid(mm, start, size)) {
pr_debug("invalid range\n");