mm: device exclusive memory access
authorAlistair Popple <apopple@nvidia.com>
Thu, 1 Jul 2021 01:54:25 +0000 (18:54 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 1 Jul 2021 18:06:03 +0000 (11:06 -0700)
Some devices require exclusive write access to shared virtual memory (SVM)
ranges to perform atomic operations on that memory.  This requires CPU
page tables to be updated to deny access whilst atomic operations are
occurring.

In order to do this introduce a new swap entry type
(SWP_DEVICE_EXCLUSIVE).  When a SVM range needs to be marked for exclusive
access by a device all page table mappings for the particular range are
replaced with device exclusive swap entries.  This causes any CPU access
to the page to result in a fault.

Faults are resolved by replacing the faulting entry with the original
mapping.  This results in MMU notifiers being called which a driver uses
to update access permissions such as revoking atomic access.  After
notifiers have been called the device will no longer have exclusive access
to the region.

Walking of the page tables to find the target pages is handled by
get_user_pages() rather than a direct page table walk.  A direct page
table walk similar to what migrate_vma_collect()/unmap() does could also
have been utilised.  However this resulted in more code similar in
functionality to what get_user_pages() provides as page faulting is
required to make the PTEs present and to break COW.

[dan.carpenter@oracle.com: fix signedness bug in make_device_exclusive_range()]
Link: https://lkml.kernel.org/r/YNIz5NVnZ5GiZ3u1@mwanda
Link: https://lkml.kernel.org/r/20210616105937.23201-8-apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Ben Skeggs <bskeggs@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Documentation/vm/hmm.rst
include/linux/mmu_notifier.h
include/linux/rmap.h
include/linux/swap.h
include/linux/swapops.h
mm/hmm.c
mm/memory.c
mm/mprotect.c
mm/page_vma_mapped.c
mm/rmap.c

index 3df79307a7978fde37c16e1c0e1948d77d8d1c8c..a14c2938e7af3c5b18da3661fdcc31c75e695ce9 100644 (file)
@@ -405,6 +405,23 @@ between device driver specific code and shared common code:
 
    The lock can now be released.
 
+Exclusive access memory
+=======================
+
+Some devices have features such as atomic PTE bits that can be used to implement
+atomic access to system memory. To support atomic operations to a shared virtual
+memory page such a device needs access to that page which is exclusive of any
+userspace access from the CPU. The ``make_device_exclusive_range()`` function
+can be used to make a memory range inaccessible from userspace.
+
+This replaces all mappings for pages in the given range with special swap
+entries. Any attempt to access the swap entry results in a fault which is
+resolved by replacing the entry with the original mapping. A driver gets
+notified that the mapping has been changed by MMU notifiers, after which point
+it will no longer have exclusive access to the page. Exclusive access is
+guaranteed to last until the driver drops the page lock and page reference, at
+which point any CPU faults on the page may proceed as described.
+
 Memory cgroup (memcg) and rss accounting
 ========================================
 
index 8e428eb813b85f9e370444628a0e01537d495785..6692da8d121d4c9b2e4aaadadedbd7624f3138d3 100644 (file)
@@ -42,6 +42,11 @@ struct mmu_interval_notifier;
  * @MMU_NOTIFY_MIGRATE: used during migrate_vma_collect() invalidate to signal
  * a device driver to possibly ignore the invalidation if the
  * owner field matches the driver's device private pgmap owner.
+ *
+ * @MMU_NOTIFY_EXCLUSIVE: to signal a device driver that the device will no
+ * longer have exclusive access to the page. When sent during creation of an
+ * exclusive range the owner will be initialised to the value provided by the
+ * caller of make_device_exclusive_range(), otherwise the owner will be NULL.
  */
 enum mmu_notifier_event {
        MMU_NOTIFY_UNMAP = 0,
@@ -51,6 +56,7 @@ enum mmu_notifier_event {
        MMU_NOTIFY_SOFT_DIRTY,
        MMU_NOTIFY_RELEASE,
        MMU_NOTIFY_MIGRATE,
+       MMU_NOTIFY_EXCLUSIVE,
 };
 
 #define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0)
index b0ea9d98302fc923adbdbdf44d1e080364ffd6fd..83fb86133fe19448908c82362ad2a18b4d8d3f3c 100644 (file)
@@ -194,6 +194,10 @@ int page_referenced(struct page *, int is_locked,
 void try_to_migrate(struct page *page, enum ttu_flags flags);
 void try_to_unmap(struct page *, enum ttu_flags flags);
 
+int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
+                               unsigned long end, struct page **pages,
+                               void *arg);
+
 /* Avoid racy checks */
 #define PVMW_SYNC              (1 << 0)
 /* Look for migarion entries rather than present PTEs */
index df7cbb6b3d3eab4065f18545c31547eb5406c126..6f5a43251593c8a554041fe5ce603df774267041 100644 (file)
@@ -62,12 +62,17 @@ static inline int current_is_kswapd(void)
  * migrate part of a process memory to device memory.
  *
  * When a page is migrated from CPU to device, we set the CPU page table entry
- * to a special SWP_DEVICE_* entry.
+ * to a special SWP_DEVICE_{READ|WRITE} entry.
+ *
+ * When a page is mapped by the device for exclusive access we set the CPU page
+ * table entries to special SWP_DEVICE_EXCLUSIVE_* entries.
  */
 #ifdef CONFIG_DEVICE_PRIVATE
-#define SWP_DEVICE_NUM 2
+#define SWP_DEVICE_NUM 4
 #define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM)
 #define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1)
+#define SWP_DEVICE_EXCLUSIVE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2)
+#define SWP_DEVICE_EXCLUSIVE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+3)
 #else
 #define SWP_DEVICE_NUM 0
 #endif
index 04d76357aa0c1a94e9b8be13ab47bd16c449ba28..d356ab4047f772075fe8b080435bf87b2a9080d0 100644 (file)
@@ -127,6 +127,27 @@ static inline bool is_writable_device_private_entry(swp_entry_t entry)
 {
        return unlikely(swp_type(entry) == SWP_DEVICE_WRITE);
 }
+
+static inline swp_entry_t make_readable_device_exclusive_entry(pgoff_t offset)
+{
+       return swp_entry(SWP_DEVICE_EXCLUSIVE_READ, offset);
+}
+
+static inline swp_entry_t make_writable_device_exclusive_entry(pgoff_t offset)
+{
+       return swp_entry(SWP_DEVICE_EXCLUSIVE_WRITE, offset);
+}
+
+static inline bool is_device_exclusive_entry(swp_entry_t entry)
+{
+       return swp_type(entry) == SWP_DEVICE_EXCLUSIVE_READ ||
+               swp_type(entry) == SWP_DEVICE_EXCLUSIVE_WRITE;
+}
+
+static inline bool is_writable_device_exclusive_entry(swp_entry_t entry)
+{
+       return unlikely(swp_type(entry) == SWP_DEVICE_EXCLUSIVE_WRITE);
+}
 #else /* CONFIG_DEVICE_PRIVATE */
 static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset)
 {
@@ -147,6 +168,26 @@ static inline bool is_writable_device_private_entry(swp_entry_t entry)
 {
        return false;
 }
+
+static inline swp_entry_t make_readable_device_exclusive_entry(pgoff_t offset)
+{
+       return swp_entry(0, 0);
+}
+
+static inline swp_entry_t make_writable_device_exclusive_entry(pgoff_t offset)
+{
+       return swp_entry(0, 0);
+}
+
+static inline bool is_device_exclusive_entry(swp_entry_t entry)
+{
+       return false;
+}
+
+static inline bool is_writable_device_exclusive_entry(swp_entry_t entry)
+{
+       return false;
+}
 #endif /* CONFIG_DEVICE_PRIVATE */
 
 #ifdef CONFIG_MIGRATION
@@ -226,7 +267,8 @@ static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry)
  */
 static inline bool is_pfn_swap_entry(swp_entry_t entry)
 {
-       return is_migration_entry(entry) || is_device_private_entry(entry);
+       return is_migration_entry(entry) || is_device_private_entry(entry) ||
+              is_device_exclusive_entry(entry);
 }
 
 struct page_vma_mapped_walk;
index 11df3ca30b82f094cb12fd8f890698fb5bfc0dcb..fad6be2bf07274b20ba5ec3e4749f9f80985a21d 100644 (file)
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -26,6 +26,8 @@
 #include <linux/mmu_notifier.h>
 #include <linux/memory_hotplug.h>
 
+#include "internal.h"
+
 struct hmm_vma_walk {
        struct hmm_range        *range;
        unsigned long           last;
@@ -271,6 +273,9 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
                if (!non_swap_entry(entry))
                        goto fault;
 
+               if (is_device_exclusive_entry(entry))
+                       goto fault;
+
                if (is_migration_entry(entry)) {
                        pte_unmap(ptep);
                        hmm_vma_walk->last = addr;
index 75feddbf0190f1e156d6c43749f7412971024d91..747a01d495f2c5cd0ff71e0910925a3374af316d 100644 (file)
@@ -699,6 +699,68 @@ out:
 }
 #endif
 
+static void restore_exclusive_pte(struct vm_area_struct *vma,
+                                 struct page *page, unsigned long address,
+                                 pte_t *ptep)
+{
+       pte_t pte;
+       swp_entry_t entry;
+
+       pte = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot)));
+       if (pte_swp_soft_dirty(*ptep))
+               pte = pte_mksoft_dirty(pte);
+
+       entry = pte_to_swp_entry(*ptep);
+       if (pte_swp_uffd_wp(*ptep))
+               pte = pte_mkuffd_wp(pte);
+       else if (is_writable_device_exclusive_entry(entry))
+               pte = maybe_mkwrite(pte_mkdirty(pte), vma);
+
+       set_pte_at(vma->vm_mm, address, ptep, pte);
+
+       /*
+        * No need to take a page reference as one was already
+        * created when the swap entry was made.
+        */
+       if (PageAnon(page))
+               page_add_anon_rmap(page, vma, address, false);
+       else
+               /*
+                * Currently device exclusive access only supports anonymous
+                * memory so the entry shouldn't point to a filebacked page.
+                */
+               WARN_ON_ONCE(!PageAnon(page));
+
+       if (vma->vm_flags & VM_LOCKED)
+               mlock_vma_page(page);
+
+       /*
+        * No need to invalidate - it was non-present before. However
+        * secondary CPUs may have mappings that need invalidating.
+        */
+       update_mmu_cache(vma, address, ptep);
+}
+
+/*
+ * Tries to restore an exclusive pte if the page lock can be acquired without
+ * sleeping.
+ */
+static int
+try_restore_exclusive_pte(pte_t *src_pte, struct vm_area_struct *vma,
+                       unsigned long addr)
+{
+       swp_entry_t entry = pte_to_swp_entry(*src_pte);
+       struct page *page = pfn_swap_entry_to_page(entry);
+
+       if (trylock_page(page)) {
+               restore_exclusive_pte(vma, page, addr, src_pte);
+               unlock_page(page);
+               return 0;
+       }
+
+       return -EBUSY;
+}
+
 /*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range
@@ -780,6 +842,17 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                                pte = pte_swp_mkuffd_wp(pte);
                        set_pte_at(src_mm, addr, src_pte, pte);
                }
+       } else if (is_device_exclusive_entry(entry)) {
+               /*
+                * Make device exclusive entries present by restoring the
+                * original entry then copying as for a present pte. Device
+                * exclusive entries currently only support private writable
+                * (ie. COW) mappings.
+                */
+               VM_BUG_ON(!is_cow_mapping(src_vma->vm_flags));
+               if (try_restore_exclusive_pte(src_pte, src_vma, addr))
+                       return -EBUSY;
+               return -ENOENT;
        }
        if (!userfaultfd_wp(dst_vma))
                pte = pte_swp_clear_uffd_wp(pte);
@@ -980,9 +1053,18 @@ again:
                        if (ret == -EIO) {
                                entry = pte_to_swp_entry(*src_pte);
                                break;
+                       } else if (ret == -EBUSY) {
+                               break;
+                       } else if (!ret) {
+                               progress += 8;
+                               continue;
                        }
-                       progress += 8;
-                       continue;
+
+                       /*
+                        * Device exclusive entry restored, continue by copying
+                        * the now present pte.
+                        */
+                       WARN_ON_ONCE(ret != -ENOENT);
                }
                /* copy_present_pte() will clear `*prealloc' if consumed */
                ret = copy_present_pte(dst_vma, src_vma, dst_pte, src_pte,
@@ -1020,6 +1102,8 @@ again:
                        goto out;
                }
                entry.val = 0;
+       } else if (ret == -EBUSY) {
+               goto out;
        } else if (ret ==  -EAGAIN) {
                prealloc = page_copy_prealloc(src_mm, src_vma, addr);
                if (!prealloc)
@@ -1287,7 +1371,8 @@ again:
                }
 
                entry = pte_to_swp_entry(ptent);
-               if (is_device_private_entry(entry)) {
+               if (is_device_private_entry(entry) ||
+                   is_device_exclusive_entry(entry)) {
                        struct page *page = pfn_swap_entry_to_page(entry);
 
                        if (unlikely(details && details->check_mapping)) {
@@ -1303,7 +1388,10 @@ again:
 
                        pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
                        rss[mm_counter(page)]--;
-                       page_remove_rmap(page, false);
+
+                       if (is_device_private_entry(entry))
+                               page_remove_rmap(page, false);
+
                        put_page(page);
                        continue;
                }
@@ -3351,6 +3439,34 @@ void unmap_mapping_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
+/*
+ * Restore a potential device exclusive pte to a working pte entry
+ */
+static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
+{
+       struct page *page = vmf->page;
+       struct vm_area_struct *vma = vmf->vma;
+       struct mmu_notifier_range range;
+
+       if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags))
+               return VM_FAULT_RETRY;
+       mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, vma,
+                               vma->vm_mm, vmf->address & PAGE_MASK,
+                               (vmf->address & PAGE_MASK) + PAGE_SIZE, NULL);
+       mmu_notifier_invalidate_range_start(&range);
+
+       vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
+                               &vmf->ptl);
+       if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
+               restore_exclusive_pte(vma, page, vmf->address, vmf->pte);
+
+       pte_unmap_unlock(vmf->pte, vmf->ptl);
+       unlock_page(page);
+
+       mmu_notifier_invalidate_range_end(&range);
+       return 0;
+}
+
 /*
  * We enter with non-exclusive mmap_lock (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
@@ -3379,6 +3495,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
                if (is_migration_entry(entry)) {
                        migration_entry_wait(vma->vm_mm, vmf->pmd,
                                             vmf->address);
+               } else if (is_device_exclusive_entry(entry)) {
+                       vmf->page = pfn_swap_entry_to_page(entry);
+                       ret = remove_device_exclusive_entry(vmf);
                } else if (is_device_private_entry(entry)) {
                        vmf->page = pfn_swap_entry_to_page(entry);
                        ret = vmf->page->pgmap->ops->migrate_to_ram(vmf);
index ee5961888e7075bdfdbe606a6df6efbfeb23b194..883e2cc85cad84ad0e67822b8ac22a0ecbae9aee 100644 (file)
@@ -165,6 +165,14 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                                newpte = swp_entry_to_pte(entry);
                                if (pte_swp_uffd_wp(oldpte))
                                        newpte = pte_swp_mkuffd_wp(newpte);
+                       } else if (is_writable_device_exclusive_entry(entry)) {
+                               entry = make_readable_device_exclusive_entry(
+                                                       swp_offset(entry));
+                               newpte = swp_entry_to_pte(entry);
+                               if (pte_swp_soft_dirty(oldpte))
+                                       newpte = pte_swp_mksoft_dirty(newpte);
+                               if (pte_swp_uffd_wp(oldpte))
+                                       newpte = pte_swp_mkuffd_wp(newpte);
                        } else {
                                newpte = oldpte;
                        }
index 4df6093e759bd0b1d3e3ce42833d9964bbe23af4..f7b3310817910f26ea3c8e384995e1a138a1f601 100644 (file)
@@ -41,7 +41,8 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw)
 
                                /* Handle un-addressable ZONE_DEVICE memory */
                                entry = pte_to_swp_entry(*pvmw->pte);
-                               if (!is_device_private_entry(entry))
+                               if (!is_device_private_entry(entry) &&
+                                   !is_device_exclusive_entry(entry))
                                        return false;
                        } else if (!pte_present(*pvmw->pte))
                                return false;
@@ -93,7 +94,8 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
                        return false;
                entry = pte_to_swp_entry(*pvmw->pte);
 
-               if (!is_migration_entry(entry))
+               if (!is_migration_entry(entry) &&
+                   !is_device_exclusive_entry(entry))
                        return false;
 
                pfn = swp_offset(entry);
@@ -102,7 +104,8 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
 
                /* Handle un-addressable ZONE_DEVICE memory */
                entry = pte_to_swp_entry(*pvmw->pte);
-               if (!is_device_private_entry(entry))
+               if (!is_device_private_entry(entry) &&
+                   !is_device_exclusive_entry(entry))
                        return false;
 
                pfn = swp_offset(entry);
index b922c7074cdddbd9abe53f65f130494286639930..37c24672125ccd650d8342f075f44d1cc3c181d6 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -2028,6 +2028,192 @@ void page_mlock(struct page *page)
        rmap_walk(page, &rwc);
 }
 
+#ifdef CONFIG_DEVICE_PRIVATE
+struct make_exclusive_args {
+       struct mm_struct *mm;
+       unsigned long address;
+       void *owner;
+       bool valid;
+};
+
+static bool page_make_device_exclusive_one(struct page *page,
+               struct vm_area_struct *vma, unsigned long address, void *priv)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       struct page_vma_mapped_walk pvmw = {
+               .page = page,
+               .vma = vma,
+               .address = address,
+       };
+       struct make_exclusive_args *args = priv;
+       pte_t pteval;
+       struct page *subpage;
+       bool ret = true;
+       struct mmu_notifier_range range;
+       swp_entry_t entry;
+       pte_t swp_pte;
+
+       mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, vma,
+                                     vma->vm_mm, address, min(vma->vm_end,
+                                     address + page_size(page)), args->owner);
+       mmu_notifier_invalidate_range_start(&range);
+
+       while (page_vma_mapped_walk(&pvmw)) {
+               /* Unexpected PMD-mapped THP? */
+               VM_BUG_ON_PAGE(!pvmw.pte, page);
+
+               if (!pte_present(*pvmw.pte)) {
+                       ret = false;
+                       page_vma_mapped_walk_done(&pvmw);
+                       break;
+               }
+
+               subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+               address = pvmw.address;
+
+               /* Nuke the page table entry. */
+               flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+               pteval = ptep_clear_flush(vma, address, pvmw.pte);
+
+               /* Move the dirty bit to the page. Now the pte is gone. */
+               if (pte_dirty(pteval))
+                       set_page_dirty(page);
+
+               /*
+                * Check that our target page is still mapped at the expected
+                * address.
+                */
+               if (args->mm == mm && args->address == address &&
+                   pte_write(pteval))
+                       args->valid = true;
+
+               /*
+                * Store the pfn of the page in a special device exclusive
+                * swap pte. A CPU fault on this entry makes do_swap_page()
+                * restore the original mapping after calling MMU notifiers.
+                */
+               if (pte_write(pteval))
+                       entry = make_writable_device_exclusive_entry(
+                                                       page_to_pfn(subpage));
+               else
+                       entry = make_readable_device_exclusive_entry(
+                                                       page_to_pfn(subpage));
+               swp_pte = swp_entry_to_pte(entry);
+               if (pte_soft_dirty(pteval))
+                       swp_pte = pte_swp_mksoft_dirty(swp_pte);
+               if (pte_uffd_wp(pteval))
+                       swp_pte = pte_swp_mkuffd_wp(swp_pte);
+
+               set_pte_at(mm, address, pvmw.pte, swp_pte);
+
+               /*
+                * There is a reference on the page for the swap entry which has
+                * been removed, so shouldn't take another.
+                */
+               page_remove_rmap(subpage, false);
+       }
+
+       mmu_notifier_invalidate_range_end(&range);
+
+       return ret;
+}
+
+/**
+ * page_make_device_exclusive - mark the page exclusively owned by a device
+ * @page: the page to replace page table entries for
+ * @mm: the mm_struct where the page is expected to be mapped
+ * @address: address where the page is expected to be mapped
+ * @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier callbacks
+ *
+ * Tries to remove all the page table entries which are mapping this page and
+ * replace them with special device exclusive swap entries to grant a device
+ * exclusive access to the page. Caller must hold the page lock.
+ *
+ * Returns false if the page is still mapped, or if it could not be unmapped
+ * from the expected address. Otherwise returns true (success).
+ */
+static bool page_make_device_exclusive(struct page *page, struct mm_struct *mm,
+                               unsigned long address, void *owner)
+{
+       struct make_exclusive_args args = {
+               .mm = mm,
+               .address = address,
+               .owner = owner,
+               .valid = false,
+       };
+       struct rmap_walk_control rwc = {
+               .rmap_one = page_make_device_exclusive_one,
+               .done = page_not_mapped,
+               .anon_lock = page_lock_anon_vma_read,
+               .arg = &args,
+       };
+
+       /*
+        * Restrict to anonymous pages for now to avoid potential writeback
+        * issues. Also tail pages shouldn't be passed to rmap_walk so skip
+        * those.
+        */
+       if (!PageAnon(page) || PageTail(page))
+               return false;
+
+       rmap_walk(page, &rwc);
+
+       return args.valid && !page_mapcount(page);
+}
+
+/**
+ * make_device_exclusive_range() - Mark a range for exclusive use by a device
+ * @mm: mm_struct of associated target process
+ * @start: start of the region to mark for exclusive device access
+ * @end: end address of region
+ * @pages: returns the pages which were successfully marked for exclusive access
+ * @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier to allow filtering
+ *
+ * Returns: number of pages found in the range by GUP. A page is marked for
+ * exclusive access only if the page pointer is non-NULL.
+ *
+ * This function finds ptes mapping page(s) to the given address range, locks
+ * them and replaces mappings with special swap entries preventing userspace CPU
+ * access. On fault these entries are replaced with the original mapping after
+ * calling MMU notifiers.
+ *
+ * A driver using this to program access from a device must use a mmu notifier
+ * critical section to hold a device specific lock during programming. Once
+ * programming is complete it should drop the page lock and reference after
+ * which point CPU access to the page will revoke the exclusive access.
+ */
+int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
+                               unsigned long end, struct page **pages,
+                               void *owner)
+{
+       long npages = (end - start) >> PAGE_SHIFT;
+       long i;
+
+       npages = get_user_pages_remote(mm, start, npages,
+                                      FOLL_GET | FOLL_WRITE | FOLL_SPLIT_PMD,
+                                      pages, NULL, NULL);
+       if (npages < 0)
+               return npages;
+
+       for (i = 0; i < npages; i++, start += PAGE_SIZE) {
+               if (!trylock_page(pages[i])) {
+                       put_page(pages[i]);
+                       pages[i] = NULL;
+                       continue;
+               }
+
+               if (!page_make_device_exclusive(pages[i], mm, start, owner)) {
+                       unlock_page(pages[i]);
+                       put_page(pages[i]);
+                       pages[i] = NULL;
+               }
+       }
+
+       return npages;
+}
+EXPORT_SYMBOL_GPL(make_device_exclusive_range);
+#endif
+
 void __put_anon_vma(struct anon_vma *anon_vma)
 {
        struct anon_vma *root = anon_vma->root;