mm: remove special swap entry functions
author Alistair Popple <apopple@nvidia.com>
Thu, 1 Jul 2021 01:54:06 +0000 (18:54 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 1 Jul 2021 18:06:03 +0000 (11:06 -0700)
Patch series "Add support for SVM atomics in Nouveau", v11.

Introduction
============

Some devices have features such as atomic PTE bits that can be used to
implement atomic access to system memory.  To support atomic operations to
a shared virtual memory page such a device needs access to that page which
is exclusive of the CPU.  This series introduces a mechanism to
temporarily unmap pages granting exclusive access to a device.

These changes are required to support OpenCL atomic operations in Nouveau
to shared virtual memory (SVM) regions allocated with the
CL_MEM_SVM_ATOMICS clSVMAlloc flag.  A more complete description of the
OpenCL SVM feature is available at
https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_shared_virtual_memory

Implementation
==============

Exclusive device access is implemented by adding a new swap entry type
(SWAP_DEVICE_EXCLUSIVE) which is similar to a migration entry.  The main
difference is that on fault the original entry is immediately restored by
the fault handler instead of waiting.

Restoring the entry triggers calls to MMU notifiers, which allows a device
driver to revoke the atomic access permission from the GPU prior to the
CPU finalising the entry.

Patches
=======

Patches 1 & 2 refactor existing migration and device private entry
functions.

Patches 3 & 4 rework try_to_unmap_one() by splitting out unrelated
functionality into separate functions - try_to_migrate_one() and
try_to_munlock_one().

Patch 5 renames some existing code but does not introduce new functionality.

Patch 6 is a small clean-up to swap entry handling in copy_pte_range().

Patch 7 contains the bulk of the implementation for device exclusive
memory.

Patch 8 contains some additions to the HMM selftests to ensure everything
works as expected.

Patch 9 is a cleanup for the Nouveau SVM implementation.

Patch 10 contains the implementation of atomic access for the Nouveau
driver.

Testing
=======

This has been tested with upstream Mesa 21.1.0 and a simple OpenCL program
which checks that GPU atomic accesses to system memory are atomic.
Without this series the test fails as there is no way of write-protecting
the page mapping which results in the device clobbering CPU writes.  For
reference the test is available at
https://ozlabs.org/~apopple/opencl_svm_atomics/

Further testing has been performed by adding support for testing exclusive
access to the hmm-tests kselftests.

This patch (of 10):

Remove multiple similar inline functions for dealing with different types
of special swap entries.

Both migration and device private swap entries use the swap offset to
store a pfn.  Instead of multiple inline functions to obtain a struct page
for each swap entry type, use a common function pfn_swap_entry_to_page().
Also open-code the various entry_to_pfn() functions as this results in
shorter code that is easier to understand.

Link: https://lkml.kernel.org/r/20210616105937.23201-1-apopple@nvidia.com
Link: https://lkml.kernel.org/r/20210616105937.23201-2-apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Ben Skeggs <bskeggs@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
arch/s390/mm/pgtable.c
fs/proc/task_mmu.c
include/linux/swap.h
include/linux/swapops.h
mm/hmm.c
mm/huge_memory.c
mm/memcontrol.c
mm/memory.c
mm/migrate.c
mm/page_vma_mapped.c

index 18205f851c247aaf2e20e79a5cc564fe122c38b7..eec3a9d7176e3bef5d9c07b5b134dce7cc945b88 100644 (file)
@@ -691,7 +691,7 @@ static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
        if (!non_swap_entry(entry))
                dec_mm_counter(mm, MM_SWAPENTS);
        else if (is_migration_entry(entry)) {
-               struct page *page = migration_entry_to_page(entry);
+               struct page *page = pfn_swap_entry_to_page(entry);
 
                dec_mm_counter(mm, mm_counter(page));
        }
index 95c8f1e8fea6b67fec174527654e49d86dc73a20..eb97468dfe4ca0c63234a0f576f954811db1371a 100644 (file)
@@ -514,10 +514,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
                        } else {
                                mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
                        }
-               } else if (is_migration_entry(swpent))
-                       page = migration_entry_to_page(swpent);
-               else if (is_device_private_entry(swpent))
-                       page = device_private_entry_to_page(swpent);
+               } else if (is_pfn_swap_entry(swpent))
+                       page = pfn_swap_entry_to_page(swpent);
        } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
                                                        && pte_none(*pte))) {
                page = xa_load(&vma->vm_file->f_mapping->i_pages,
@@ -549,7 +547,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
                swp_entry_t entry = pmd_to_swp_entry(*pmd);
 
                if (is_migration_entry(entry))
-                       page = migration_entry_to_page(entry);
+                       page = pfn_swap_entry_to_page(entry);
        }
        if (IS_ERR_OR_NULL(page))
                return;
@@ -694,10 +692,8 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
        } else if (is_swap_pte(*pte)) {
                swp_entry_t swpent = pte_to_swp_entry(*pte);
 
-               if (is_migration_entry(swpent))
-                       page = migration_entry_to_page(swpent);
-               else if (is_device_private_entry(swpent))
-                       page = device_private_entry_to_page(swpent);
+               if (is_pfn_swap_entry(swpent))
+                       page = pfn_swap_entry_to_page(swpent);
        }
        if (page) {
                int mapcount = page_mapcount(page);
@@ -1389,11 +1385,8 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
                        frame = swp_type(entry) |
                                (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
                flags |= PM_SWAP;
-               if (is_migration_entry(entry))
-                       page = migration_entry_to_page(entry);
-
-               if (is_device_private_entry(entry))
-                       page = device_private_entry_to_page(entry);
+               if (is_pfn_swap_entry(entry))
+                       page = pfn_swap_entry_to_page(entry);
        }
 
        if (page && !PageAnon(page))
@@ -1454,7 +1447,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
                        if (pmd_swp_uffd_wp(pmd))
                                flags |= PM_UFFD_WP;
                        VM_BUG_ON(!is_pmd_migration_entry(pmd));
-                       page = migration_entry_to_page(entry);
+                       page = pfn_swap_entry_to_page(entry);
                }
 #endif
 
index ac9bd84c905ebf2489c07a0f3ac363d46e8d6757..df7cbb6b3d3eab4065f18545c31547eb5406c126 100644 (file)
@@ -564,8 +564,8 @@ static inline void show_swap_cache_info(void)
 {
 }
 
-#define free_swap_and_cache(e) ({(is_migration_entry(e) || is_device_private_entry(e));})
-#define swapcache_prepare(e) ({(is_migration_entry(e) || is_device_private_entry(e));})
+/* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */
+#define free_swap_and_cache(e) is_pfn_swap_entry(e)
 
 static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
 {
index 708fbeb21dd397c8a3511d1368f389cc5124d280..c24c79812bc1dd4726118d2acfaf5a6856efa298 100644 (file)
@@ -128,16 +128,6 @@ static inline bool is_write_device_private_entry(swp_entry_t entry)
 {
        return unlikely(swp_type(entry) == SWP_DEVICE_WRITE);
 }
-
-static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry)
-{
-       return swp_offset(entry);
-}
-
-static inline struct page *device_private_entry_to_page(swp_entry_t entry)
-{
-       return pfn_to_page(swp_offset(entry));
-}
 #else /* CONFIG_DEVICE_PRIVATE */
 static inline swp_entry_t make_device_private_entry(struct page *page, bool write)
 {
@@ -157,16 +147,6 @@ static inline bool is_write_device_private_entry(swp_entry_t entry)
 {
        return false;
 }
-
-static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry)
-{
-       return 0;
-}
-
-static inline struct page *device_private_entry_to_page(swp_entry_t entry)
-{
-       return NULL;
-}
 #endif /* CONFIG_DEVICE_PRIVATE */
 
 #ifdef CONFIG_MIGRATION
@@ -189,22 +169,6 @@ static inline int is_write_migration_entry(swp_entry_t entry)
        return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE);
 }
 
-static inline unsigned long migration_entry_to_pfn(swp_entry_t entry)
-{
-       return swp_offset(entry);
-}
-
-static inline struct page *migration_entry_to_page(swp_entry_t entry)
-{
-       struct page *p = pfn_to_page(swp_offset(entry));
-       /*
-        * Any use of migration entries may only occur while the
-        * corresponding page is locked
-        */
-       BUG_ON(!PageLocked(compound_head(p)));
-       return p;
-}
-
 static inline void make_migration_entry_read(swp_entry_t *entry)
 {
        *entry = swp_entry(SWP_MIGRATION_READ, swp_offset(*entry));
@@ -224,16 +188,6 @@ static inline int is_migration_entry(swp_entry_t swp)
        return 0;
 }
 
-static inline unsigned long migration_entry_to_pfn(swp_entry_t entry)
-{
-       return 0;
-}
-
-static inline struct page *migration_entry_to_page(swp_entry_t entry)
-{
-       return NULL;
-}
-
 static inline void make_migration_entry_read(swp_entry_t *entryp) { }
 static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
                                        spinlock_t *ptl) { }
@@ -248,6 +202,29 @@ static inline int is_write_migration_entry(swp_entry_t entry)
 
 #endif
 
+static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry)
+{
+       struct page *p = pfn_to_page(swp_offset(entry));
+
+       /*
+        * Any use of migration entries may only occur while the
+        * corresponding page is locked
+        */
+       BUG_ON(is_migration_entry(entry) && !PageLocked(p));
+
+       return p;
+}
+
+/*
+ * A pfn swap entry is a special type of swap entry that always has a pfn stored
+ * in the swap offset. They are used to represent unaddressable device memory
+ * and to restrict access to a page undergoing migration.
+ */
+static inline bool is_pfn_swap_entry(swp_entry_t entry)
+{
+       return is_migration_entry(entry) || is_device_private_entry(entry);
+}
+
 struct page_vma_mapped_walk;
 
 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
index 943cb2ba444232565a696e5550ec9f22bd96f4fa..3b2dda71d0ed1982bf5f3e7ef4be42ea73c968af 100644 (file)
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -214,7 +214,7 @@ static inline bool hmm_is_device_private_entry(struct hmm_range *range,
                swp_entry_t entry)
 {
        return is_device_private_entry(entry) &&
-               device_private_entry_to_page(entry)->pgmap->owner ==
+               pfn_swap_entry_to_page(entry)->pgmap->owner ==
                range->dev_private_owner;
 }
 
@@ -257,8 +257,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
                        cpu_flags = HMM_PFN_VALID;
                        if (is_write_device_private_entry(entry))
                                cpu_flags |= HMM_PFN_WRITE;
-                       *hmm_pfn = device_private_entry_to_pfn(entry) |
-                                       cpu_flags;
+                       *hmm_pfn = swp_offset(entry) | cpu_flags;
                        return 0;
                }
 
index d513b0cd1161d97acfb33b5f56d862936e2eb95e..327b8d9d8d2f01631037d652c3990f7dd74b606d 100644 (file)
@@ -1643,7 +1643,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 
                        VM_BUG_ON(!is_pmd_migration_entry(orig_pmd));
                        entry = pmd_to_swp_entry(orig_pmd);
-                       page = migration_entry_to_page(entry);
+                       page = pfn_swap_entry_to_page(entry);
                        flush_needed = 0;
                } else
                        WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!");
@@ -2012,7 +2012,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                        swp_entry_t entry;
 
                        entry = pmd_to_swp_entry(old_pmd);
-                       page = migration_entry_to_page(entry);
+                       page = pfn_swap_entry_to_page(entry);
                } else {
                        page = pmd_page(old_pmd);
                        if (!PageDirty(page) && pmd_dirty(old_pmd))
@@ -2066,7 +2066,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                swp_entry_t entry;
 
                entry = pmd_to_swp_entry(old_pmd);
-               page = migration_entry_to_page(entry);
+               page = pfn_swap_entry_to_page(entry);
                write = is_write_migration_entry(entry);
                young = false;
                soft_dirty = pmd_swp_soft_dirty(old_pmd);
index f63399bff01895d0eae8123c09298ae839d06797..b826dad6fa366e804c2a687451f0970de560babe 100644 (file)
@@ -5532,7 +5532,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
         * as special swap entry in the CPU page table.
         */
        if (is_device_private_entry(ent)) {
-               page = device_private_entry_to_page(ent);
+               page = pfn_swap_entry_to_page(ent);
                /*
                 * MEMORY_DEVICE_PRIVATE means ZONE_DEVICE page and which have
                 * a refcount of 1 when free (unlike normal page)
index 64eda960b75eeaff5916cc6efd7812c247e67b7e..6723931085c77665a63281d641d6cf5a67bb5bf9 100644 (file)
@@ -729,7 +729,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                }
                rss[MM_SWAPENTS]++;
        } else if (is_migration_entry(entry)) {
-               page = migration_entry_to_page(entry);
+               page = pfn_swap_entry_to_page(entry);
 
                rss[mm_counter(page)]++;
 
@@ -748,7 +748,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                        set_pte_at(src_mm, addr, src_pte, pte);
                }
        } else if (is_device_private_entry(entry)) {
-               page = device_private_entry_to_page(entry);
+               page = pfn_swap_entry_to_page(entry);
 
                /*
                 * Update rss count even for unaddressable pages, as
@@ -1280,7 +1280,7 @@ again:
 
                entry = pte_to_swp_entry(ptent);
                if (is_device_private_entry(entry)) {
-                       struct page *page = device_private_entry_to_page(entry);
+                       struct page *page = pfn_swap_entry_to_page(entry);
 
                        if (unlikely(details && details->check_mapping)) {
                                /*
@@ -1309,7 +1309,7 @@ again:
                else if (is_migration_entry(entry)) {
                        struct page *page;
 
-                       page = migration_entry_to_page(entry);
+                       page = pfn_swap_entry_to_page(entry);
                        rss[mm_counter(page)]--;
                }
                if (unlikely(!free_swap_and_cache(entry)))
@@ -3372,7 +3372,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
                        migration_entry_wait(vma->vm_mm, vmf->pmd,
                                             vmf->address);
                } else if (is_device_private_entry(entry)) {
-                       vmf->page = device_private_entry_to_page(entry);
+                       vmf->page = pfn_swap_entry_to_page(entry);
                        ret = vmf->page->pgmap->ops->migrate_to_ram(vmf);
                } else if (is_hwpoison_entry(entry)) {
                        ret = VM_FAULT_HWPOISON;
index 8810c1421f5dfcdd77680b4297b211b0b2ba631e..b4abb87249e166257008bcb7d618085ae26205e3 100644 (file)
@@ -296,7 +296,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
        if (!is_migration_entry(entry))
                goto out;
 
-       page = migration_entry_to_page(entry);
+       page = pfn_swap_entry_to_page(entry);
        page = compound_head(page);
 
        /*
@@ -337,7 +337,7 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
        ptl = pmd_lock(mm, pmd);
        if (!is_pmd_migration_entry(*pmd))
                goto unlock;
-       page = migration_entry_to_page(pmd_to_swp_entry(*pmd));
+       page = pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd));
        if (!get_page_unless_zero(page))
                goto unlock;
        spin_unlock(ptl);
@@ -2289,7 +2289,7 @@ again:
                        if (!is_device_private_entry(entry))
                                goto next;
 
-                       page = device_private_entry_to_page(entry);
+                       page = pfn_swap_entry_to_page(entry);
                        if (!(migrate->flags &
                                MIGRATE_VMA_SELECT_DEVICE_PRIVATE) ||
                            page->pgmap->owner != migrate->pgmap_owner)
index a4435311754b08597c3cced04df12c3effd747e6..4df6093e759bd0b1d3e3ce42833d9964bbe23af4 100644 (file)
@@ -96,7 +96,7 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
                if (!is_migration_entry(entry))
                        return false;
 
-               pfn = migration_entry_to_pfn(entry);
+               pfn = swp_offset(entry);
        } else if (is_swap_pte(*pvmw->pte)) {
                swp_entry_t entry;
 
@@ -105,7 +105,7 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
                if (!is_device_private_entry(entry))
                        return false;
 
-               pfn = device_private_entry_to_pfn(entry);
+               pfn = swp_offset(entry);
        } else {
                if (!pte_present(*pvmw->pte))
                        return false;
@@ -233,7 +233,7 @@ restart:
                                        return not_found(pvmw);
                                entry = pmd_to_swp_entry(pmde);
                                if (!is_migration_entry(entry) ||
-                                   migration_entry_to_page(entry) != page)
+                                   pfn_swap_entry_to_page(entry) != page)
                                        return not_found(pvmw);
                                return true;
                        }