mm: selftests for exclusive device memory
author     Alistair Popple <apopple@nvidia.com>
           Thu, 1 Jul 2021 01:54:28 +0000 (18:54 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Thu, 1 Jul 2021 18:06:03 +0000 (11:06 -0700)
Add some selftests for exclusive device memory. A new HMM_DMIRROR_EXCLUSIVE ioctl makes a range of anonymous memory exclusive to the dmirror device with make_device_exclusive_range(), and a companion HMM_DMIRROR_CHECK_EXCLUSIVE ioctl verifies that CPU access has since revoked the exclusive entries. The tests cover basic exclusive faulting, the interaction with mprotect(), and copy-on-write after fork().

Link: https://lkml.kernel.org/r/20210616105937.23201-9-apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Acked-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Cc: Ben Skeggs <bskeggs@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
lib/test_hmm.c
lib/test_hmm_uapi.h
tools/testing/selftests/vm/hmm-tests.c

diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index fc7a20b..8c55c47 100644
@@ -25,6 +25,7 @@
 #include <linux/swapops.h>
 #include <linux/sched/mm.h>
 #include <linux/platform_device.h>
+#include <linux/rmap.h>
 
 #include "test_hmm_uapi.h"
 
@@ -46,6 +47,7 @@ struct dmirror_bounce {
        unsigned long           cpages;
 };
 
+#define DPT_XA_TAG_ATOMIC 1UL
 #define DPT_XA_TAG_WRITE 3UL
 
 /*
@@ -619,6 +621,52 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
        }
 }
 
+static int dmirror_check_atomic(struct dmirror *dmirror, unsigned long start,
+                                unsigned long end)
+{
+       unsigned long pfn;
+
+       for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
+               void *entry;
+
+               entry = xa_load(&dmirror->pt, pfn);
+               if (xa_pointer_tag(entry) == DPT_XA_TAG_ATOMIC)
+                       return -EPERM;
+       }
+
+       return 0;
+}
+
+static int dmirror_atomic_map(unsigned long start, unsigned long end,
+                             struct page **pages, struct dmirror *dmirror)
+{
+       unsigned long pfn, mapped = 0;
+       int i;
+
+       /* Map the migrated pages into the device's page tables. */
+       mutex_lock(&dmirror->mutex);
+
+       for (i = 0, pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++, i++) {
+               void *entry;
+
+               if (!pages[i])
+                       continue;
+
+               entry = pages[i];
+               entry = xa_tag_pointer(entry, DPT_XA_TAG_ATOMIC);
+               entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
+               if (xa_is_err(entry)) {
+                       mutex_unlock(&dmirror->mutex);
+                       return xa_err(entry);
+               }
+
+               mapped++;
+       }
+
+       mutex_unlock(&dmirror->mutex);
+       return mapped;
+}
+
 static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
                                            struct dmirror *dmirror)
 {
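
The mirror's page table is an XArray keyed by pfn, and the helpers above tell exclusive entries apart from ordinary write entries by a pointer tag stored in the low bits of the entry. A minimal sketch of how the stock <linux/xarray.h> tag helpers fit together (dmirror_entry_is_atomic() is illustrative only, not part of the patch):

	#include <linux/xarray.h>

	static bool dmirror_entry_is_atomic(struct xarray *pt, unsigned long pfn)
	{
		void *entry = xa_load(pt, pfn);

		/* xa_untag_pointer(entry) would recover the struct page pointer. */
		return xa_pointer_tag(entry) == DPT_XA_TAG_ATOMIC;
	}
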
@@ -661,6 +709,75 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
        return 0;
 }
 
+static int dmirror_exclusive(struct dmirror *dmirror,
+                            struct hmm_dmirror_cmd *cmd)
+{
+       unsigned long start, end, addr;
+       unsigned long size = cmd->npages << PAGE_SHIFT;
+       struct mm_struct *mm = dmirror->notifier.mm;
+       struct page *pages[64];
+       struct dmirror_bounce bounce;
+       unsigned long next;
+       int ret;
+
+       start = cmd->addr;
+       end = start + size;
+       if (end < start)
+               return -EINVAL;
+
+       /* Since the mm is for the mirrored process, get a reference first. */
+       if (!mmget_not_zero(mm))
+               return -EINVAL;
+
+       mmap_read_lock(mm);
+       for (addr = start; addr < end; addr = next) {
+               unsigned long mapped;
+               int i;
+
+               if (end < addr + (ARRAY_SIZE(pages) << PAGE_SHIFT))
+                       next = end;
+               else
+                       next = addr + (ARRAY_SIZE(pages) << PAGE_SHIFT);
+
+               ret = make_device_exclusive_range(mm, addr, next, pages, NULL);
+               /* Entries in pages[] past 'ret' are uninitialized. */
+               mapped = 0;
+               if (ret == (next - addr) >> PAGE_SHIFT)
+                       mapped = dmirror_atomic_map(addr, next, pages, dmirror);
+               for (i = 0; i < ret; i++) {
+                       if (pages[i]) {
+                               unlock_page(pages[i]);
+                               put_page(pages[i]);
+                       }
+               }
+
+               if (addr + (mapped << PAGE_SHIFT) < next) {
+                       mmap_read_unlock(mm);
+                       mmput(mm);
+                       return -EBUSY;
+               }
+       }
+       mmap_read_unlock(mm);
+       mmput(mm);
+
+       /* Return the migrated data for verification. */
+       ret = dmirror_bounce_init(&bounce, start, size);
+       if (ret)
+               return ret;
+       mutex_lock(&dmirror->mutex);
+       ret = dmirror_do_read(dmirror, start, end, &bounce);
+       mutex_unlock(&dmirror->mutex);
+       if (ret == 0) {
+               if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
+                                bounce.size))
+                       ret = -EFAULT;
+       }
+
+       cmd->cpages = bounce.cpages;
+       dmirror_bounce_fini(&bounce);
+       return ret;
+}
+
 static int dmirror_migrate(struct dmirror *dmirror,
                           struct hmm_dmirror_cmd *cmd)
 {
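
Note that dmirror_exclusive() above relies on the calling contract of make_device_exclusive_range(): every page it hands back is returned locked with an extra reference held, and the caller drops both once its own mapping has been recorded. A sketch of just that cleanup step, factored into an illustrative helper (not part of the patch):

	static void dmirror_put_exclusive_pages(struct page **pages, int npages)
	{
		int i;

		for (i = 0; i < npages; i++) {
			if (!pages[i])
				continue;
			unlock_page(pages[i]);	/* locked by make_device_exclusive_range() */
			put_page(pages[i]);	/* reference taken on the caller's behalf */
		}
	}
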
@@ -948,6 +1065,15 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp,
                ret = dmirror_migrate(dmirror, &cmd);
                break;
 
+       case HMM_DMIRROR_EXCLUSIVE:
+               ret = dmirror_exclusive(dmirror, &cmd);
+               break;
+
+       case HMM_DMIRROR_CHECK_EXCLUSIVE:
+               ret = dmirror_check_atomic(dmirror, cmd.addr,
+                                       cmd.addr + (cmd.npages << PAGE_SHIFT));
+               break;
+
        case HMM_DMIRROR_SNAPSHOT:
                ret = dmirror_snapshot(dmirror, &cmd);
                break;
diff --git a/lib/test_hmm_uapi.h b/lib/test_hmm_uapi.h
index 670b4ef..f14dea5 100644
@@ -33,6 +33,8 @@ struct hmm_dmirror_cmd {
 #define HMM_DMIRROR_WRITE              _IOWR('H', 0x01, struct hmm_dmirror_cmd)
 #define HMM_DMIRROR_MIGRATE            _IOWR('H', 0x02, struct hmm_dmirror_cmd)
 #define HMM_DMIRROR_SNAPSHOT           _IOWR('H', 0x03, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_EXCLUSIVE          _IOWR('H', 0x04, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_CHECK_EXCLUSIVE    _IOWR('H', 0x05, struct hmm_dmirror_cmd)
 
 /*
  * Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT.
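
For illustration, the new commands are driven from userspace the same way as the existing ones: fill in a struct hmm_dmirror_cmd and issue the ioctl on an open dmirror device (the selftests open /dev/hmm_dmirror<N>). A hedged sketch, with the helper name ours and error handling pared down:

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include "test_hmm_uapi.h"

	static int make_range_exclusive(int fd, void *addr, void *mirror, uint64_t npages)
	{
		struct hmm_dmirror_cmd cmd = {
			.addr	= (uintptr_t)addr,	/* start of the CPU range */
			.ptr	= (uintptr_t)mirror,	/* destination for copied-back data */
			.npages	= npages,
		};

		if (ioctl(fd, HMM_DMIRROR_EXCLUSIVE, &cmd) < 0)
			return -1;
		/* cmd.cpages reports how many pages were copied back. */
		return (int)cmd.cpages;
	}
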
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 5d1ac69..864f126 100644
@@ -1485,4 +1485,167 @@ TEST_F(hmm2, double_map)
        hmm_buffer_free(buffer);
 }
 
+/*
+ * Basic check of exclusive faulting.
+ */
+TEST_F(hmm, exclusive)
+{
+       struct hmm_buffer *buffer;
+       unsigned long npages;
+       unsigned long size;
+       unsigned long i;
+       int *ptr;
+       int ret;
+
+       npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+       ASSERT_NE(npages, 0);
+       size = npages << self->page_shift;
+
+       buffer = malloc(sizeof(*buffer));
+       ASSERT_NE(buffer, NULL);
+
+       buffer->fd = -1;
+       buffer->size = size;
+       buffer->mirror = malloc(size);
+       ASSERT_NE(buffer->mirror, NULL);
+
+       buffer->ptr = mmap(NULL, size,
+                          PROT_READ | PROT_WRITE,
+                          MAP_PRIVATE | MAP_ANONYMOUS,
+                          buffer->fd, 0);
+       ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+       /* Initialize buffer in system memory. */
+       for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+               ptr[i] = i;
+
+       /* Map memory exclusively for device access. */
+       ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
+       ASSERT_EQ(ret, 0);
+       ASSERT_EQ(buffer->cpages, npages);
+
+       /* Check what the device read. */
+       for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+               ASSERT_EQ(ptr[i], i);
+
+       /* Fault pages back to system memory and check them. */
+       for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+               ASSERT_EQ(ptr[i]++, i);
+
+       for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+               ASSERT_EQ(ptr[i], i+1);
+
+       /* Check atomic access revoked */
+       ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_CHECK_EXCLUSIVE, buffer, npages);
+       ASSERT_EQ(ret, 0);
+
+       hmm_buffer_free(buffer);
+}
+
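+/*
+ * Check that the device cannot write after the CPU mapping is made
+ * read-only with mprotect().
+ */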
+TEST_F(hmm, exclusive_mprotect)
+{
+       struct hmm_buffer *buffer;
+       unsigned long npages;
+       unsigned long size;
+       unsigned long i;
+       int *ptr;
+       int ret;
+
+       npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+       ASSERT_NE(npages, 0);
+       size = npages << self->page_shift;
+
+       buffer = malloc(sizeof(*buffer));
+       ASSERT_NE(buffer, NULL);
+
+       buffer->fd = -1;
+       buffer->size = size;
+       buffer->mirror = malloc(size);
+       ASSERT_NE(buffer->mirror, NULL);
+
+       buffer->ptr = mmap(NULL, size,
+                          PROT_READ | PROT_WRITE,
+                          MAP_PRIVATE | MAP_ANONYMOUS,
+                          buffer->fd, 0);
+       ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+       /* Initialize buffer in system memory. */
+       for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+               ptr[i] = i;
+
+       /* Map memory exclusively for device access. */
+       ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
+       ASSERT_EQ(ret, 0);
+       ASSERT_EQ(buffer->cpages, npages);
+
+       /* Check what the device read. */
+       for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+               ASSERT_EQ(ptr[i], i);
+
+       ret = mprotect(buffer->ptr, size, PROT_READ);
+       ASSERT_EQ(ret, 0);
+
+       /* Simulate a device writing system memory. */
+       ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+       ASSERT_EQ(ret, -EPERM);
+
+       hmm_buffer_free(buffer);
+}
+
+/*
+ * Check that copy-on-write works after pages are mapped for exclusive access.
+ */
+TEST_F(hmm, exclusive_cow)
+{
+       struct hmm_buffer *buffer;
+       unsigned long npages;
+       unsigned long size;
+       unsigned long i;
+       int *ptr;
+       int ret;
+
+       npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+       ASSERT_NE(npages, 0);
+       size = npages << self->page_shift;
+
+       buffer = malloc(sizeof(*buffer));
+       ASSERT_NE(buffer, NULL);
+
+       buffer->fd = -1;
+       buffer->size = size;
+       buffer->mirror = malloc(size);
+       ASSERT_NE(buffer->mirror, NULL);
+
+       buffer->ptr = mmap(NULL, size,
+                          PROT_READ | PROT_WRITE,
+                          MAP_PRIVATE | MAP_ANONYMOUS,
+                          buffer->fd, 0);
+       ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+       /* Initialize buffer in system memory. */
+       for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+               ptr[i] = i;
+
+       /* Map memory exclusively for device access. */
+       ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
+       ASSERT_EQ(ret, 0);
+       ASSERT_EQ(buffer->cpages, npages);
+
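+       /* Parent and child both write the pages; child writes trigger COW. */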
+       fork();
+
+       /* Fault pages back to system memory and check them. */
+       for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+               ASSERT_EQ(ptr[i]++, i);
+
+       for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+               ASSERT_EQ(ptr[i], i+1);
+
+       hmm_buffer_free(buffer);
+}
+
 TEST_HARNESS_MAIN
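
As with the rest of hmm-tests, these cases need the test_hmm module (CONFIG_TEST_HMM) loaded so the dmirror character devices exist; the vm selftests ship a test_hmm.sh wrapper that loads the module and runs the suite.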