mm: add account_locked_vm utility function
Author:     Daniel Jordan <daniel.m.jordan@oracle.com>
AuthorDate: Tue, 16 Jul 2019 23:30:54 +0000 (16:30 -0700)
Commit:     Linus Torvalds <torvalds@linux-foundation.org>
CommitDate: Wed, 17 Jul 2019 02:23:25 +0000 (19:23 -0700)
locked_vm accounting is done roughly the same way in five places, so
unify them in a helper.

Include the helper's caller in the debug print to distinguish between
callsites.

Error codes stay the same, so user-visible behavior does too.  The one
exception is that the -EPERM case in try_increment_locked_vm (the spapr
TCE helper) is removed, because Alexey has never seen it triggered.

[daniel.m.jordan@oracle.com: v3]
Link: http://lkml.kernel.org/r/20190529205019.20927-1-daniel.m.jordan@oracle.com
[sfr@canb.auug.org.au: fix mm/util.c]
Link: http://lkml.kernel.org/r/20190524175045.26897-1-daniel.m.jordan@oracle.com
Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Cc: Alan Tull <atull@kernel.org>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Moritz Fischer <mdf@kernel.org>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Steve Sistare <steven.sistare@oracle.com>
Cc: Wu Hao <hao.wu@intel.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
arch/powerpc/kvm/book3s_64_vio.c
arch/powerpc/mm/book3s64/iommu_api.c
drivers/fpga/dfl-afu-dma-region.c
drivers/vfio/vfio_iommu_spapr_tce.c
drivers/vfio/vfio_iommu_type1.c
include/linux/mm.h
mm/util.c

index 5bf05cc..e99a147 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/iommu.h>
 #include <linux/file.h>
+#include <linux/mm.h>
 
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
@@ -45,43 +46,6 @@ static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
        return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE;
 }
 
-static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
-{
-       long ret = 0;
-
-       if (!current || !current->mm)
-               return ret; /* process exited */
-
-       down_write(&current->mm->mmap_sem);
-
-       if (inc) {
-               unsigned long locked, lock_limit;
-
-               locked = current->mm->locked_vm + stt_pages;
-               lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-               if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-                       ret = -ENOMEM;
-               else
-                       current->mm->locked_vm += stt_pages;
-       } else {
-               if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm))
-                       stt_pages = current->mm->locked_vm;
-
-               current->mm->locked_vm -= stt_pages;
-       }
-
-       pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid,
-                       inc ? '+' : '-',
-                       stt_pages << PAGE_SHIFT,
-                       current->mm->locked_vm << PAGE_SHIFT,
-                       rlimit(RLIMIT_MEMLOCK),
-                       ret ? " - exceeded" : "");
-
-       up_write(&current->mm->mmap_sem);
-
-       return ret;
-}
-
 static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head)
 {
        struct kvmppc_spapr_tce_iommu_table *stit = container_of(head,
@@ -291,7 +255,7 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
 
        kvm_put_kvm(stt->kvm);
 
-       kvmppc_account_memlimit(
+       account_locked_vm(current->mm,
                kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
        call_rcu(&stt->rcu, release_spapr_tce_table);
 
@@ -316,7 +280,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
                return -EINVAL;
 
        npages = kvmppc_tce_pages(size);
-       ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
+       ret = account_locked_vm(current->mm, kvmppc_stt_pages(npages), true);
        if (ret)
                return ret;
 
@@ -362,7 +326,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 
        kfree(stt);
  fail_acct:
-       kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
+       account_locked_vm(current->mm, kvmppc_stt_pages(npages), false);
        return ret;
 }
 
index 90ee3a8..b056cae 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/hugetlb.h>
 #include <linux/swap.h>
 #include <linux/sizes.h>
+#include <linux/mm.h>
 #include <asm/mmu_context.h>
 #include <asm/pte-walk.h>
 #include <linux/mm_inline.h>
@@ -46,40 +47,6 @@ struct mm_iommu_table_group_mem_t {
        u64 dev_hpa;            /* Device memory base address */
 };
 
-static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
-               unsigned long npages, bool incr)
-{
-       long ret = 0, locked, lock_limit;
-
-       if (!npages)
-               return 0;
-
-       down_write(&mm->mmap_sem);
-
-       if (incr) {
-               locked = mm->locked_vm + npages;
-               lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-               if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-                       ret = -ENOMEM;
-               else
-                       mm->locked_vm += npages;
-       } else {
-               if (WARN_ON_ONCE(npages > mm->locked_vm))
-                       npages = mm->locked_vm;
-               mm->locked_vm -= npages;
-       }
-
-       pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n",
-                       current ? current->pid : 0,
-                       incr ? '+' : '-',
-                       npages << PAGE_SHIFT,
-                       mm->locked_vm << PAGE_SHIFT,
-                       rlimit(RLIMIT_MEMLOCK));
-       up_write(&mm->mmap_sem);
-
-       return ret;
-}
-
 bool mm_iommu_preregistered(struct mm_struct *mm)
 {
        return !list_empty(&mm->context.iommu_group_mem_list);
@@ -96,7 +63,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
        unsigned long entry, chunk;
 
        if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
-               ret = mm_iommu_adjust_locked_vm(mm, entries, true);
+               ret = account_locked_vm(mm, entries, true);
                if (ret)
                        return ret;
 
@@ -211,7 +178,7 @@ free_exit:
        kfree(mem);
 
 unlock_exit:
-       mm_iommu_adjust_locked_vm(mm, locked_entries, false);
+       account_locked_vm(mm, locked_entries, false);
 
        return ret;
 }
@@ -311,7 +278,7 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
 unlock_exit:
        mutex_unlock(&mem_list_mutex);
 
-       mm_iommu_adjust_locked_vm(mm, unlock_entries, false);
+       account_locked_vm(mm, unlock_entries, false);
 
        return ret;
 }
index dcd80b0..62f9244 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/sched/signal.h>
 #include <linux/uaccess.h>
+#include <linux/mm.h>
 
 #include "dfl-afu.h"
 
@@ -32,52 +33,6 @@ void afu_dma_region_init(struct dfl_feature_platform_data *pdata)
 }
 
 /**
- * afu_dma_adjust_locked_vm - adjust locked memory
- * @dev: port device
- * @npages: number of pages
- * @incr: increase or decrease locked memory
- *
- * Increase or decrease the locked memory size with npages input.
- *
- * Return 0 on success.
- * Return -ENOMEM if locked memory size is over the limit and no CAP_IPC_LOCK.
- */
-static int afu_dma_adjust_locked_vm(struct device *dev, long npages, bool incr)
-{
-       unsigned long locked, lock_limit;
-       int ret = 0;
-
-       /* the task is exiting. */
-       if (!current->mm)
-               return 0;
-
-       down_write(&current->mm->mmap_sem);
-
-       if (incr) {
-               locked = current->mm->locked_vm + npages;
-               lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-               if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-                       ret = -ENOMEM;
-               else
-                       current->mm->locked_vm += npages;
-       } else {
-               if (WARN_ON_ONCE(npages > current->mm->locked_vm))
-                       npages = current->mm->locked_vm;
-               current->mm->locked_vm -= npages;
-       }
-
-       dev_dbg(dev, "[%d] RLIMIT_MEMLOCK %c%ld %ld/%ld%s\n", current->pid,
-               incr ? '+' : '-', npages << PAGE_SHIFT,
-               current->mm->locked_vm << PAGE_SHIFT, rlimit(RLIMIT_MEMLOCK),
-               ret ? "- exceeded" : "");
-
-       up_write(&current->mm->mmap_sem);
-
-       return ret;
-}
-
-/**
  * afu_dma_pin_pages - pin pages of given dma memory region
  * @pdata: feature device platform data
  * @region: dma memory region to be pinned
@@ -92,7 +47,7 @@ static int afu_dma_pin_pages(struct dfl_feature_platform_data *pdata,
        struct device *dev = &pdata->dev->dev;
        int ret, pinned;
 
-       ret = afu_dma_adjust_locked_vm(dev, npages, true);
+       ret = account_locked_vm(current->mm, npages, true);
        if (ret)
                return ret;
 
@@ -121,7 +76,7 @@ put_pages:
 free_pages:
        kfree(region->pages);
 unlock_vm:
-       afu_dma_adjust_locked_vm(dev, npages, false);
+       account_locked_vm(current->mm, npages, false);
        return ret;
 }
 
@@ -141,7 +96,7 @@ static void afu_dma_unpin_pages(struct dfl_feature_platform_data *pdata,
 
        put_all_pages(region->pages, npages);
        kfree(region->pages);
-       afu_dma_adjust_locked_vm(dev, npages, false);
+       account_locked_vm(current->mm, npages, false);
 
        dev_dbg(dev, "%ld pages unpinned\n", npages);
 }
index 7048c91..8ce9ad2 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/vmalloc.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/signal.h>
+#include <linux/mm.h>
 
 #include <asm/iommu.h>
 #include <asm/tce.h>
 static void tce_iommu_detach_group(void *iommu_data,
                struct iommu_group *iommu_group);
 
-static long try_increment_locked_vm(struct mm_struct *mm, long npages)
-{
-       long ret = 0, locked, lock_limit;
-
-       if (WARN_ON_ONCE(!mm))
-               return -EPERM;
-
-       if (!npages)
-               return 0;
-
-       down_write(&mm->mmap_sem);
-       locked = mm->locked_vm + npages;
-       lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-       if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-               ret = -ENOMEM;
-       else
-               mm->locked_vm += npages;
-
-       pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
-                       npages << PAGE_SHIFT,
-                       mm->locked_vm << PAGE_SHIFT,
-                       rlimit(RLIMIT_MEMLOCK),
-                       ret ? " - exceeded" : "");
-
-       up_write(&mm->mmap_sem);
-
-       return ret;
-}
-
-static void decrement_locked_vm(struct mm_struct *mm, long npages)
-{
-       if (!mm || !npages)
-               return;
-
-       down_write(&mm->mmap_sem);
-       if (WARN_ON_ONCE(npages > mm->locked_vm))
-               npages = mm->locked_vm;
-       mm->locked_vm -= npages;
-       pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
-                       npages << PAGE_SHIFT,
-                       mm->locked_vm << PAGE_SHIFT,
-                       rlimit(RLIMIT_MEMLOCK));
-       up_write(&mm->mmap_sem);
-}
-
 /*
  * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
  *
@@ -333,7 +289,7 @@ static int tce_iommu_enable(struct tce_container *container)
                return ret;
 
        locked = table_group->tce32_size >> PAGE_SHIFT;
-       ret = try_increment_locked_vm(container->mm, locked);
+       ret = account_locked_vm(container->mm, locked, true);
        if (ret)
                return ret;
 
@@ -352,7 +308,7 @@ static void tce_iommu_disable(struct tce_container *container)
        container->enabled = false;
 
        BUG_ON(!container->mm);
-       decrement_locked_vm(container->mm, container->locked_pages);
+       account_locked_vm(container->mm, container->locked_pages, false);
 }
 
 static void *tce_iommu_open(unsigned long arg)
@@ -656,7 +612,7 @@ static long tce_iommu_create_table(struct tce_container *container,
        if (!table_size)
                return -EINVAL;
 
-       ret = try_increment_locked_vm(container->mm, table_size >> PAGE_SHIFT);
+       ret = account_locked_vm(container->mm, table_size >> PAGE_SHIFT, true);
        if (ret)
                return ret;
 
@@ -675,7 +631,7 @@ static void tce_iommu_free_table(struct tce_container *container,
        unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;
 
        iommu_tce_table_put(tbl);
-       decrement_locked_vm(container->mm, pages);
+       account_locked_vm(container->mm, pages, false);
 }
 
 static long tce_iommu_create_window(struct tce_container *container,
index add34ad..054391f 100644 (file)
@@ -272,21 +272,8 @@ static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
 
        ret = down_write_killable(&mm->mmap_sem);
        if (!ret) {
-               if (npage > 0) {
-                       if (!dma->lock_cap) {
-                               unsigned long limit;
-
-                               limit = task_rlimit(dma->task,
-                                               RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-                               if (mm->locked_vm + npage > limit)
-                                       ret = -ENOMEM;
-                       }
-               }
-
-               if (!ret)
-                       mm->locked_vm += npage;
-
+               ret = __account_locked_vm(mm, abs(npage), npage > 0, dma->task,
+                                         dma->lock_cap);
                up_write(&mm->mmap_sem);
        }
 
index f43f4de..bd65125 100644 (file)
@@ -1543,6 +1543,10 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 int get_user_pages_fast(unsigned long start, int nr_pages,
                        unsigned int gup_flags, struct page **pages);
 
+int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc);
+int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
+                       struct task_struct *task, bool bypass_rlim);
+
 /* Container for pinned pfns / pages */
 struct frame_vector {
        unsigned int nr_allocated;      /* Number of frames we have space for */
index 68575a3..e6351a8 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -7,6 +7,7 @@
 #include <linux/err.h>
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
+#include <linux/sched/signal.h>
 #include <linux/sched/task_stack.h>
 #include <linux/security.h>
 #include <linux/swap.h>
@@ -300,6 +301,80 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 }
 #endif
 
+/**
+ * __account_locked_vm - account locked pages to an mm's locked_vm
+ * @mm:          mm to account against
+ * @pages:       number of pages to account
+ * @inc:         %true if @pages should be considered positive, %false if not
+ * @task:        task used to check RLIMIT_MEMLOCK
+ * @bypass_rlim: %true if checking RLIMIT_MEMLOCK should be skipped
+ *
+ * Assumes @task and @mm are valid (i.e. at least one reference on each), and
+ * that mmap_sem is held as writer.
+ *
+ * Return:
+ * * 0       on success
+ * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded.
+ */
+int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
+                       struct task_struct *task, bool bypass_rlim)
+{
+       unsigned long locked_vm, limit;
+       int ret = 0;
+
+       lockdep_assert_held_write(&mm->mmap_sem);
+
+       locked_vm = mm->locked_vm;
+       if (inc) {
+               if (!bypass_rlim) {
+                       limit = task_rlimit(task, RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+                       if (locked_vm + pages > limit)
+                               ret = -ENOMEM;
+               }
+               if (!ret)
+                       mm->locked_vm = locked_vm + pages;
+       } else {
+               WARN_ON_ONCE(pages > locked_vm);
+               mm->locked_vm = locked_vm - pages;
+       }
+
+       pr_debug("%s: [%d] caller %ps %c%lu %lu/%lu%s\n", __func__, task->pid,
+                (void *)_RET_IP_, (inc) ? '+' : '-', pages << PAGE_SHIFT,
+                locked_vm << PAGE_SHIFT, task_rlimit(task, RLIMIT_MEMLOCK),
+                ret ? " - exceeded" : "");
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(__account_locked_vm);
+
+/**
+ * account_locked_vm - account locked pages to an mm's locked_vm
+ * @mm:          mm to account against, may be NULL
+ * @pages:       number of pages to account
+ * @inc:         %true if @pages should be considered positive, %false if not
+ *
+ * Assumes a non-NULL @mm is valid (i.e. at least one reference on it).
+ *
+ * Return:
+ * * 0       on success, or if mm is NULL
+ * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded.
+ */
+int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc)
+{
+       int ret;
+
+       if (pages == 0 || !mm)
+               return 0;
+
+       down_write(&mm->mmap_sem);
+       ret = __account_locked_vm(mm, pages, inc, current,
+                                 capable(CAP_IPC_LOCK));
+       up_write(&mm->mmap_sem);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(account_locked_vm);
+
 unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
        unsigned long len, unsigned long prot,
        unsigned long flag, unsigned long pgoff)