mmap locking API: add mmap_read_trylock_non_owner()
author	Michel Lespinasse <walken@google.com>	Tue, 9 Jun 2020 04:33:37 +0000 (21:33 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>	Tue, 9 Jun 2020 16:39:14 +0000 (09:39 -0700)
Add a couple of APIs used by kernel/bpf/stackmap.c only:
- mmap_read_trylock_non_owner()
- mmap_read_unlock_non_owner() (may be called from a work queue).

It's still not ideal that bpf/stackmap subverts the lock ownership in this
way.  Thanks to Peter Zijlstra for suggesting this API as the least-ugly
way of addressing this in the short term.
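
As a rough illustration of the contract (an editorial sketch, not code from
this patch; struct deferred_mmap_unlock, deferred_mmap_unlock_fn() and
walk_mm_deferred() are made-up names for the example): the lock may be taken
in one task and released from a completely different context, which the plain
mmap_read_unlock() would not tolerate under lockdep.

#include <linux/mmap_lock.h>
#include <linux/mm_types.h>
#include <linux/workqueue.h>

/* Hypothetical helper type for the example. */
struct deferred_mmap_unlock {
	struct work_struct work;
	struct mm_struct *mm;
};

static void deferred_mmap_unlock_fn(struct work_struct *work)
{
	struct deferred_mmap_unlock *d =
		container_of(work, struct deferred_mmap_unlock, work);

	/* Runs in a workqueue thread, not the task that took the lock. */
	mmap_read_unlock_non_owner(d->mm);
}

static bool walk_mm_deferred(struct deferred_mmap_unlock *d,
			     struct mm_struct *mm)
{
	/* Take the lock, dropping lockdep ownership immediately. */
	if (!mmap_read_trylock_non_owner(mm))
		return false;		/* contended: caller falls back */

	/* ... read mm state under the lock ... */

	d->mm = mm;
	INIT_WORK(&d->work, deferred_mmap_unlock_fn);
	schedule_work(&d->work);	/* the unlock happens over there */
	return true;
}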

Signed-off-by: Michel Lespinasse <walken@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Davidlohr Bueso <dbueso@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Laurent Dufour <ldufour@linux.ibm.com>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ying Han <yinghan@google.com>
Link: http://lkml.kernel.org/r/20200520052908.204642-8-walken@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/mmap_lock.h
kernel/bpf/stackmap.c

diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index a757cb3..d1826ce 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -56,4 +56,18 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
        up_read(&mm->mmap_sem);
 }
 
+static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
+{
+       if (down_read_trylock(&mm->mmap_sem)) {
+               rwsem_release(&mm->mmap_sem.dep_map, _RET_IP_);
+               return true;
+       }
+       return false;
+}
+
+static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
+{
+       up_read_non_owner(&mm->mmap_sem);
+}
+
 #endif /* _LINUX_MMAP_LOCK_H */
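
It is worth spelling out why the trylock helper calls rwsem_release() on
success (this note is editorial; it restates the comment deleted from
stackmap.c below): lockdep records down_read_trylock() as an acquisition by
the current task, and up_read_non_owner() performs no lockdep release, so the
annotation has to be dropped at acquire time or lockdep would consider the
lock held by the acquiring task forever. Schematically:

/*
 * Owner pairing: lockdep tracks the critical section end to end.
 *	mmap_read_trylock(mm);		// lockdep acquire
 *	...
 *	mmap_read_unlock(mm);		// up_read(): lockdep release
 *
 * Non-owner pairing: lockdep bookkeeping ends at acquire time.
 *	mmap_read_trylock_non_owner(mm);	// acquire + rwsem_release()
 *	...				// possibly from another task:
 *	mmap_read_unlock_non_owner(mm);	// up_read_non_owner(): no lockdep op
 */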
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index a13b7e2..599488f 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -33,7 +33,7 @@ struct bpf_stack_map {
 /* irq_work to run up_read() for build_id lookup in nmi context */
 struct stack_map_irq_work {
        struct irq_work irq_work;
-       struct rw_semaphore *sem;
+       struct mm_struct *mm;
 };
 
 static void do_up_read(struct irq_work *entry)
@@ -44,8 +44,7 @@ static void do_up_read(struct irq_work *entry)
                return;
 
        work = container_of(entry, struct stack_map_irq_work, irq_work);
-       up_read_non_owner(work->sem);
-       work->sem = NULL;
+       mmap_read_unlock_non_owner(work->mm);
 }
 
 static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
@@ -317,7 +316,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
         * with build_id.
         */
        if (!user || !current || !current->mm || irq_work_busy ||
-           mmap_read_trylock(current->mm) == 0) {
+           !mmap_read_trylock_non_owner(current->mm)) {
                /* cannot access current->mm, fall back to ips */
                for (i = 0; i < trace_nr; i++) {
                        id_offs[i].status = BPF_STACK_BUILD_ID_IP;
@@ -342,16 +341,10 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
        }
 
        if (!work) {
-               mmap_read_unlock(current->mm);
+               mmap_read_unlock_non_owner(current->mm);
        } else {
-               work->sem = &current->mm->mmap_sem;
+               work->mm = current->mm;
                irq_work_queue(&work->irq_work);
-               /*
-                * The irq_work will release the mmap_sem with
-                * up_read_non_owner(). The rwsem_release() is called
-                * here to release the lock from lockdep's perspective.
-                */
-               rwsem_release(&current->mm->mmap_sem.dep_map, _RET_IP_);
        }
 }
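
Putting the stackmap changes together, the control flow after this patch
looks roughly like the sketch below (an editorial condensation of the hunks
above; build_id_lookup() is a made-up name, and declarations and the
ips-fallback path are elided). A non-NULL work means the lookup ran in NMI
context, where the unlock cannot happen inline, so the mm is handed to the
per-CPU irq_work and released later by do_up_read():

/* Condensed sketch of the call site after this patch (editorial). */
static void build_id_lookup(struct stack_map_irq_work *work)
{
	if (!mmap_read_trylock_non_owner(current->mm))
		return;				/* fall back to raw IPs */

	/* ... resolve build IDs from current->mm's VMAs ... */

	if (!work) {
		/* Safe context: drop the lock right here. */
		mmap_read_unlock_non_owner(current->mm);
	} else {
		/* NMI: defer; do_up_read() unlocks from IRQ context. */
		work->mm = current->mm;
		irq_work_queue(&work->irq_work);
	}
}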