ipc/namespace.c: use a work queue to free_ipc
author Giuseppe Scrivano <gscrivan@redhat.com>
Mon, 8 Jun 2020 04:40:10 +0000 (21:40 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 8 Jun 2020 18:05:56 +0000 (11:05 -0700)
The reason is to avoid a delay caused by the synchronize_rcu() call in
kern_unmount() when the mqueue mount is freed.

The code:

    #define _GNU_SOURCE
    #include <sched.h>
    #include <error.h>
    #include <errno.h>
    #include <stdlib.h>

    /*
     * Reproducer: call unshare(CLONE_NEWIPC) 1000 times in a row and
     * exit with an "unshare" error message on the first failure.
     * NOTE(review): presumably each successful call drops the previously
     * held IPC namespace, so the loop exercises the free path being
     * timed -- confirm against put_ipc_ns().
     */
    int main()
    {
        int i;

        for (i = 0; i < 1000; i++)
            if (unshare(CLONE_NEWIPC) < 0)
                error(EXIT_FAILURE, errno, "unshare");
    }

goes from

Command being timed: "./ipc-namespace"
User time (seconds): 0.00
System time (seconds): 0.06
Percent of CPU this job got: 0%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:08.05

to

Command being timed: "./ipc-namespace"
User time (seconds): 0.00
System time (seconds): 0.02
Percent of CPU this job got: 96%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.03

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Manfred Spraul <manfred@colorfullife.com>
Link: http://lkml.kernel.org/r/20200225145419.527994-1-gscrivan@redhat.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/ipc_namespace.h
ipc/namespace.c

index c309f43bde45e086df82c7740aed8ddc3671bced..a06a78c67f19f8246b9769a87d2a3cc1ede2ff28 100644 (file)
@@ -68,6 +68,8 @@ struct ipc_namespace {
        struct user_namespace *user_ns;
        struct ucounts *ucounts;
 
+       struct llist_node mnt_llist;
+
        struct ns_common ns;
 } __randomize_layout;
 
index fdc3b5f3f53aa75796192460866c001f1571a2cc..24e7b45320f724be853fe15be748229f7732beb3 100644 (file)
@@ -117,6 +117,10 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
 
 static void free_ipc_ns(struct ipc_namespace *ns)
 {
+       /* mq_put_mnt() waits for a grace period as kern_unmount()
+        * uses synchronize_rcu().
+        */
+       mq_put_mnt(ns);
        sem_exit_ns(ns);
        msg_exit_ns(ns);
        shm_exit_ns(ns);
@@ -127,6 +131,21 @@ static void free_ipc_ns(struct ipc_namespace *ns)
        kfree(ns);
 }
 
+static LLIST_HEAD(free_ipc_list);
+static void free_ipc(struct work_struct *unused)
+{
+       struct llist_node *node = llist_del_all(&free_ipc_list);
+       struct ipc_namespace *n, *t;
+
+       llist_for_each_entry_safe(n, t, node, mnt_llist)
+               free_ipc_ns(n);
+}
+
+/*
+ * The work queue is used to avoid the cost of synchronize_rcu in kern_unmount.
+ */
+static DECLARE_WORK(free_ipc_work, free_ipc);
+
 /*
  * put_ipc_ns - drop a reference to an ipc namespace.
  * @ns: the namespace to put
@@ -148,8 +167,9 @@ void put_ipc_ns(struct ipc_namespace *ns)
        if (refcount_dec_and_lock(&ns->count, &mq_lock)) {
                mq_clear_sbinfo(ns);
                spin_unlock(&mq_lock);
-               mq_put_mnt(ns);
-               free_ipc_ns(ns);
+
+               if (llist_add(&ns->mnt_llist, &free_ipc_list))
+                       schedule_work(&free_ipc_work);
        }
 }