net: Kill net_mutex
author Kirill Tkhai <ktkhai@virtuozzo.com>
Mon, 19 Feb 2018 09:58:38 +0000 (12:58 +0300)
committer David S. Miller <davem@davemloft.net>
Tue, 20 Feb 2018 18:23:13 +0000 (13:23 -0500)
We take net_mutex when there are !async pernet_operations
registered, because read locking net_sem is not enough in that
case. But we can get rid of the mutex entirely by changing the
logic to write lock net_sem in such cases instead. This reduces
the number of lock operations we perform.
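
The pattern used in both copy_net_ns() and cleanup_net() is:
sample nr_sync_pernet_ops, take net_sem for write if any sync
(!async) operations are registered, otherwise take it for read,
and recheck after a read acquisition in case a sync operation
was registered in between. Below is a minimal userspace sketch
of that pattern, modelled with POSIX rwlocks; the names
pernet_sem, nr_sync_ops and do_setup() are illustrative
stand-ins, not kernel symbols.

#include <pthread.h>
#include <stdatomic.h>

static pthread_rwlock_t pernet_sem = PTHREAD_RWLOCK_INITIALIZER;
static atomic_uint nr_sync_ops;	/* count of !async pernet_operations */

static void do_setup(void)
{
	/* stand-in for setup_net() / the exit method walks */
}

static void setup_with_minimal_locking(void)
{
	unsigned int write;
again:
	/* Sample the counter first to pick the cheapest sufficient lock. */
	write = atomic_load(&nr_sync_ops);
	if (write)
		pthread_rwlock_wrlock(&pernet_sem);
	else
		pthread_rwlock_rdlock(&pernet_sem);

	/*
	 * A sync operation may have been registered between the sample
	 * and the read lock; if so, drop the lock and retry, which then
	 * takes the write side.
	 */
	if (!write && atomic_load(&nr_sync_ops)) {
		pthread_rwlock_unlock(&pernet_sem);
		goto again;
	}

	do_setup();

	/* Unlike up_read()/up_write(), one call unlocks both modes. */
	pthread_rwlock_unlock(&pernet_sem);
}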

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/linux/rtnetlink.h
include/net/net_namespace.h
net/core/net_namespace.c

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index e9ee9ad..3573b4b 100644
@@ -35,7 +35,6 @@ extern int rtnl_trylock(void);
 extern int rtnl_is_locked(void);
 
 extern wait_queue_head_t netdev_unregistering_wq;
-extern struct mutex net_mutex;
 extern struct rw_semaphore net_sem;
 
 #ifdef CONFIG_PROVE_LOCKING
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 9158ec1..115b01b 100644
@@ -60,8 +60,11 @@ struct net {
 
        struct list_head        list;           /* list of network namespaces */
        struct list_head        cleanup_list;   /* namespaces on death row */
-       struct list_head        exit_list;      /* Use only net_mutex */
-
+       struct list_head        exit_list;      /* To link a dead net when
+                                                * calling pernet exit methods
+                                                * (net_sem read locked), or when
+                                                * unregistering pernet ops
+                                                * (net_sem write locked). */
        struct user_namespace   *user_ns;       /* Owning user namespace */
        struct ucounts          *ucounts;
        spinlock_t              nsid_lock;
@@ -89,7 +92,7 @@ struct net {
        /* core fib_rules */
        struct list_head        rules_ops;
 
-       struct list_head        fib_notifier_ops;  /* protected by net_mutex */
+       struct list_head        fib_notifier_ops;  /* protected by net_sem */
 
        struct net_device       *loopback_dev;          /* The loopback */
        struct netns_core       core;
@@ -316,7 +319,7 @@ struct pernet_operations {
        /*
         * Indicates above methods are allowed to be executed in parallel
-        * with methods of any other pernet_operations, i.e. they are not
-        * need synchronization via net_mutex.
+        * with methods of any other pernet_operations, i.e. they do not
+        * need a write-locked net_sem.
         */
        bool async;
 };
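
For context, a subsystem of this era opts in to the parallel path
by setting the flag above. A hedged sketch follows; foo_net_ops,
foo_init and foo_exit are hypothetical names, not kernel symbols.

static int __net_init foo_init(struct net *net)
{
	return 0;	/* per-netns setup would go here */
}

static void __net_exit foo_exit(struct net *net)
{
	/* per-netns teardown */
}

static struct pernet_operations foo_net_ops = {
	.init  = foo_init,
	.exit  = foo_exit,
	.async = true,	/* init/exit may run with net_sem read locked */
};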
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index bcab9a9..e89a516 100644
@@ -29,8 +29,6 @@
 
 static LIST_HEAD(pernet_list);
 static struct list_head *first_device = &pernet_list;
-/* Used only if there are !async pernet_operations registered */
-DEFINE_MUTEX(net_mutex);
 
 LIST_HEAD(net_namespace_list);
 EXPORT_SYMBOL_GPL(net_namespace_list);
@@ -407,6 +405,7 @@ struct net *copy_net_ns(unsigned long flags,
 {
        struct ucounts *ucounts;
        struct net *net;
+       unsigned write;
        int rv;
 
        if (!(flags & CLONE_NEWNET))
@@ -424,20 +423,26 @@ struct net *copy_net_ns(unsigned long flags,
        refcount_set(&net->passive, 1);
        net->ucounts = ucounts;
        get_user_ns(user_ns);
-
-       rv = down_read_killable(&net_sem);
+again:
+       write = READ_ONCE(nr_sync_pernet_ops);
+       if (write)
+               rv = down_write_killable(&net_sem);
+       else
+               rv = down_read_killable(&net_sem);
        if (rv < 0)
                goto put_userns;
-       if (nr_sync_pernet_ops) {
-               rv = mutex_lock_killable(&net_mutex);
-               if (rv < 0)
-                       goto up_read;
+
+       if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
+               up_read(&net_sem);
+               goto again;
        }
        rv = setup_net(net, user_ns);
-       if (nr_sync_pernet_ops)
-               mutex_unlock(&net_mutex);
-up_read:
-       up_read(&net_sem);
+
+       if (write)
+               up_write(&net_sem);
+       else
+               up_read(&net_sem);
+
        if (rv < 0) {
 put_userns:
                put_user_ns(user_ns);
@@ -485,15 +490,23 @@ static void cleanup_net(struct work_struct *work)
        struct net *net, *tmp, *last;
        struct list_head net_kill_list;
        LIST_HEAD(net_exit_list);
+       unsigned write;
 
        /* Atomically snapshot the list of namespaces to cleanup */
        spin_lock_irq(&cleanup_list_lock);
        list_replace_init(&cleanup_list, &net_kill_list);
        spin_unlock_irq(&cleanup_list_lock);
+again:
+       write = READ_ONCE(nr_sync_pernet_ops);
+       if (write)
+               down_write(&net_sem);
+       else
+               down_read(&net_sem);
 
-       down_read(&net_sem);
-       if (nr_sync_pernet_ops)
-               mutex_lock(&net_mutex);
+       if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
+               up_read(&net_sem);
+               goto again;
+       }
 
        /* Don't let anyone else find us. */
        rtnl_lock();
@@ -528,14 +541,14 @@ static void cleanup_net(struct work_struct *work)
        list_for_each_entry_reverse(ops, &pernet_list, list)
                ops_exit_list(ops, &net_exit_list);
 
-       if (nr_sync_pernet_ops)
-               mutex_unlock(&net_mutex);
-
        /* Free the net generic variables */
        list_for_each_entry_reverse(ops, &pernet_list, list)
                ops_free_list(ops, &net_exit_list);
 
-       up_read(&net_sem);
+       if (write)
+               up_write(&net_sem);
+       else
+               up_read(&net_sem);
 
        /* Ensure there are no outstanding rcu callbacks using this
         * network namespace.
@@ -563,8 +576,6 @@ static void cleanup_net(struct work_struct *work)
 void net_ns_barrier(void)
 {
        down_write(&net_sem);
-       mutex_lock(&net_mutex);
-       mutex_unlock(&net_mutex);
        up_write(&net_sem);
 }
 EXPORT_SYMBOL(net_ns_barrier);
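
With net_mutex gone, net_ns_barrier() reduces to a bare
down_write()/up_write() pair: write locking net_sem alone is now
enough to wait out every pernet init/exit method in flight,
whichever side of the semaphore it runs under. The READ_ONCE()
recheck in copy_net_ns() and cleanup_net() closes the remaining
window: a sync pernet_operations may be registered between
sampling nr_sync_pernet_ops and acquiring the read lock, in which
case the caller drops the lock and retries in write mode.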