netfilter: ipt_CLUSTERIP: fix sleep-in-atomic bug in clusterip_config_entry_put()
author Taehee Yoo <ap420073@gmail.com>
Mon, 5 Nov 2018 09:23:13 +0000 (18:23 +0900)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 27 Feb 2019 09:09:03 +0000 (10:09 +0100)
commit 2a61d8b883bbad26b06d2e6cc3777a697e78830d upstream.

proc_remove() can sleep, so it must not be called while a spinlock is held.
Hence proc_remove() is moved outside of the spin_lock section, and a mutex
is added to synchronize creation and removal of the proc entry (config->pde).

test commands:
SHELL#1
   %while :; do iptables -A INPUT -p udp -i enp2s0 -d 192.168.1.100 \
   --dport 9000  -j CLUSTERIP --new --hashmode sourceip \
   --clustermac 01:00:5e:00:00:21 --total-nodes 3 --local-node 3; \
   iptables -F; done

SHELL#2
   %while :; do echo +1 > /proc/net/ipt_CLUSTERIP/192.168.1.100; \
   echo -1 > /proc/net/ipt_CLUSTERIP/192.168.1.100; done

[ 2949.569864] BUG: sleeping function called from invalid context at kernel/sched/completion.c:99
[ 2949.579944] in_atomic(): 1, irqs_disabled(): 0, pid: 5472, name: iptables
[ 2949.587920] 1 lock held by iptables/5472:
[ 2949.592711]  #0: 000000008f0ebcf2 (&(&cn->lock)->rlock){+...}, at: refcount_dec_and_lock+0x24/0x50
[ 2949.603307] CPU: 1 PID: 5472 Comm: iptables Tainted: G        W         4.19.0-rc5+ #16
[ 2949.604212] Hardware name: To be filled by O.E.M. To be filled by O.E.M./Aptio CRB, BIOS 5.6.5 07/08/2015
[ 2949.604212] Call Trace:
[ 2949.604212]  dump_stack+0xc9/0x16b
[ 2949.604212]  ? show_regs_print_info+0x5/0x5
[ 2949.604212]  ___might_sleep+0x2eb/0x420
[ 2949.604212]  ? set_rq_offline.part.87+0x140/0x140
[ 2949.604212]  ? _rcu_barrier_trace+0x400/0x400
[ 2949.604212]  wait_for_completion+0x94/0x710
[ 2949.604212]  ? wait_for_completion_interruptible+0x780/0x780
[ 2949.604212]  ? __kernel_text_address+0xe/0x30
[ 2949.604212]  ? __lockdep_init_map+0x10e/0x5c0
[ 2949.604212]  ? __lockdep_init_map+0x10e/0x5c0
[ 2949.604212]  ? __init_waitqueue_head+0x86/0x130
[ 2949.604212]  ? init_wait_entry+0x1a0/0x1a0
[ 2949.604212]  proc_entry_rundown+0x208/0x270
[ 2949.604212]  ? proc_reg_get_unmapped_area+0x370/0x370
[ 2949.604212]  ? __lock_acquire+0x4500/0x4500
[ 2949.604212]  ? complete+0x18/0x70
[ 2949.604212]  remove_proc_subtree+0x143/0x2a0
[ 2949.708655]  ? remove_proc_entry+0x390/0x390
[ 2949.708655]  clusterip_tg_destroy+0x27a/0x630 [ipt_CLUSTERIP]
[ ... ]

Fixes: b3e456fce9f5 ("netfilter: ipt_CLUSTERIP: fix a race condition of proc file creation")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
net/ipv4/netfilter/ipt_CLUSTERIP.c

index fb1e7f2..3cd237b 100644 (file)
@@ -56,7 +56,7 @@ struct clusterip_config {
 #endif
        enum clusterip_hashmode hash_mode;      /* which hashing mode */
        u_int32_t hash_initval;                 /* hash initialization */
-       struct rcu_head rcu;
+       struct rcu_head rcu;                    /* for call_rcu_bh */
        struct net *net;                        /* netns for pernet list */
        char ifname[IFNAMSIZ];                  /* device ifname */
 };
@@ -72,6 +72,8 @@ struct clusterip_net {
 
 #ifdef CONFIG_PROC_FS
        struct proc_dir_entry *procdir;
+       /* mutex protects the config->pde*/
+       struct mutex mutex;
 #endif
 };
 
@@ -118,17 +120,18 @@ clusterip_config_entry_put(struct clusterip_config *c)
 
        local_bh_disable();
        if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
+               list_del_rcu(&c->list);
+               spin_unlock(&cn->lock);
+               local_bh_enable();
                /* In case anyone still accesses the file, the open/close
                 * functions are also incrementing the refcount on their own,
                 * so it's safe to remove the entry even if it's in use. */
 #ifdef CONFIG_PROC_FS
+               mutex_lock(&cn->mutex);
                if (cn->procdir)
                        proc_remove(c->pde);
+               mutex_unlock(&cn->mutex);
 #endif
-               list_del_rcu(&c->list);
-               spin_unlock(&cn->lock);
-               local_bh_enable();
-
                return;
        }
        local_bh_enable();
@@ -278,9 +281,11 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
 
                /* create proc dir entry */
                sprintf(buffer, "%pI4", &ip);
+               mutex_lock(&cn->mutex);
                c->pde = proc_create_data(buffer, 0600,
                                          cn->procdir,
                                          &clusterip_proc_fops, c);
+               mutex_unlock(&cn->mutex);
                if (!c->pde) {
                        err = -ENOMEM;
                        goto err;
@@ -833,6 +838,7 @@ static int clusterip_net_init(struct net *net)
                pr_err("Unable to proc dir entry\n");
                return -ENOMEM;
        }
+       mutex_init(&cn->mutex);
 #endif /* CONFIG_PROC_FS */
 
        return 0;
@@ -841,9 +847,12 @@ static int clusterip_net_init(struct net *net)
 static void clusterip_net_exit(struct net *net)
 {
        struct clusterip_net *cn = clusterip_pernet(net);
+
 #ifdef CONFIG_PROC_FS
+       mutex_lock(&cn->mutex);
        proc_remove(cn->procdir);
        cn->procdir = NULL;
+       mutex_unlock(&cn->mutex);
 #endif
        nf_unregister_net_hook(net, &cip_arp_ops);
 }