net: Write lock dev_base_lock without disabling bottom halves.
authorSebastian Andrzej Siewior <bigeasy@linutronix.de>
Fri, 26 Nov 2021 16:15:29 +0000 (17:15 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 29 Jun 2022 07:03:22 +0000 (09:03 +0200)
[ Upstream commit fd888e85fe6b661e78044dddfec0be5271afa626 ]

The writer acquires dev_base_lock with disabled bottom halves.
The reader can acquire dev_base_lock without disabling bottom halves
because there is no writer in softirq context.

On PREEMPT_RT the softirqs are preemptible and local_bh_disable() acts
as a lock to ensure that resources, that are protected by disabling
bottom halves, remain protected.
This leads to a circular locking dependency if the lock acquired with
disabled bottom halves (as in write_lock_bh()) and somewhere else with
enabled bottom halves (as by read_lock() in netstat_show()) followed by
disabling bottom halves (cxgb_get_stats() -> t4_wr_mbox_meat_timeout()
-> spin_lock_bh()). This is the reverse locking order.

All read_lock() invocation are from sysfs callback which are not invoked
from softirq context. Therefore there is no need to disable bottom
halves while acquiring a write lock.

Acquire the write lock of dev_base_lock without disabling bottom halves.

Reported-by: Pei Zhang <pezhang@redhat.com>
Reported-by: Luis Claudio R. Goncalves <lgoncalv@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
net/core/dev.c
net/core/link_watch.c
net/core/rtnetlink.c
net/hsr/hsr_device.c

index b9731b2..860fc6a 100644 (file)
@@ -365,12 +365,12 @@ static void list_netdevice(struct net_device *dev)
 
        ASSERT_RTNL();
 
-       write_lock_bh(&dev_base_lock);
+       write_lock(&dev_base_lock);
        list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
        netdev_name_node_add(net, dev->name_node);
        hlist_add_head_rcu(&dev->index_hlist,
                           dev_index_hash(net, dev->ifindex));
-       write_unlock_bh(&dev_base_lock);
+       write_unlock(&dev_base_lock);
 
        dev_base_seq_inc(net);
 }
@@ -383,11 +383,11 @@ static void unlist_netdevice(struct net_device *dev)
        ASSERT_RTNL();
 
        /* Unlink dev from the device chain */
-       write_lock_bh(&dev_base_lock);
+       write_lock(&dev_base_lock);
        list_del_rcu(&dev->dev_list);
        netdev_name_node_del(dev->name_node);
        hlist_del_rcu(&dev->index_hlist);
-       write_unlock_bh(&dev_base_lock);
+       write_unlock(&dev_base_lock);
 
        dev_base_seq_inc(dev_net(dev));
 }
@@ -1266,15 +1266,15 @@ rollback:
 
        netdev_adjacent_rename_links(dev, oldname);
 
-       write_lock_bh(&dev_base_lock);
+       write_lock(&dev_base_lock);
        netdev_name_node_del(dev->name_node);
-       write_unlock_bh(&dev_base_lock);
+       write_unlock(&dev_base_lock);
 
        synchronize_rcu();
 
-       write_lock_bh(&dev_base_lock);
+       write_lock(&dev_base_lock);
        netdev_name_node_add(net, dev->name_node);
-       write_unlock_bh(&dev_base_lock);
+       write_unlock(&dev_base_lock);
 
        ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
        ret = notifier_to_errno(ret);
index 1a45584..9599afd 100644 (file)
@@ -55,7 +55,7 @@ static void rfc2863_policy(struct net_device *dev)
        if (operstate == dev->operstate)
                return;
 
-       write_lock_bh(&dev_base_lock);
+       write_lock(&dev_base_lock);
 
        switch(dev->link_mode) {
        case IF_LINK_MODE_TESTING:
@@ -74,7 +74,7 @@ static void rfc2863_policy(struct net_device *dev)
 
        dev->operstate = operstate;
 
-       write_unlock_bh(&dev_base_lock);
+       write_unlock(&dev_base_lock);
 }
 
 
index 9c0e8cc..8c85e93 100644 (file)
@@ -842,9 +842,9 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
        }
 
        if (dev->operstate != operstate) {
-               write_lock_bh(&dev_base_lock);
+               write_lock(&dev_base_lock);
                dev->operstate = operstate;
-               write_unlock_bh(&dev_base_lock);
+               write_unlock(&dev_base_lock);
                netdev_state_change(dev);
        }
 }
@@ -2781,11 +2781,11 @@ static int do_setlink(const struct sk_buff *skb,
        if (tb[IFLA_LINKMODE]) {
                unsigned char value = nla_get_u8(tb[IFLA_LINKMODE]);
 
-               write_lock_bh(&dev_base_lock);
+               write_lock(&dev_base_lock);
                if (dev->link_mode ^ value)
                        status |= DO_SETLINK_NOTIFY;
                dev->link_mode = value;
-               write_unlock_bh(&dev_base_lock);
+               write_unlock(&dev_base_lock);
        }
 
        if (tb[IFLA_VFINFO_LIST]) {
index 26c3240..ea7b96e 100644 (file)
@@ -30,13 +30,13 @@ static bool is_slave_up(struct net_device *dev)
 
 static void __hsr_set_operstate(struct net_device *dev, int transition)
 {
-       write_lock_bh(&dev_base_lock);
+       write_lock(&dev_base_lock);
        if (dev->operstate != transition) {
                dev->operstate = transition;
-               write_unlock_bh(&dev_base_lock);
+               write_unlock(&dev_base_lock);
                netdev_state_change(dev);
        } else {
-               write_unlock_bh(&dev_base_lock);
+               write_unlock(&dev_base_lock);
        }
 }