igb: Add lock to avoid data race
authorLin Ma <linma@zju.edu.cn>
Wed, 17 Aug 2022 18:49:21 +0000 (11:49 -0700)
committerJakub Kicinski <kuba@kernel.org>
Thu, 18 Aug 2022 18:03:26 +0000 (11:03 -0700)
The commit c23d92b80e0b ("igb: Teardown SR-IOV before
unregister_netdev()") places the unregister_netdev() call after the
igb_disable_sriov() call to avoid functionality issue.

However, it introduces several race conditions when detaching a device.
For example, when .remove() is called, the below interleaving leads to
use-after-free.

 (FREE from device detaching)      |   (USE from netdev core)
igb_remove                         |  igb_ndo_get_vf_config
 igb_disable_sriov                 |  vf >= adapter->vfs_allocated_count?
  kfree(adapter->vf_data)          |
  adapter->vfs_allocated_count = 0 |
                                   |    memcpy(... adapter->vf_data[vf]

Moreover, the igb_disable_sriov() also suffers from data race with the
requests from VF driver.

 (FREE from device detaching)      |   (USE from requests)
igb_remove                         |  igb_msix_other
 igb_disable_sriov                 |   igb_msg_task
  kfree(adapter->vf_data)          |    vf < adapter->vfs_allocated_count
  adapter->vfs_allocated_count = 0 |

To this end, this commit first eliminates the data races from netdev
core by using rtnl_lock (similar to commit 719479230893 ("dpaa2-eth: add
MAC/PHY support through phylink")). And then adds a spinlock to
eliminate races from driver requests. (similar to commit 1e53834ce541
("ixgbe: Add locking to prevent panic when setting sriov_numvfs to zero")

Fixes: c23d92b80e0b ("igb: Teardown SR-IOV before unregister_netdev()")
Signed-off-by: Lin Ma <linma@zju.edu.cn>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Link: https://lore.kernel.org/r/20220817184921.735244-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/intel/igb/igb.h
drivers/net/ethernet/intel/igb/igb_main.c

index 2d3daf0..015b781 100644 (file)
@@ -664,6 +664,8 @@ struct igb_adapter {
        struct igb_mac_addr *mac_table;
        struct vf_mac_filter vf_macs;
        struct vf_mac_filter *vf_mac_list;
+       /* lock for VF resources */
+       spinlock_t vfs_lock;
 };
 
 /* flags controlling PTP/1588 function */
index d8b836a..2796e81 100644 (file)
@@ -3637,6 +3637,7 @@ static int igb_disable_sriov(struct pci_dev *pdev)
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
+       unsigned long flags;
 
        /* reclaim resources allocated to VFs */
        if (adapter->vf_data) {
@@ -3649,12 +3650,13 @@ static int igb_disable_sriov(struct pci_dev *pdev)
                        pci_disable_sriov(pdev);
                        msleep(500);
                }
-
+               spin_lock_irqsave(&adapter->vfs_lock, flags);
                kfree(adapter->vf_mac_list);
                adapter->vf_mac_list = NULL;
                kfree(adapter->vf_data);
                adapter->vf_data = NULL;
                adapter->vfs_allocated_count = 0;
+               spin_unlock_irqrestore(&adapter->vfs_lock, flags);
                wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
                wrfl();
                msleep(100);
@@ -3814,7 +3816,9 @@ static void igb_remove(struct pci_dev *pdev)
        igb_release_hw_control(adapter);
 
 #ifdef CONFIG_PCI_IOV
+       rtnl_lock();
        igb_disable_sriov(pdev);
+       rtnl_unlock();
 #endif
 
        unregister_netdev(netdev);
@@ -3974,6 +3978,9 @@ static int igb_sw_init(struct igb_adapter *adapter)
 
        spin_lock_init(&adapter->nfc_lock);
        spin_lock_init(&adapter->stats64_lock);
+
+       /* init spinlock to avoid concurrency of VF resources */
+       spin_lock_init(&adapter->vfs_lock);
 #ifdef CONFIG_PCI_IOV
        switch (hw->mac.type) {
        case e1000_82576:
@@ -7958,8 +7965,10 @@ unlock:
 static void igb_msg_task(struct igb_adapter *adapter)
 {
        struct e1000_hw *hw = &adapter->hw;
+       unsigned long flags;
        u32 vf;
 
+       spin_lock_irqsave(&adapter->vfs_lock, flags);
        for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
                /* process any reset requests */
                if (!igb_check_for_rst(hw, vf))
@@ -7973,6 +7982,7 @@ static void igb_msg_task(struct igb_adapter *adapter)
                if (!igb_check_for_ack(hw, vf))
                        igb_rcv_ack_from_vf(adapter, vf);
        }
+       spin_unlock_irqrestore(&adapter->vfs_lock, flags);
 }
 
 /**