net/mlx5: Update SRIOV enable/disable to handle EC/VFs
author Daniel Jurgens <danielj@nvidia.com>
Tue, 7 Mar 2023 16:52:29 +0000 (18:52 +0200)
committer Saeed Mahameed <saeedm@nvidia.com>
Sat, 10 Jun 2023 01:40:52 +0000 (18:40 -0700)
Previously on the embedded CPU platform SRIOV was never enabled/disabled
via mlx5_core_sriov_configure. Host VF updates are provided by an event
handler. Now in the disable flow it must be known if this is a disable
due to driver unload or SRIOV detach, or if the user updated the number
of VFs. If the disable is due to a change in the number of VFs, only
wait for the pages of the EC VFs.

Signed-off-by: Daniel Jurgens <danielj@nvidia.com>
Reviewed-by: William Tu <witu@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlx5/core/sriov.c

index d6ee016..fed8b48 100644 (file)
@@ -1809,7 +1809,7 @@ static void remove_one(struct pci_dev *pdev)
        mlx5_drain_fw_reset(dev);
        mlx5_drain_health_wq(dev);
        devlink_unregister(devlink);
-       mlx5_sriov_disable(pdev);
+       mlx5_sriov_disable(pdev, false);
        mlx5_thermal_uninit(dev);
        mlx5_crdump_disable(dev);
        mlx5_uninit_one(dev);
index 7ca0c7a..7a5f040 100644 (file)
@@ -195,7 +195,7 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
 int mlx5_sriov_attach(struct mlx5_core_dev *dev);
 void mlx5_sriov_detach(struct mlx5_core_dev *dev);
 int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
-void mlx5_sriov_disable(struct pci_dev *pdev);
+void mlx5_sriov_disable(struct pci_dev *pdev, bool num_vf_change);
 int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count);
 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
 int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
index c2463a1..b73583b 100644 (file)
@@ -123,9 +123,11 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
 }
 
 static void
-mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf)
+mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf, bool num_vf_change)
 {
        struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+       bool wait_for_ec_vf_pages = true;
+       bool wait_for_vf_pages = true;
        int err;
        int vf;
 
@@ -147,11 +149,30 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf)
 
        mlx5_eswitch_disable_sriov(dev->priv.eswitch, clear_vf);
 
+       /* There are a number of scenarios when SRIOV is being disabled:
+        *     1. VFs or ECVFs had been created, and now set back to 0 (num_vf_change == true).
+        *              - If EC SRIOV is enabled then this flow is happening on the
+        *                embedded platform, wait for only EC VF pages.
+        *              - If EC SRIOV is not enabled this flow is happening on non-embedded
+        *                platform, wait for the VF pages.
+        *
+        *     2. The driver is being unloaded. In this case wait for all pages.
+        */
+       if (num_vf_change) {
+               if (mlx5_core_ec_sriov_enabled(dev))
+                       wait_for_vf_pages = false;
+               else
+                       wait_for_ec_vf_pages = false;
+       }
+
+       if (wait_for_ec_vf_pages && mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_EC_VF]))
+               mlx5_core_warn(dev, "timeout reclaiming EC VFs pages\n");
+
        /* For ECPFs, skip waiting for host VF pages until ECPF is destroyed */
        if (mlx5_core_is_ecpf(dev))
                return;
 
-       if (mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF]))
+       if (wait_for_vf_pages && mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF]))
                mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
 }
 
@@ -172,12 +193,12 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
        err = pci_enable_sriov(pdev, num_vfs);
        if (err) {
                mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err);
-               mlx5_device_disable_sriov(dev, num_vfs, true);
+               mlx5_device_disable_sriov(dev, num_vfs, true, true);
        }
        return err;
 }
 
-void mlx5_sriov_disable(struct pci_dev *pdev)
+void mlx5_sriov_disable(struct pci_dev *pdev, bool num_vf_change)
 {
        struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
        struct devlink *devlink = priv_to_devlink(dev);
@@ -185,7 +206,7 @@ void mlx5_sriov_disable(struct pci_dev *pdev)
 
        pci_disable_sriov(pdev);
        devl_lock(devlink);
-       mlx5_device_disable_sriov(dev, num_vfs, true);
+       mlx5_device_disable_sriov(dev, num_vfs, true, num_vf_change);
        devl_unlock(devlink);
 }
 
@@ -200,7 +221,7 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
        if (num_vfs)
                err = mlx5_sriov_enable(pdev, num_vfs);
        else
-               mlx5_sriov_disable(pdev);
+               mlx5_sriov_disable(pdev, true);
 
        if (!err)
                sriov->num_vfs = num_vfs;
@@ -245,7 +266,7 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev)
        if (!mlx5_core_is_pf(dev))
                return;
 
-       mlx5_device_disable_sriov(dev, pci_num_vf(dev->pdev), false);
+       mlx5_device_disable_sriov(dev, pci_num_vf(dev->pdev), false, false);
 }
 
 static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev)