net/mlx5: Lock mlx5 devlink health recovery callback
author Moshe Shemesh <moshe@nvidia.com>
Thu, 28 Jul 2022 15:53:49 +0000 (18:53 +0300)
committer Jakub Kicinski <kuba@kernel.org>
Fri, 29 Jul 2022 04:58:47 +0000 (21:58 -0700)
Change the devlink instance locking in the mlx5 driver so that the devlink
health recovery callback runs with the devlink instance lock held, while
keeping every driver path that leads to devl_ API calls locked.

Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/main.c

index 659021c..6e154b5 100644 (file)
@@ -622,8 +622,14 @@ mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
                               struct netlink_ext_ack *extack)
 {
        struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+       struct devlink *devlink = priv_to_devlink(dev);
+       int ret;
 
-       return mlx5_health_try_recover(dev);
+       devl_lock(devlink);
+       ret = mlx5_health_try_recover(dev);
+       devl_unlock(devlink);
+
+       return ret;
 }
 
 static int
@@ -666,16 +672,20 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
        struct mlx5_fw_reporter_ctx fw_reporter_ctx;
        struct mlx5_core_health *health;
        struct mlx5_core_dev *dev;
+       struct devlink *devlink;
        struct mlx5_priv *priv;
 
        health = container_of(work, struct mlx5_core_health, fatal_report_work);
        priv = container_of(health, struct mlx5_priv, health);
        dev = container_of(priv, struct mlx5_core_dev, priv);
+       devlink = priv_to_devlink(dev);
 
        enter_error_state(dev, false);
        if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
+               devl_lock(devlink);
                if (mlx5_health_try_recover(dev))
                        mlx5_core_err(dev, "health recovery failed\n");
+               devl_unlock(devlink);
                return;
        }
        fw_reporter_ctx.err_synd = health->synd;
index 01fcb23..1de9b39 100644 (file)
@@ -1932,7 +1932,7 @@ MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);
 void mlx5_disable_device(struct mlx5_core_dev *dev)
 {
        mlx5_error_sw_reset(dev);
-       mlx5_unload_one(dev);
+       mlx5_unload_one_devl_locked(dev);
 }
 
 int mlx5_recover_device(struct mlx5_core_dev *dev)
@@ -1943,7 +1943,7 @@ int mlx5_recover_device(struct mlx5_core_dev *dev)
                        return -EIO;
        }
 
-       return mlx5_load_one(dev, true);
+       return mlx5_load_one_devl_locked(dev, true);
 }
 
 static struct pci_driver mlx5_core_driver = {