net/mlx5: Serialize module cleanup with reload and remove
authorShay Drory <shayd@nvidia.com>
Wed, 14 Dec 2022 20:16:23 +0000 (22:16 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 14 Feb 2023 18:11:47 +0000 (19:11 +0100)
[ Upstream commit 8f0d1451ecf7b3bd5a06ffc866c753d0f3ab4683 ]

Currently, remove and reload flows can run in parallel to module cleanup.
This design is error prone. For example: aux_drivers callbacks are called
from both cleanup and remove flows with different lockings, which can
cause a deadlock[1].
Hence, serialize module cleanup with reload and remove.

[1]
       cleanup                        remove
       -------                        ------
   auxiliary_driver_unregister();
                                     devl_lock()
                                      auxiliary_device_delete(mlx5e_aux)
    device_lock(mlx5e_aux)
     devl_lock()
                                       device_lock(mlx5e_aux)

Fixes: 912cebf420c2 ("net/mlx5e: Connect ethernet part to auxiliary bus")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/main.c

index d4db1ad..f071755 100644 (file)
@@ -2094,7 +2094,7 @@ static int __init mlx5_init(void)
        mlx5_core_verify_params();
        mlx5_register_debugfs();
 
-       err = pci_register_driver(&mlx5_core_driver);
+       err = mlx5e_init();
        if (err)
                goto err_debug;
 
@@ -2102,16 +2102,16 @@ static int __init mlx5_init(void)
        if (err)
                goto err_sf;
 
-       err = mlx5e_init();
+       err = pci_register_driver(&mlx5_core_driver);
        if (err)
-               goto err_en;
+               goto err_pci;
 
        return 0;
 
-err_en:
+err_pci:
        mlx5_sf_driver_unregister();
 err_sf:
-       pci_unregister_driver(&mlx5_core_driver);
+       mlx5e_cleanup();
 err_debug:
        mlx5_unregister_debugfs();
        return err;
@@ -2119,9 +2119,9 @@ err_debug:
 
 static void __exit mlx5_cleanup(void)
 {
-       mlx5e_cleanup();
-       mlx5_sf_driver_unregister();
        pci_unregister_driver(&mlx5_core_driver);
+       mlx5_sf_driver_unregister();
+       mlx5e_cleanup();
        mlx5_unregister_debugfs();
 }