net/mlx5: Bridge, fix peer entry ageing in LAG mode
author Vlad Buslov <vladbu@nvidia.com>
Wed, 9 Aug 2023 09:10:57 +0000 (11:10 +0200)
committer Saeed Mahameed <saeedm@nvidia.com>
Thu, 12 Oct 2023 18:10:33 +0000 (11:10 -0700)
With the current implementation, in single FDB LAG mode all packets are
processed by eswitch 0 rules. As such, 'peer' FDB entries receive the
packets for rules of other eswitches and are responsible for updating the
main entry by sending a SWITCHDEV_FDB_ADD_TO_BRIDGE notification from their
background update wq task. However, this introduces a race condition when
a non-zero eswitch instance decides to delete an FDB entry and sends a
SWITCHDEV_FDB_DEL_TO_BRIDGE notification, but another eswitch's update task
refreshes the same entry concurrently while its async delete work is still
pending on the workqueue. In such a case another
SWITCHDEV_FDB_ADD_TO_BRIDGE event may be generated and the entry will
remain stuck in the FDB marked as 'offloaded', since no more
SWITCHDEV_FDB_DEL_TO_BRIDGE notifications are sent for deleting the peer
entries.

Fix the issue by synchronously marking deleted entries with the
MLX5_ESW_BRIDGE_FLAG_DELETED flag and skipping them in the background
update job.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h

index 0fef853..5d128c5 100644 (file)
@@ -467,6 +467,17 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb,
                /* only handle the event on peers */
                if (mlx5_esw_bridge_is_local(dev, rep, esw))
                        break;
+
+               fdb_info = container_of(info,
+                                       struct switchdev_notifier_fdb_info,
+                                       info);
+               /* Mark for deletion to prevent the update wq task from
+                * spuriously refreshing the entry which would mark it again as
+                * offloaded in SW bridge. After this fallthrough to regular
+                * async delete code.
+                */
+               mlx5_esw_bridge_fdb_mark_deleted(dev, vport_num, esw_owner_vhca_id, br_offloads,
+                                                fdb_info);
                fallthrough;
        case SWITCHDEV_FDB_ADD_TO_DEVICE:
        case SWITCHDEV_FDB_DEL_TO_DEVICE:
index e36294b..1b9bc32 100644 (file)
@@ -1748,6 +1748,28 @@ void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16
        entry->lastuse = jiffies;
 }
 
+void mlx5_esw_bridge_fdb_mark_deleted(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+                                     struct mlx5_esw_bridge_offloads *br_offloads,
+                                     struct switchdev_notifier_fdb_info *fdb_info)
+{
+       struct mlx5_esw_bridge_fdb_entry *entry;
+       struct mlx5_esw_bridge *bridge;
+
+       bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+       if (!bridge)
+               return;
+
+       entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid);
+       if (!entry) {
+               esw_debug(br_offloads->esw->dev,
+                         "FDB mark deleted entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n",
+                         fdb_info->addr, fdb_info->vid, vport_num);
+               return;
+       }
+
+       entry->flags |= MLX5_ESW_BRIDGE_FLAG_DELETED;
+}
+
 void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
                                struct mlx5_esw_bridge_offloads *br_offloads,
                                struct switchdev_notifier_fdb_info *fdb_info)
@@ -1810,7 +1832,8 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads)
                        unsigned long lastuse =
                                (unsigned long)mlx5_fc_query_lastuse(entry->ingress_counter);
 
-                       if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER)
+                       if (entry->flags & (MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER |
+                                           MLX5_ESW_BRIDGE_FLAG_DELETED))
                                continue;
 
                        if (time_after(lastuse, entry->lastuse))
index c2c7c70..d6f5391 100644 (file)
@@ -62,6 +62,9 @@ int mlx5_esw_bridge_vport_peer_unlink(struct net_device *br_netdev, u16 vport_nu
 void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
                                     struct mlx5_esw_bridge_offloads *br_offloads,
                                     struct switchdev_notifier_fdb_info *fdb_info);
+void mlx5_esw_bridge_fdb_mark_deleted(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+                                     struct mlx5_esw_bridge_offloads *br_offloads,
+                                     struct switchdev_notifier_fdb_info *fdb_info);
 void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
                                struct mlx5_esw_bridge_offloads *br_offloads,
                                struct switchdev_notifier_fdb_info *fdb_info);
index 4911cc3..7c251af 100644 (file)
@@ -133,6 +133,7 @@ struct mlx5_esw_bridge_mdb_key {
 enum {
        MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0),
        MLX5_ESW_BRIDGE_FLAG_PEER = BIT(1),
+       MLX5_ESW_BRIDGE_FLAG_DELETED = BIT(2),
 };
 
 enum {