{net/RDMA}/mlx5: introduce lag_for_each_peer
authorShay Drory <shayd@nvidia.com>
Tue, 6 Jun 2023 07:12:06 +0000 (00:12 -0700)
committerSaeed Mahameed <saeedm@nvidia.com>
Wed, 7 Jun 2023 21:00:42 +0000 (14:00 -0700)
Introduce a generic API to iterate over all the devices which are part
of the LAG. This API replaces mlx5_lag_get_peer_mdev(), which retrieved
only a single peer device from the LAG.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
drivers/infiniband/hw/mlx5/ib_rep.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
include/linux/mlx5/driver.h

index a4db22f..c7a4ee8 100644 (file)
@@ -30,45 +30,65 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
 
 static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
 
+static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
+{
+       struct mlx5_core_dev *peer_dev;
+       int i;
+
+       mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+               u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
+
+               if (mlx5_lag_is_mpesw(peer_dev))
+                       *num_ports += peer_num_ports;
+               else
+                       /* Only 1 ib port is the representor for all uplinks */
+                       *num_ports += peer_num_ports - 1;
+       }
+}
+
 static int
 mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
        u32 num_ports = mlx5_eswitch_get_total_vports(dev);
+       struct mlx5_core_dev *lag_master = dev;
        const struct mlx5_ib_profile *profile;
        struct mlx5_core_dev *peer_dev;
        struct mlx5_ib_dev *ibdev;
-       int second_uplink = false;
-       u32 peer_num_ports;
+       int new_uplink = false;
        int vport_index;
        int ret;
+       int i;
 
        vport_index = rep->vport_index;
 
        if (mlx5_lag_is_shared_fdb(dev)) {
-               peer_dev = mlx5_lag_get_peer_mdev(dev);
-               peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
                if (mlx5_lag_is_master(dev)) {
-                       if (mlx5_lag_is_mpesw(dev))
-                               num_ports += peer_num_ports;
-                       else
-                               num_ports += peer_num_ports - 1;
-
+                       mlx5_ib_num_ports_update(dev, &num_ports);
                } else {
                        if (rep->vport == MLX5_VPORT_UPLINK) {
                                if (!mlx5_lag_is_mpesw(dev))
                                        return 0;
-                               second_uplink = true;
+                               new_uplink = true;
                        }
+                       mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+                               u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);
+
+                               if (mlx5_lag_is_master(peer_dev))
+                                       lag_master = peer_dev;
+                               else if (!mlx5_lag_is_mpesw(dev))
+                               /* Only 1 ib port is the representor for all uplinks */
+                                       peer_n_ports--;
 
-                       vport_index += peer_num_ports;
-                       dev = peer_dev;
+                               if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
+                                       vport_index += peer_n_ports;
+                       }
                }
        }
 
-       if (rep->vport == MLX5_VPORT_UPLINK && !second_uplink)
+       if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
                profile = &raw_eth_profile;
        else
-               return mlx5_ib_set_vport_rep(dev, rep, vport_index);
+               return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
 
        ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
        if (!ibdev)
@@ -85,8 +105,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
        vport_index = rep->vport_index;
        ibdev->port[vport_index].rep = rep;
        ibdev->port[vport_index].roce.netdev =
-               mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
-       ibdev->mdev = dev;
+               mlx5_ib_get_rep_netdev(lag_master->priv.eswitch, rep->vport);
+       ibdev->mdev = lag_master;
        ibdev->num_ports = num_ports;
 
        ret = __mlx5_ib_add(ibdev, profile);
@@ -94,8 +114,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
                goto fail_add;
 
        rep->rep_data[REP_IB].priv = ibdev;
-       if (mlx5_lag_is_shared_fdb(dev))
-               mlx5_ib_register_peer_vport_reps(dev);
+       if (mlx5_lag_is_shared_fdb(lag_master))
+               mlx5_ib_register_peer_vport_reps(lag_master);
 
        return 0;
 
@@ -118,23 +138,27 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
        struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
        int vport_index = rep->vport_index;
        struct mlx5_ib_port *port;
+       int i;
 
        if (WARN_ON(!mdev))
                return;
 
+       if (!dev)
+               return;
+
        if (mlx5_lag_is_shared_fdb(mdev) &&
            !mlx5_lag_is_master(mdev)) {
-               struct mlx5_core_dev *peer_mdev;
-
                if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
                        return;
-               peer_mdev = mlx5_lag_get_peer_mdev(mdev);
-               vport_index += mlx5_eswitch_get_total_vports(peer_mdev);
+               for (i = 0; i < dev->num_ports; i++) {
+                       if (dev->port[i].rep == rep)
+                               break;
+               }
+               if (WARN_ON(i == dev->num_ports))
+                       return;
+               vport_index = i;
        }
 
-       if (!dev)
-               return;
-
        port = &dev->port[vport_index];
        write_lock(&port->roce.netdev_lock);
        port->roce.netdev = NULL;
@@ -143,16 +167,18 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
        port->rep = NULL;
 
        if (rep->vport == MLX5_VPORT_UPLINK) {
-               struct mlx5_core_dev *peer_mdev;
-               struct mlx5_eswitch *esw;
 
                if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
                        return;
 
                if (mlx5_lag_is_shared_fdb(mdev)) {
-                       peer_mdev = mlx5_lag_get_peer_mdev(mdev);
-                       esw = peer_mdev->priv.eswitch;
-                       mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+                       struct mlx5_core_dev *peer_mdev;
+                       struct mlx5_eswitch *esw;
+
+                       mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
+                               esw = peer_mdev->priv.eswitch;
+                               mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+                       }
                }
                __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
        }
@@ -166,14 +192,14 @@ static const struct mlx5_eswitch_rep_ops rep_ops = {
 
 static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
 {
-       struct mlx5_core_dev *peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+       struct mlx5_core_dev *peer_mdev;
        struct mlx5_eswitch *esw;
+       int i;
 
-       if (!peer_mdev)
-               return;
-
-       esw = peer_mdev->priv.eswitch;
-       mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+       mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
+               esw = peer_mdev->priv.eswitch;
+               mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+       }
 }
 
 struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
index 11374c3..8a10ed4 100644 (file)
@@ -244,16 +244,22 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
            ft->type == FS_FT_FDB &&
            mlx5_lag_is_shared_fdb(dev) &&
            mlx5_lag_is_master(dev)) {
-               err = mlx5_cmd_set_slave_root_fdb(dev,
-                                                 mlx5_lag_get_peer_mdev(dev),
-                                                 !disconnect, (!disconnect) ?
-                                                 ft->id : 0);
-               if (err && !disconnect) {
-                       MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
-                       MLX5_SET(set_flow_table_root_in, in, table_id,
-                                ns->root_ft->id);
-                       mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+               struct mlx5_core_dev *peer_dev;
+               int i;
+
+               mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+                       err = mlx5_cmd_set_slave_root_fdb(dev, peer_dev, !disconnect,
+                                                         (!disconnect) ? ft->id : 0);
+                       if (err && !disconnect) {
+                               MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
+                               MLX5_SET(set_flow_table_root_in, in, table_id,
+                                        ns->root_ft->id);
+                               mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+                       }
+                       if (err)
+                               break;
                }
+
        }
 
        return err;
index c820f7d..c55e36e 100644 (file)
@@ -1519,26 +1519,37 @@ u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
 
-struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
+struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i)
 {
        struct mlx5_core_dev *peer_dev = NULL;
        struct mlx5_lag *ldev;
        unsigned long flags;
+       int idx;
 
        spin_lock_irqsave(&lag_lock, flags);
        ldev = mlx5_lag_dev(dev);
        if (!ldev)
                goto unlock;
 
-       peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
-                          ldev->pf[MLX5_LAG_P2].dev :
-                          ldev->pf[MLX5_LAG_P1].dev;
+       if (*i == ldev->ports)
+               goto unlock;
+       for (idx = *i; idx < ldev->ports; idx++)
+               if (ldev->pf[idx].dev != dev)
+                       break;
+
+       if (idx == ldev->ports) {
+               *i = idx;
+               goto unlock;
+       }
+       *i = idx + 1;
+
+       peer_dev = ldev->pf[idx].dev;
 
 unlock:
        spin_unlock_irqrestore(&lag_lock, flags);
        return peer_dev;
 }
-EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
+EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev);
 
 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
                                 u64 *values,
index 94d2be5..9a744c4 100644 (file)
@@ -1174,7 +1174,13 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
                                 u64 *values,
                                 int num_counters,
                                 size_t *offsets);
-struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev);
+struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i);
+
+#define mlx5_lag_for_each_peer_mdev(dev, peer, i)                              \
+       for (i = 0, peer = mlx5_lag_get_next_peer_mdev(dev, &i);                \
+            peer;                                                              \
+            peer = mlx5_lag_get_next_peer_mdev(dev, &i))
+
 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev);
 struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
 void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);