From 7c34ec19e10c0d13ca2f3435fb85d2dddccad917 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Thu, 23 Aug 2018 13:47:53 +0300 Subject: [PATCH] net/mlx5: Make RoCE and SR-IOV LAG modes explicit With the introduction of SR-IOV LAG, checking whether LAG is active is no longer good enough, since RoCE and SR-IOV LAG each entails different behavior by both the core and infiniband drivers. This patch introduces facilities to discern LAG type, in addition to mlx5_lag_is_active(). These are implemented in such a way as to allow more complex mode combinations in the future. Signed-off-by: Aviv Heller Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 13 ++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/qp.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 4 +- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 79 +++++++++++++++++----- include/linux/mlx5/driver.h | 2 + 8 files changed, 79 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d66457e..e85974a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -445,7 +445,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, if (!ndev) goto out; - if (mlx5_lag_is_active(dev->mdev)) { + if (dev->lag_active) { rcu_read_lock(); upper = netdev_master_upper_dev_get_rcu(ndev); if (upper) { @@ -1848,7 +1848,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, context->lib_caps = req.lib_caps; print_lib_caps(dev, context->lib_caps); - if (mlx5_lag_is_active(dev->mdev)) { + if (dev->lag_active) { u8 port = mlx5_core_native_port_num(dev->mdev); atomic_set(&context->tx_port_affinity, @@ -4841,7 +4841,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) struct mlx5_flow_table *ft; int err; - if (!ns || !mlx5_lag_is_active(mdev)) + if (!ns || !mlx5_lag_is_roce(mdev)) return 0; err = mlx5_cmd_create_vport_lag(mdev); @@ -4855,6 +4855,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) } dev->flow_db->lag_demux_ft = ft; + dev->lag_active = true; return 0; err_destroy_vport_lag: @@ -4866,7 +4867,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; - if (dev->flow_db->lag_demux_ft) { + if (dev->lag_active) { + dev->lag_active = false; + mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft); dev->flow_db->lag_demux_ft = NULL; @@ -6173,7 +6176,7 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) const char *name; rdma_set_device_sysfs_group(&dev->ib_dev, &mlx5_attr_group); - if (!mlx5_lag_is_active(dev->mdev)) + if (!mlx5_lag_is_roce(dev->mdev)) name = "mlx5_%d"; else name = "mlx5_bond_%d"; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index c89b3b4..e507b6e 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -936,6 +936,7 @@ struct mlx5_ib_dev { struct mlx5_ib_delay_drop delay_drop; const struct mlx5_ib_profile *profile; struct mlx5_eswitch_rep *rep; + int lag_active; struct mlx5_ib_lb_state lb; u8 umr_fence; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 3747cc6..a0e9ff7 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3258,7 +3258,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, (ibqp->qp_type == IB_QPT_RAW_PACKET) || (ibqp->qp_type == IB_QPT_XRC_INI) || (ibqp->qp_type == IB_QPT_XRC_TGT)) { - if (mlx5_lag_is_active(dev->mdev)) { + if (dev->lag_active) { u8 p = mlx5_core_native_port_num(dev->mdev); tx_affinity = get_tx_affinity(dev, pd, base, p); context->flags |= cpu_to_be32(tx_affinity << 24); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index d5d161a..b92f8b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -35,7 +35,7 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, dst_is_lag_dev = (uplink_upper && netif_is_lag_master(uplink_upper) && rt->dst.dev == uplink_upper && - mlx5_lag_is_active(priv->mdev)); + mlx5_lag_is_sriov(priv->mdev)); /* if the egress device isn't on the same HW e-switch or * it's a LAG device, use the uplink @@ -94,7 +94,7 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, dst_is_lag_dev = (uplink_upper && netif_is_lag_master(uplink_upper) && dst->dev == uplink_upper && - mlx5_lag_is_active(priv->mdev)); + mlx5_lag_is_sriov(priv->mdev)); /* if the egress device isn't on the same HW e-switch or * it's a LAG device, use the uplink diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 17f2412..e4a34c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -314,7 +314,7 @@ int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: attr->u.ppid.id_len = ETH_ALEN; - if (uplink_upper && mlx5_lag_is_active(uplink_priv->mdev)) { + if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) { ether_addr_copy(attr->u.ppid.id, uplink_upper->dev_addr); } else { struct mlx5e_rep_priv *rpriv = priv->ppriv; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 864f3b0..53ebb5a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2718,7 +2718,7 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS); - return esw_paired && mlx5_lag_is_active(attr->in_mdev) && + return esw_paired && mlx5_lag_is_sriov(attr->in_mdev) && (is_rep_ingress || act_is_encap); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index db5ef70..feb8230 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -37,9 +37,12 @@ #include "eswitch.h" enum { - MLX5_LAG_FLAG_BONDED = 1 << 0, + MLX5_LAG_FLAG_ROCE = 1 << 0, + MLX5_LAG_FLAG_SRIOV = 1 << 1, }; +#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV) + struct lag_func { struct mlx5_core_dev *dev; struct net_device *netdev; @@ -161,9 +164,19 @@ static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, return -1; } +static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev) +{ + return !!(ldev->flags & MLX5_LAG_FLAG_ROCE); +} + +static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev) +{ + return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV); +} + static bool __mlx5_lag_is_active(struct mlx5_lag *ldev) { - return !!(ldev->flags & MLX5_LAG_FLAG_BONDED); + return !!(ldev->flags & MLX5_LAG_MODE_FLAGS); } static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, @@ -229,9 +242,10 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, } static void mlx5_activate_lag(struct mlx5_lag *ldev, - struct lag_tracker *tracker) + struct lag_tracker *tracker, + u8 flags) { - ldev->flags |= MLX5_LAG_FLAG_BONDED; + ldev->flags |= flags; mlx5_create_lag(ldev, tracker); } @@ -240,7 +254,7 @@ static void mlx5_deactivate_lag(struct mlx5_lag *ldev) struct mlx5_core_dev *dev0 = ldev->pf[0].dev; int err; - ldev->flags &= ~MLX5_LAG_FLAG_BONDED; + ldev->flags &= ~MLX5_LAG_MODE_FLAGS; err = mlx5_cmd_destroy_lag(dev0); if (err) @@ -263,15 +277,13 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[0].dev; struct mlx5_core_dev *dev1 = ldev->pf[1].dev; - bool do_bond, sriov_enabled; struct lag_tracker tracker; + bool do_bond, roce_lag; int i; if (!dev0 || !dev1) return; - sriov_enabled = mlx5_sriov_is_enabled(dev0) || mlx5_sriov_is_enabled(dev1); - mutex_lock(&lag_mutex); tracker = ldev->tracker; mutex_unlock(&lag_mutex); @@ -279,28 +291,35 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); if (do_bond && !__mlx5_lag_is_active(ldev)) { - if (!sriov_enabled) + roce_lag = !mlx5_sriov_is_enabled(dev0) && + !mlx5_sriov_is_enabled(dev1); + + if (roce_lag) for (i = 0; i < MLX5_MAX_PORTS; i++) mlx5_remove_dev_by_protocol(ldev->pf[i].dev, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_activate_lag(ldev, &tracker); + mlx5_activate_lag(ldev, &tracker, + roce_lag ? MLX5_LAG_FLAG_ROCE : + MLX5_LAG_FLAG_SRIOV); - if (!sriov_enabled) { + if (roce_lag) { mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); mlx5_nic_vport_enable_roce(dev1); } } else if (do_bond && __mlx5_lag_is_active(ldev)) { mlx5_modify_lag(ldev, &tracker); } else if (!do_bond && __mlx5_lag_is_active(ldev)) { - if (!sriov_enabled) { + roce_lag = __mlx5_lag_is_roce(ldev); + + if (roce_lag) { mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); mlx5_nic_vport_disable_roce(dev1); } mlx5_deactivate_lag(ldev); - if (!sriov_enabled) + if (roce_lag) for (i = 0; i < MLX5_MAX_PORTS; i++) if (ldev->pf[i].dev) mlx5_add_dev_by_protocol(ldev->pf[i].dev, @@ -572,6 +591,20 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev) } } +bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + mutex_lock(&lag_mutex); + ldev = mlx5_lag_dev_get(dev); + res = ldev && __mlx5_lag_is_roce(ldev); + mutex_unlock(&lag_mutex); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_roce); + bool mlx5_lag_is_active(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; @@ -586,6 +619,20 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_lag_is_active); +bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + mutex_lock(&lag_mutex); + ldev = mlx5_lag_dev_get(dev); + res = ldev && __mlx5_lag_is_sriov(ldev); + mutex_unlock(&lag_mutex); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_sriov); + void mlx5_lag_update(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; @@ -609,7 +656,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) mutex_lock(&lag_mutex); ldev = mlx5_lag_dev_get(dev); - if (!(ldev && __mlx5_lag_is_active(ldev))) + if (!(ldev && __mlx5_lag_is_roce(ldev))) goto unlock; if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { @@ -638,7 +685,7 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv) return true; ldev = mlx5_lag_dev_get(dev); - if (!ldev || !__mlx5_lag_is_active(ldev) || ldev->pf[0].dev == dev) + if (!ldev || !__mlx5_lag_is_roce(ldev) || ldev->pf[0].dev == dev) return true; /* If bonded, we do not add an IB device for PF1. */ @@ -665,7 +712,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, mutex_lock(&lag_mutex); ldev = mlx5_lag_dev_get(dev); - if (ldev && __mlx5_lag_is_active(ldev)) { + if (ldev && __mlx5_lag_is_roce(ldev)) { num_ports = MLX5_MAX_PORTS; mdev[0] = ldev->pf[0].dev; mdev[1] = ldev->pf[1].dev; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index cd7af5d..4d16ba0 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1019,6 +1019,8 @@ int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev); int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev); +bool mlx5_lag_is_roce(struct mlx5_core_dev *dev); +bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, -- 2.7.4