Merge tag 'mlx5-updates-2023-05-31' of git://git.kernel.org/pub/scm/linux/kernel...
author    Jakub Kicinski <kuba@kernel.org>
          Mon, 5 Jun 2023 22:42:22 +0000 (15:42 -0700)
committer Jakub Kicinski <kuba@kernel.org>
          Mon, 5 Jun 2023 22:42:22 +0000 (15:42 -0700)
Saeed Mahameed says:

====================
mlx5-updates-2023-05-31

net/mlx5: Support 4 ports VF LAG, part 1/2

This series continues the earlier series [1], "Support 4 ports HCAs LAG
mode" by Mark Bloch, and adds support for 4 ports VF LAG (single FDB
E-Switch).

The patches here refactor the parts of the code that assume VF LAG
supports only two ports, for instance that each device can have only one
peer (see the sketch below).
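
A minimal sketch of the data-layout side of that change (hypothetical
struct names, simplified from the tc_priv.h hunk below; EXAMPLE_MAX_PORTS
stands in for MLX5_MAX_PORTS):

#include <linux/list.h>

#define EXAMPLE_MAX_PORTS 4

/* before: a flow could point to at most one peer flow */
struct example_flow_old {
	struct example_flow_old *peer_flow;
};

/* after: a flow keeps a list of duplicated peer flows, plus one list
 * anchor per possible peer port so each peer eswitch can track it
 */
struct example_flow_new {
	struct list_head peer[EXAMPLE_MAX_PORTS];
	struct list_head peer_flows;
};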

Patches 1-5:
- Refactor ETH handling of TC rules of eswitches with peers.
Patch 6:
- Refactor the peer miss group table.
Patches 7-9:
- Refactor single FDB E-Switch creation.
Patch 10:
- Refactor the DR layer.
Patches 11-14:
- Refactor the devcom layer (see the iteration sketch after this list).
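
The devcom rework replaces the single get/release peer-data pair with a
begin/for-each/end iteration over all peers. A rough usage sketch (the
function name is made up; the mlx5_devcom_* helpers are the ones used in
the hunks below):

static int example_walk_esw_peers(struct mlx5_devcom *devcom)
{
	struct mlx5_eswitch *peer_esw;
	int i;

	/* acquire the component; fails (returns false) if it is not ready */
	if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
		return -ENODEV;

	mlx5_devcom_for_each_peer_entry(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
					peer_esw, i) {
		/* per-peer work, e.g. adding send-to-vport or miss rules */
	}

	mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	return 0;
}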

The next series will refactor the LAG layer and enable 4 ports VF LAG.
Note that a 4-port HCA can only create a VF LAG spanning all 4 ports;
creating a VF LAG over just 2 or 3 ports of a 4-port HCA is not
possible.

Currently, the Merged E-Switch feature supports only HCAs with 2 ports,
but upcoming patches will add support for HCAs with 4 ports.

To activate VF LAG, a user can execute:

devlink dev eswitch set pci/0000:08:00.0 mode switchdev
devlink dev eswitch set pci/0000:08:00.1 mode switchdev
devlink dev eswitch set pci/0000:08:00.2 mode switchdev
devlink dev eswitch set pci/0000:08:00.3 mode switchdev
ip link add name bond0 type bond
ip link set dev bond0 type bond mode 802.3ad
ip link set dev eth2 master bond0
ip link set dev eth3 master bond0
ip link set dev eth4 master bond0
ip link set dev eth5 master bond0

Where eth2, eth3, eth4 and eth5 are the net interfaces of pci/0000:08:00.0,
pci/0000:08:00.1, pci/0000:08:00.2 and pci/0000:08:00.3, respectively.

The user can verify the LAG state and type via debugfs:
/sys/kernel/debug/mlx5/0000\:08\:00.0/lag/state
/sys/kernel/debug/mlx5/0000\:08\:00.0/lag/type

[1] https://lore.kernel.org/netdev/20220510055743.118828-1-saeedm@nvidia.com/

* tag 'mlx5-updates-2023-05-31' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux:
  net/mlx5: Devcom, extend mlx5_devcom_send_event to work with more than two devices
  net/mlx5: Devcom, introduce devcom_for_each_peer_entry
  net/mlx5: E-switch, mark devcom as not ready when all eswitches are unpaired
  net/mlx5: Devcom, Rename paired to ready
  net/mlx5: DR, handle more than one peer domain
  net/mlx5: E-switch, generalize shared FDB creation
  net/mlx5: E-switch, Handle multiple master egress rules
  net/mlx5: E-switch, refactor FDB miss rule add/remove
  net/mlx5: E-switch, enlarge peer miss group table
  net/mlx5e: Handle offloads flows per peer
  net/mlx5e: en_tc, re-factor query route port
  net/mlx5e: rep, store send to vport rules per peer
  net/mlx5e: tc, Refactor peer add/del flow
  net/mlx5e: en_tc, Extend peer flows to a list
====================

Link: https://lore.kernel.org/r/20230602191301.47004-1-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
24 files changed:
drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h

drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
index ba2b1f2..6cc23af 100644
@@ -94,13 +94,13 @@ struct mlx5e_tc_flow {
         * destinations.
         */
        struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
-       struct mlx5e_tc_flow *peer_flow;
        struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
        struct list_head hairpin; /* flows sharing the same hairpin */
-       struct list_head peer;    /* flows with peer flow */
+       struct list_head peer[MLX5_MAX_PORTS];    /* flows with peer flow */
        struct list_head unready; /* flows not ready to be offloaded (e.g
                                   * due to missing route)
                                   */
+       struct list_head peer_flows; /* flows on peer */
        struct net_device *orig_dev; /* netdev adding flow first */
        int tmp_entry_index;
        struct list_head tmp_list; /* temporary flow list used by neigh update */
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 3e7041b..965a826 100644
@@ -374,7 +374,9 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
                                 struct mlx5_eswitch_rep *rep)
 {
        struct mlx5e_rep_sq *rep_sq, *tmp;
+       struct mlx5e_rep_sq_peer *sq_peer;
        struct mlx5e_rep_priv *rpriv;
+       unsigned long i;
 
        if (esw->mode != MLX5_ESWITCH_OFFLOADS)
                return;
@@ -382,31 +384,78 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
        rpriv = mlx5e_rep_to_rep_priv(rep);
        list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) {
                mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
-               if (rep_sq->send_to_vport_rule_peer)
-                       mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
+               xa_for_each(&rep_sq->sq_peer, i, sq_peer) {
+                       if (sq_peer->rule)
+                               mlx5_eswitch_del_send_to_vport_rule(sq_peer->rule);
+
+                       xa_erase(&rep_sq->sq_peer, i);
+                       kfree(sq_peer);
+               }
+
+               xa_destroy(&rep_sq->sq_peer);
                list_del(&rep_sq->list);
                kfree(rep_sq);
        }
 }
 
+static int mlx5e_sqs2vport_add_peers_rules(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep,
+                                          struct mlx5_devcom *devcom,
+                                          struct mlx5e_rep_sq *rep_sq, int i)
+{
+       struct mlx5_eswitch *peer_esw = NULL;
+       struct mlx5_flow_handle *flow_rule;
+       int tmp;
+
+       mlx5_devcom_for_each_peer_entry(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
+                                       peer_esw, tmp) {
+               int peer_rule_idx = mlx5_get_dev_index(peer_esw->dev);
+               struct mlx5e_rep_sq_peer *sq_peer;
+               int err;
+
+               sq_peer = kzalloc(sizeof(*sq_peer), GFP_KERNEL);
+               if (!sq_peer)
+                       return -ENOMEM;
+
+               flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw,
+                                                               rep, rep_sq->sqn);
+               if (IS_ERR(flow_rule)) {
+                       kfree(sq_peer);
+                       return PTR_ERR(flow_rule);
+               }
+
+               sq_peer->rule = flow_rule;
+               sq_peer->peer = peer_esw;
+               err = xa_insert(&rep_sq->sq_peer, peer_rule_idx, sq_peer, GFP_KERNEL);
+               if (err) {
+                       kfree(sq_peer);
+                       mlx5_eswitch_del_send_to_vport_rule(flow_rule);
+                       return err;
+               }
+       }
+
+       return 0;
+}
+
 static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
                                 struct mlx5_eswitch_rep *rep,
                                 u32 *sqns_array, int sqns_num)
 {
-       struct mlx5_eswitch *peer_esw = NULL;
        struct mlx5_flow_handle *flow_rule;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_rep_sq *rep_sq;
+       struct mlx5_devcom *devcom;
+       bool devcom_locked = false;
        int err;
        int i;
 
        if (esw->mode != MLX5_ESWITCH_OFFLOADS)
                return 0;
 
+       devcom = esw->dev->priv.devcom;
        rpriv = mlx5e_rep_to_rep_priv(rep);
-       if (mlx5_devcom_is_paired(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS))
-               peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom,
-                                                    MLX5_DEVCOM_ESW_OFFLOADS);
+       if (mlx5_devcom_comp_is_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS) &&
+           mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
+               devcom_locked = true;
 
        for (i = 0; i < sqns_num; i++) {
                rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL);
@@ -426,31 +475,30 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
                rep_sq->send_to_vport_rule = flow_rule;
                rep_sq->sqn = sqns_array[i];
 
-               if (peer_esw) {
-                       flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw,
-                                                                       rep, sqns_array[i]);
-                       if (IS_ERR(flow_rule)) {
-                               err = PTR_ERR(flow_rule);
+               xa_init(&rep_sq->sq_peer);
+               if (devcom_locked) {
+                       err = mlx5e_sqs2vport_add_peers_rules(esw, rep, devcom, rep_sq, i);
+                       if (err) {
                                mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+                               xa_destroy(&rep_sq->sq_peer);
                                kfree(rep_sq);
                                goto out_err;
                        }
-                       rep_sq->send_to_vport_rule_peer = flow_rule;
                }
 
                list_add(&rep_sq->list, &rpriv->vport_sqs_list);
        }
 
-       if (peer_esw)
-               mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       if (devcom_locked)
+               mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
 
        return 0;
 
 out_err:
        mlx5e_sqs2vport_stop(esw, rep);
 
-       if (peer_esw)
-               mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       if (devcom_locked)
+               mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
 
        return err;
 }
@@ -1530,17 +1578,24 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
        return rpriv->netdev;
 }
 
-static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep)
+static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep,
+                                        struct mlx5_eswitch *peer_esw)
 {
+       int i = mlx5_get_dev_index(peer_esw->dev);
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_rep_sq *rep_sq;
 
+       WARN_ON_ONCE(!peer_esw);
        rpriv = mlx5e_rep_to_rep_priv(rep);
        list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
-               if (!rep_sq->send_to_vport_rule_peer)
+               struct mlx5e_rep_sq_peer *sq_peer = xa_load(&rep_sq->sq_peer, i);
+
+               if (!sq_peer || sq_peer->peer != peer_esw)
                        continue;
-               mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
-               rep_sq->send_to_vport_rule_peer = NULL;
+
+               mlx5_eswitch_del_send_to_vport_rule(sq_peer->rule);
+               xa_erase(&rep_sq->sq_peer, i);
+               kfree(sq_peer);
        }
 }
 
@@ -1548,24 +1603,52 @@ static int mlx5e_vport_rep_event_pair(struct mlx5_eswitch *esw,
                                      struct mlx5_eswitch_rep *rep,
                                      struct mlx5_eswitch *peer_esw)
 {
+       int i = mlx5_get_dev_index(peer_esw->dev);
        struct mlx5_flow_handle *flow_rule;
+       struct mlx5e_rep_sq_peer *sq_peer;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_rep_sq *rep_sq;
+       int err;
 
        rpriv = mlx5e_rep_to_rep_priv(rep);
        list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
-               if (rep_sq->send_to_vport_rule_peer)
+               sq_peer = xa_load(&rep_sq->sq_peer, i);
+
+               if (sq_peer && sq_peer->peer)
                        continue;
-               flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, rep_sq->sqn);
-               if (IS_ERR(flow_rule))
+
+               flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep,
+                                                               rep_sq->sqn);
+               if (IS_ERR(flow_rule)) {
+                       err = PTR_ERR(flow_rule);
                        goto err_out;
-               rep_sq->send_to_vport_rule_peer = flow_rule;
+               }
+
+               if (sq_peer) {
+                       sq_peer->rule = flow_rule;
+                       sq_peer->peer = peer_esw;
+                       continue;
+               }
+               sq_peer = kzalloc(sizeof(*sq_peer), GFP_KERNEL);
+               if (!sq_peer) {
+                       err = -ENOMEM;
+                       goto err_sq_alloc;
+               }
+               err = xa_insert(&rep_sq->sq_peer, i, sq_peer, GFP_KERNEL);
+               if (err)
+                       goto err_xa;
+               sq_peer->rule = flow_rule;
+               sq_peer->peer = peer_esw;
        }
 
        return 0;
+err_xa:
+       kfree(sq_peer);
+err_sq_alloc:
+       mlx5_eswitch_del_send_to_vport_rule(flow_rule);
 err_out:
-       mlx5e_vport_rep_event_unpair(rep);
-       return PTR_ERR(flow_rule);
+       mlx5e_vport_rep_event_unpair(rep, peer_esw);
+       return err;
 }
 
 static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw,
@@ -1578,7 +1661,7 @@ static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw,
        if (event == MLX5_SWITCHDEV_EVENT_PAIR)
                err = mlx5e_vport_rep_event_pair(esw, rep, data);
        else if (event == MLX5_SWITCHDEV_EVENT_UNPAIR)
-               mlx5e_vport_rep_event_unpair(rep);
+               mlx5e_vport_rep_event_unpair(rep, data);
 
        return err;
 }
drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 80b7f50..70640fa 100644
@@ -225,9 +225,14 @@ struct mlx5e_encap_entry {
        struct rcu_head rcu;
 };
 
+struct mlx5e_rep_sq_peer {
+       struct mlx5_flow_handle *rule;
+       void *peer;
+};
+
 struct mlx5e_rep_sq {
        struct mlx5_flow_handle *send_to_vport_rule;
-       struct mlx5_flow_handle *send_to_vport_rule_peer;
+       struct xarray sq_peer;
        u32 sqn;
        struct list_head         list;
 };
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index fd9f928..88631fb 100644
@@ -1666,8 +1666,11 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro
 {
        struct mlx5e_priv *out_priv, *route_priv;
        struct mlx5_core_dev *route_mdev;
+       struct mlx5_devcom *devcom;
        struct mlx5_eswitch *esw;
        u16 vhca_id;
+       int err;
+       int i;
 
        out_priv = netdev_priv(out_dev);
        esw = out_priv->mdev->priv.eswitch;
@@ -1675,28 +1678,25 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro
        route_mdev = route_priv->mdev;
 
        vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
-       if (mlx5_lag_is_active(out_priv->mdev)) {
-               struct mlx5_devcom *devcom;
-               int err;
-
-               /* In lag case we may get devices from different eswitch instances.
-                * If we failed to get vport num, it means, mostly, that we on the wrong
-                * eswitch.
-                */
-               err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
-               if (err != -ENOENT)
-                       return err;
-
-               rcu_read_lock();
-               devcom = out_priv->mdev->priv.devcom;
-               esw = mlx5_devcom_get_peer_data_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
-               err = esw ? mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport) : -ENODEV;
-               rcu_read_unlock();
+       err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+       if (!err)
+               return err;
 
+       if (!mlx5_lag_is_active(out_priv->mdev))
                return err;
+
+       rcu_read_lock();
+       devcom = out_priv->mdev->priv.devcom;
+       err = -ENODEV;
+       mlx5_devcom_for_each_peer_entry_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
+                                           esw, i) {
+               err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+               if (!err)
+                       break;
        }
+       rcu_read_unlock();
 
-       return mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+       return err;
 }
 
 static int
@@ -1986,47 +1986,59 @@ void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
                mlx5e_flow_put(priv, flow);
 }
 
-static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
+static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow,
+                                      int peer_index)
 {
        struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
+       struct mlx5e_tc_flow *peer_flow;
+       struct mlx5e_tc_flow *tmp;
 
        if (!flow_flag_test(flow, ESWITCH) ||
            !flow_flag_test(flow, DUP))
                return;
 
        mutex_lock(&esw->offloads.peer_mutex);
-       list_del(&flow->peer);
+       list_del(&flow->peer[peer_index]);
        mutex_unlock(&esw->offloads.peer_mutex);
 
-       flow_flag_clear(flow, DUP);
-
-       if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
-               mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
-               kfree(flow->peer_flow);
+       list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) {
+               if (peer_index != mlx5_get_dev_index(peer_flow->priv->mdev))
+                       continue;
+               if (refcount_dec_and_test(&peer_flow->refcnt)) {
+                       mlx5e_tc_del_fdb_flow(peer_flow->priv, peer_flow);
+                       list_del(&peer_flow->peer_flows);
+                       kfree(peer_flow);
+               }
        }
 
-       flow->peer_flow = NULL;
+       if (list_empty(&flow->peer_flows))
+               flow_flag_clear(flow, DUP);
 }
 
-static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
+static void mlx5e_tc_del_fdb_peers_flow(struct mlx5e_tc_flow *flow)
 {
-       struct mlx5_core_dev *dev = flow->priv->mdev;
-       struct mlx5_devcom *devcom = dev->priv.devcom;
-       struct mlx5_eswitch *peer_esw;
-
-       peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
-       if (!peer_esw)
-               return;
+       int i;
 
-       __mlx5e_tc_del_fdb_peer_flow(flow);
-       mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       for (i = 0; i < MLX5_MAX_PORTS; i++) {
+               if (i == mlx5_get_dev_index(flow->priv->mdev))
+                       continue;
+               mlx5e_tc_del_fdb_peer_flow(flow, i);
+       }
 }
 
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow)
 {
        if (mlx5e_is_eswitch_flow(flow)) {
-               mlx5e_tc_del_fdb_peer_flow(flow);
+               struct mlx5_devcom *devcom = flow->priv->mdev->priv.devcom;
+
+               if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) {
+                       mlx5e_tc_del_fdb_flow(priv, flow);
+                       return;
+               }
+
+               mlx5e_tc_del_fdb_peers_flow(flow);
+               mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
                mlx5e_tc_del_fdb_flow(priv, flow);
        } else {
                mlx5e_tc_del_nic_flow(priv, flow);
@@ -4203,8 +4215,8 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
                flow_flag_test(flow, INGRESS);
        bool act_is_encap = !!(attr->action &
                               MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
-       bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
-                                               MLX5_DEVCOM_ESW_OFFLOADS);
+       bool esw_paired = mlx5_devcom_comp_is_ready(esw_attr->in_mdev->priv.devcom,
+                                                   MLX5_DEVCOM_ESW_OFFLOADS);
 
        if (!esw_paired)
                return false;
@@ -4295,6 +4307,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
        INIT_LIST_HEAD(&flow->hairpin);
        INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
        INIT_LIST_HEAD(&flow->attrs);
+       INIT_LIST_HEAD(&flow->peer_flows);
        refcount_set(&flow->refcnt, 1);
        init_completion(&flow->init_done);
        init_completion(&flow->del_hw_done);
@@ -4403,22 +4416,19 @@ out:
 
 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
                                      struct mlx5e_tc_flow *flow,
-                                     unsigned long flow_flags)
+                                     unsigned long flow_flags,
+                                     struct mlx5_eswitch *peer_esw)
 {
        struct mlx5e_priv *priv = flow->priv, *peer_priv;
-       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
-       struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
+       int i = mlx5_get_dev_index(peer_esw->dev);
        struct mlx5e_rep_priv *peer_urpriv;
        struct mlx5e_tc_flow *peer_flow;
        struct mlx5_core_dev *in_mdev;
        int err = 0;
 
-       peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
-       if (!peer_esw)
-               return -ENODEV;
-
        peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
        peer_priv = netdev_priv(peer_urpriv->netdev);
 
@@ -4443,14 +4453,13 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
                goto out;
        }
 
-       flow->peer_flow = peer_flow;
+       list_add_tail(&peer_flow->peer_flows, &flow->peer_flows);
        flow_flag_set(flow, DUP);
        mutex_lock(&esw->offloads.peer_mutex);
-       list_add_tail(&flow->peer, &esw->offloads.peer_flows);
+       list_add_tail(&flow->peer[i], &esw->offloads.peer_flows[i]);
        mutex_unlock(&esw->offloads.peer_mutex);
 
 out:
-       mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
        return err;
 }
 
@@ -4461,30 +4470,48 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
                   struct net_device *filter_dev,
                   struct mlx5e_tc_flow **__flow)
 {
+       struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
        struct mlx5e_rep_priv *rpriv = priv->ppriv;
        struct mlx5_eswitch_rep *in_rep = rpriv->rep;
        struct mlx5_core_dev *in_mdev = priv->mdev;
+       struct mlx5_eswitch *peer_esw;
        struct mlx5e_tc_flow *flow;
        int err;
+       int i;
 
        flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
                                    in_mdev);
        if (IS_ERR(flow))
                return PTR_ERR(flow);
 
-       if (is_peer_flow_needed(flow)) {
-               err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
-               if (err) {
-                       mlx5e_tc_del_fdb_flow(priv, flow);
-                       goto out;
-               }
+       if (!is_peer_flow_needed(flow)) {
+               *__flow = flow;
+               return 0;
        }
 
-       *__flow = flow;
+       if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) {
+               err = -ENODEV;
+               goto clean_flow;
+       }
+
+       mlx5_devcom_for_each_peer_entry(devcom,
+                                       MLX5_DEVCOM_ESW_OFFLOADS,
+                                       peer_esw, i) {
+               err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags, peer_esw);
+               if (err)
+                       goto peer_clean;
+       }
+
+       mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
 
+       *__flow = flow;
        return 0;
 
-out:
+peer_clean:
+       mlx5e_tc_del_fdb_peers_flow(flow);
+       mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+clean_flow:
+       mlx5e_tc_del_fdb_flow(priv, flow);
        return err;
 }
 
@@ -4702,7 +4729,6 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
 {
        struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
        struct rhashtable *tc_ht = get_tc_ht(priv, flags);
-       struct mlx5_eswitch *peer_esw;
        struct mlx5e_tc_flow *flow;
        struct mlx5_fc *counter;
        u64 lastuse = 0;
@@ -4737,23 +4763,29 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
        /* Under multipath it's possible for one rule to be currently
         * un-offloaded while the other rule is offloaded.
         */
-       peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
-       if (!peer_esw)
+       if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
                goto out;
 
-       if (flow_flag_test(flow, DUP) &&
-           flow_flag_test(flow->peer_flow, OFFLOADED)) {
-               u64 bytes2;
-               u64 packets2;
-               u64 lastuse2;
+       if (flow_flag_test(flow, DUP)) {
+               struct mlx5e_tc_flow *peer_flow;
 
-               if (flow_flag_test(flow, USE_ACT_STATS)) {
-                       f->use_act_stats = true;
-               } else {
-                       counter = mlx5e_tc_get_counter(flow->peer_flow);
+               list_for_each_entry(peer_flow, &flow->peer_flows, peer_flows) {
+                       u64 packets2;
+                       u64 lastuse2;
+                       u64 bytes2;
+
+                       if (!flow_flag_test(peer_flow, OFFLOADED))
+                               continue;
+                       if (flow_flag_test(flow, USE_ACT_STATS)) {
+                               f->use_act_stats = true;
+                               break;
+                       }
+
+                       counter = mlx5e_tc_get_counter(peer_flow);
                        if (!counter)
                                goto no_peer_counter;
-                       mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
+                       mlx5_fc_query_cached(counter, &bytes2, &packets2,
+                                            &lastuse2);
 
                        bytes += bytes2;
                        packets += packets2;
@@ -4762,7 +4794,7 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
        }
 
 no_peer_counter:
-       mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
 out:
        flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
                          FLOW_ACTION_HW_STATS_DELAYED);
@@ -5280,9 +5312,14 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
 {
        struct mlx5e_tc_flow *flow, *tmp;
+       int i;
 
-       list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
-               __mlx5e_tc_del_fdb_peer_flow(flow);
+       for (i = 0; i < MLX5_MAX_PORTS; i++) {
+               if (i == mlx5_get_dev_index(esw->dev))
+                       continue;
+               list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows[i], peer[i])
+                       mlx5e_tc_del_fdb_peers_flow(flow);
+       }
 }
 
 void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
index 2e504c7..24b1ca4 100644
@@ -15,13 +15,27 @@ static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport)
        vport->egress.offloads.fwd_rule = NULL;
 }
 
-static void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport)
+void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport, int rule_index)
 {
-       if (!vport->egress.offloads.bounce_rule)
+       struct mlx5_flow_handle *bounce_rule =
+               xa_load(&vport->egress.offloads.bounce_rules, rule_index);
+
+       if (!bounce_rule)
                return;
 
-       mlx5_del_flow_rules(vport->egress.offloads.bounce_rule);
-       vport->egress.offloads.bounce_rule = NULL;
+       mlx5_del_flow_rules(bounce_rule);
+       xa_erase(&vport->egress.offloads.bounce_rules, rule_index);
+}
+
+static void esw_acl_egress_ofld_bounce_rules_destroy(struct mlx5_vport *vport)
+{
+       struct mlx5_flow_handle *bounce_rule;
+       unsigned long i;
+
+       xa_for_each(&vport->egress.offloads.bounce_rules, i, bounce_rule) {
+               mlx5_del_flow_rules(bounce_rule);
+               xa_erase(&vport->egress.offloads.bounce_rules, i);
+       }
 }
 
 static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw,
@@ -96,7 +110,7 @@ static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport)
 {
        esw_acl_egress_vlan_destroy(vport);
        esw_acl_egress_ofld_fwd2vport_destroy(vport);
-       esw_acl_egress_ofld_bounce_rule_destroy(vport);
+       esw_acl_egress_ofld_bounce_rules_destroy(vport);
 }
 
 static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw,
@@ -194,6 +208,7 @@ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport
                vport->egress.acl = NULL;
                return err;
        }
+       vport->egress.type = VPORT_EGRESS_ACL_TYPE_DEFAULT;
 
        err = esw_acl_egress_ofld_groups_create(esw, vport);
        if (err)
drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
index c9f8469..536b04e 100644
@@ -10,6 +10,7 @@
 /* Eswitch acl egress external APIs */
 int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
 void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport);
+void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport, int rule_index);
 int mlx5_esw_acl_egress_vport_bond(struct mlx5_eswitch *esw, u16 active_vport_num,
                                   u16 passive_vport_num);
 int mlx5_esw_acl_egress_vport_unbond(struct mlx5_eswitch *esw, u16 vport_num);
drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
index 1ba03e2..bea7cc6 100644
@@ -647,22 +647,35 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr,
 }
 
 static struct mlx5_flow_handle *
-mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, const unsigned char *addr,
+mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, u16 esw_owner_vhca_id,
+                                        const unsigned char *addr,
                                         struct mlx5_esw_bridge_vlan *vlan, u32 counter_id,
                                         struct mlx5_esw_bridge *bridge)
 {
        struct mlx5_devcom *devcom = bridge->br_offloads->esw->dev->priv.devcom;
+       struct mlx5_eswitch *tmp, *peer_esw = NULL;
        static struct mlx5_flow_handle *handle;
-       struct mlx5_eswitch *peer_esw;
+       int i;
 
-       peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
-       if (!peer_esw)
+       if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
                return ERR_PTR(-ENODEV);
 
+       mlx5_devcom_for_each_peer_entry(devcom,
+                                       MLX5_DEVCOM_ESW_OFFLOADS,
+                                       tmp, i) {
+               if (mlx5_esw_is_owner(tmp, vport_num, esw_owner_vhca_id)) {
+                       peer_esw = tmp;
+                       break;
+               }
+       }
+       if (!peer_esw) {
+               mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+               return ERR_PTR(-ENODEV);
+       }
+
        handle = mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id,
                                                              bridge, peer_esw);
-
-       mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
        return handle;
 }
 
@@ -1369,8 +1382,9 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, u16 esw_ow
        entry->ingress_counter = counter;
 
        handle = peer ?
-               mlx5_esw_bridge_ingress_flow_peer_create(vport_num, addr, vlan,
-                                                        mlx5_fc_id(counter), bridge) :
+               mlx5_esw_bridge_ingress_flow_peer_create(vport_num, esw_owner_vhca_id,
+                                                        addr, vlan, mlx5_fc_id(counter),
+                                                        bridge) :
                mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan,
                                                    mlx5_fc_id(counter), bridge);
        if (IS_ERR(handle)) {
drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c
index 2eae594..2455f8b 100644
@@ -540,16 +540,29 @@ static struct mlx5_flow_handle *
 mlx5_esw_bridge_mcast_filter_flow_peer_create(struct mlx5_esw_bridge_port *port)
 {
        struct mlx5_devcom *devcom = port->bridge->br_offloads->esw->dev->priv.devcom;
+       struct mlx5_eswitch *tmp, *peer_esw = NULL;
        static struct mlx5_flow_handle *handle;
-       struct mlx5_eswitch *peer_esw;
+       int i;
 
-       peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
-       if (!peer_esw)
+       if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
                return ERR_PTR(-ENODEV);
 
+       mlx5_devcom_for_each_peer_entry(devcom,
+                                       MLX5_DEVCOM_ESW_OFFLOADS,
+                                       tmp, i) {
+               if (mlx5_esw_is_owner(tmp, port->vport_num, port->esw_owner_vhca_id)) {
+                       peer_esw = tmp;
+                       break;
+               }
+       }
+       if (!peer_esw) {
+               mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+               return ERR_PTR(-ENODEV);
+       }
+
        handle = mlx5_esw_bridge_mcast_flow_with_esw_create(port, peer_esw);
 
-       mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
        return handle;
 }
 
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index f70124a..c42c16d 100644
@@ -123,8 +123,14 @@ struct vport_ingress {
        } offloads;
 };
 
+enum vport_egress_acl_type {
+       VPORT_EGRESS_ACL_TYPE_DEFAULT,
+       VPORT_EGRESS_ACL_TYPE_SHARED_FDB,
+};
+
 struct vport_egress {
        struct mlx5_flow_table *acl;
+       enum vport_egress_acl_type type;
        struct mlx5_flow_handle  *allowed_vlan;
        struct mlx5_flow_group *vlan_grp;
        union {
@@ -136,7 +142,7 @@ struct vport_egress {
                struct {
                        struct mlx5_flow_group *fwd_grp;
                        struct mlx5_flow_handle *fwd_rule;
-                       struct mlx5_flow_handle *bounce_rule;
+                       struct xarray bounce_rules;
                        struct mlx5_flow_group *bounce_grp;
                } offloads;
        };
@@ -218,7 +224,7 @@ struct mlx5_eswitch_fdb {
                        struct mlx5_flow_group *send_to_vport_grp;
                        struct mlx5_flow_group *send_to_vport_meta_grp;
                        struct mlx5_flow_group *peer_miss_grp;
-                       struct mlx5_flow_handle **peer_miss_rules;
+                       struct mlx5_flow_handle **peer_miss_rules[MLX5_MAX_PORTS];
                        struct mlx5_flow_group *miss_grp;
                        struct mlx5_flow_handle **send_to_vport_meta_rules;
                        struct mlx5_flow_handle *miss_rule_uni;
@@ -249,7 +255,7 @@ struct mlx5_esw_offload {
        struct mlx5_flow_group *vport_rx_drop_group;
        struct mlx5_flow_handle *vport_rx_drop_rule;
        struct xarray vport_reps;
-       struct list_head peer_flows;
+       struct list_head peer_flows[MLX5_MAX_PORTS];
        struct mutex peer_mutex;
        struct mutex encap_tbl_lock; /* protects encap_tbl */
        DECLARE_HASHTABLE(encap_tbl, 8);
@@ -337,6 +343,7 @@ struct mlx5_eswitch {
        int                     mode;
        u16                     manager_vport;
        u16                     first_host_vport;
+       u8                      num_peers;
        struct mlx5_esw_functions esw_funcs;
        struct {
                u32             large_group_num;
@@ -578,6 +585,13 @@ mlx5_esw_is_manager_vport(const struct mlx5_eswitch *esw, u16 vport_num)
        return esw->manager_vport == vport_num;
 }
 
+static inline bool mlx5_esw_is_owner(struct mlx5_eswitch *esw, u16 vport_num,
+                                    u16 esw_owner_vhca_id)
+{
+       return esw_owner_vhca_id == MLX5_CAP_GEN(esw->dev, vhca_id) ||
+               (vport_num == MLX5_VPORT_UPLINK && mlx5_lag_is_master(esw->dev));
+}
+
 static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev)
 {
        return mlx5_core_is_ecpf_esw_manager(dev) ?
@@ -748,9 +762,9 @@ void esw_vport_change_handle_locked(struct mlx5_vport *vport);
 
 bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller);
 
-int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
-                                           struct mlx5_eswitch *slave_esw);
-void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+int mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw,
+                                            struct mlx5_eswitch *slave_esw, int max_slaves);
+void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw,
                                              struct mlx5_eswitch *slave_esw);
 int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
 
@@ -802,14 +816,14 @@ mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev,
 }
 
 static inline int
-mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
-                                       struct mlx5_eswitch *slave_esw)
+mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw,
+                                        struct mlx5_eswitch *slave_esw, int max_slaves)
 {
        return 0;
 }
 
 static inline void
-mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw,
                                         struct mlx5_eswitch *slave_esw) {}
 
 static inline int
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 1b2f5e2..29de4e7 100644
@@ -1132,7 +1132,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
                flows[vport->index] = flow;
        }
 
-       esw->fdb_table.offloads.peer_miss_rules = flows;
+       esw->fdb_table.offloads.peer_miss_rules[mlx5_get_dev_index(peer_dev)] = flows;
 
        kvfree(spec);
        return 0;
@@ -1160,13 +1160,14 @@ alloc_flows_err:
        return err;
 }
 
-static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw)
+static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
+                                       struct mlx5_core_dev *peer_dev)
 {
        struct mlx5_flow_handle **flows;
        struct mlx5_vport *vport;
        unsigned long i;
 
-       flows = esw->fdb_table.offloads.peer_miss_rules;
+       flows = esw->fdb_table.offloads.peer_miss_rules[mlx5_get_dev_index(peer_dev)];
 
        mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev))
                mlx5_del_flow_rules(flows[vport->index]);
@@ -1573,6 +1574,7 @@ esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw,
                               u32 *flow_group_in,
                               int *ix)
 {
+       int max_peer_ports = (esw->total_vports - 1) * (MLX5_MAX_PORTS - 1);
        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
        struct mlx5_flow_group *g;
        void *match_criteria;
@@ -1599,8 +1601,8 @@ esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw,
 
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix);
        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
-                *ix + esw->total_vports - 1);
-       *ix += esw->total_vports;
+                *ix + max_peer_ports);
+       *ix += max_peer_ports + 1;
 
        g = mlx5_create_flow_group(fdb, flow_group_in);
        if (IS_ERR(g)) {
@@ -1702,7 +1704,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
         * total vports of the peer (currently is also uses esw->total_vports).
         */
        table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) +
-                    esw->total_vports * 2 + MLX5_ESW_MISS_FLOWS;
+                    esw->total_vports * MLX5_MAX_PORTS + MLX5_ESW_MISS_FLOWS;
 
        /* create the slow path fdb with encap set, so further table instances
         * can be created at run time while VFs are probed if the FW allows that.
@@ -2510,6 +2512,7 @@ static int __esw_set_master_egress_rule(struct mlx5_core_dev *master,
                                        struct mlx5_vport *vport,
                                        struct mlx5_flow_table *acl)
 {
+       u16 slave_index = MLX5_CAP_GEN(slave, vhca_id);
        struct mlx5_flow_handle *flow_rule = NULL;
        struct mlx5_flow_destination dest = {};
        struct mlx5_flow_act flow_act = {};
@@ -2525,8 +2528,7 @@ static int __esw_set_master_egress_rule(struct mlx5_core_dev *master,
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                            misc_parameters);
        MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
-       MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
-                MLX5_CAP_GEN(slave, vhca_id));
+       MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, slave_index);
 
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
        MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
@@ -2541,44 +2543,35 @@ static int __esw_set_master_egress_rule(struct mlx5_core_dev *master,
 
        flow_rule = mlx5_add_flow_rules(acl, spec, &flow_act,
                                        &dest, 1);
-       if (IS_ERR(flow_rule))
+       if (IS_ERR(flow_rule)) {
                err = PTR_ERR(flow_rule);
-       else
-               vport->egress.offloads.bounce_rule = flow_rule;
+       } else {
+               err = xa_insert(&vport->egress.offloads.bounce_rules,
+                               slave_index, flow_rule, GFP_KERNEL);
+               if (err)
+                       mlx5_del_flow_rules(flow_rule);
+       }
 
        kvfree(spec);
        return err;
 }
 
-static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
-                                     struct mlx5_core_dev *slave)
+static int esw_master_egress_create_resources(struct mlx5_flow_namespace *egress_ns,
+                                             struct mlx5_vport *vport, size_t count)
 {
        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-       struct mlx5_eswitch *esw = master->priv.eswitch;
        struct mlx5_flow_table_attr ft_attr = {
-               .max_fte = 1, .prio = 0, .level = 0,
+               .max_fte = count, .prio = 0, .level = 0,
                .flags = MLX5_FLOW_TABLE_OTHER_VPORT,
        };
-       struct mlx5_flow_namespace *egress_ns;
        struct mlx5_flow_table *acl;
        struct mlx5_flow_group *g;
-       struct mlx5_vport *vport;
        void *match_criteria;
        u32 *flow_group_in;
        int err;
 
-       vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
-       if (IS_ERR(vport))
-               return PTR_ERR(vport);
-
-       egress_ns = mlx5_get_flow_vport_acl_namespace(master,
-                                                     MLX5_FLOW_NAMESPACE_ESW_EGRESS,
-                                                     vport->index);
-       if (!egress_ns)
-               return -EINVAL;
-
        if (vport->egress.acl)
-               return -EINVAL;
+               return 0;
 
        flow_group_in = kvzalloc(inlen, GFP_KERNEL);
        if (!flow_group_in)
@@ -2602,7 +2595,7 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
        MLX5_SET(create_flow_group_in, flow_group_in,
                 source_eswitch_owner_vhca_id_valid, 1);
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
-       MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+       MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, count);
 
        g = mlx5_create_flow_group(acl, flow_group_in);
        if (IS_ERR(g)) {
@@ -2610,19 +2603,15 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
                goto err_group;
        }
 
-       err = __esw_set_master_egress_rule(master, slave, vport, acl);
-       if (err)
-               goto err_rule;
-
        vport->egress.acl = acl;
        vport->egress.offloads.bounce_grp = g;
+       vport->egress.type = VPORT_EGRESS_ACL_TYPE_SHARED_FDB;
+       xa_init_flags(&vport->egress.offloads.bounce_rules, XA_FLAGS_ALLOC);
 
        kvfree(flow_group_in);
 
        return 0;
 
-err_rule:
-       mlx5_destroy_flow_group(g);
 err_group:
        mlx5_destroy_flow_table(acl);
 out:
@@ -2630,18 +2619,70 @@ out:
        return err;
 }
 
-static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev)
+static void esw_master_egress_destroy_resources(struct mlx5_vport *vport)
+{
+       mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp);
+       mlx5_destroy_flow_table(vport->egress.acl);
+}
+
+static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
+                                     struct mlx5_core_dev *slave, size_t count)
+{
+       struct mlx5_eswitch *esw = master->priv.eswitch;
+       u16 slave_index = MLX5_CAP_GEN(slave, vhca_id);
+       struct mlx5_flow_namespace *egress_ns;
+       struct mlx5_vport *vport;
+       int err;
+
+       vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+       if (IS_ERR(vport))
+               return PTR_ERR(vport);
+
+       egress_ns = mlx5_get_flow_vport_acl_namespace(master,
+                                                     MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+                                                     vport->index);
+       if (!egress_ns)
+               return -EINVAL;
+
+       if (vport->egress.acl && vport->egress.type != VPORT_EGRESS_ACL_TYPE_SHARED_FDB)
+               return 0;
+
+       err = esw_master_egress_create_resources(egress_ns, vport, count);
+       if (err)
+               return err;
+
+       if (xa_load(&vport->egress.offloads.bounce_rules, slave_index))
+               return -EINVAL;
+
+       err = __esw_set_master_egress_rule(master, slave, vport, vport->egress.acl);
+       if (err)
+               goto err_rule;
+
+       return 0;
+
+err_rule:
+       esw_master_egress_destroy_resources(vport);
+       return err;
+}
+
+static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev,
+                                        struct mlx5_core_dev *slave_dev)
 {
        struct mlx5_vport *vport;
 
        vport = mlx5_eswitch_get_vport(dev->priv.eswitch,
                                       dev->priv.eswitch->manager_vport);
 
-       esw_acl_egress_ofld_cleanup(vport);
+       esw_acl_egress_ofld_bounce_rule_destroy(vport, MLX5_CAP_GEN(slave_dev, vhca_id));
+
+       if (xa_empty(&vport->egress.offloads.bounce_rules)) {
+               esw_acl_egress_ofld_cleanup(vport);
+               xa_destroy(&vport->egress.offloads.bounce_rules);
+       }
 }
 
-int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
-                                           struct mlx5_eswitch *slave_esw)
+int mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw,
+                                            struct mlx5_eswitch *slave_esw, int max_slaves)
 {
        int err;
 
@@ -2651,7 +2692,7 @@ int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
                return err;
 
        err = esw_set_master_egress_rule(master_esw->dev,
-                                        slave_esw->dev);
+                                        slave_esw->dev, max_slaves);
        if (err)
                goto err_acl;
 
@@ -2659,21 +2700,21 @@ int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
 
 err_acl:
        esw_set_slave_root_fdb(NULL, slave_esw->dev);
-
        return err;
 }
 
-void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw,
                                              struct mlx5_eswitch *slave_esw)
 {
-       esw_unset_master_egress_rule(master_esw->dev);
        esw_set_slave_root_fdb(NULL, slave_esw->dev);
+       esw_unset_master_egress_rule(master_esw->dev, slave_esw->dev);
 }
 
 #define ESW_OFFLOADS_DEVCOM_PAIR       (0)
 #define ESW_OFFLOADS_DEVCOM_UNPAIR     (1)
 
-static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw)
+static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw,
+                                              struct mlx5_eswitch *peer_esw)
 {
        const struct mlx5_eswitch_rep_ops *ops;
        struct mlx5_eswitch_rep *rep;
@@ -2686,18 +2727,19 @@ static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw)
                        ops = esw->offloads.rep_ops[rep_type];
                        if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
                            ops->event)
-                               ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, NULL);
+                               ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, peer_esw);
                }
        }
 }
 
-static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
+static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw,
+                                    struct mlx5_eswitch *peer_esw)
 {
 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
        mlx5e_tc_clean_fdb_peer_flows(esw);
 #endif
-       mlx5_esw_offloads_rep_event_unpair(esw);
-       esw_del_fdb_peer_miss_rules(esw);
+       mlx5_esw_offloads_rep_event_unpair(esw, peer_esw);
+       esw_del_fdb_peer_miss_rules(esw, peer_esw->dev);
 }
 
 static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
@@ -2728,7 +2770,7 @@ static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
        return 0;
 
 err_out:
-       mlx5_esw_offloads_unpair(esw);
+       mlx5_esw_offloads_unpair(esw, peer_esw);
        return err;
 }
 
@@ -2736,7 +2778,9 @@ static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
                                         struct mlx5_eswitch *peer_esw,
                                         bool pair)
 {
+       u8 peer_idx = mlx5_get_dev_index(peer_esw->dev);
        struct mlx5_flow_root_namespace *peer_ns;
+       u8 idx = mlx5_get_dev_index(esw->dev);
        struct mlx5_flow_root_namespace *ns;
        int err;
 
@@ -2744,18 +2788,18 @@ static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
        ns = esw->dev->priv.steering->fdb_root_ns;
 
        if (pair) {
-               err = mlx5_flow_namespace_set_peer(ns, peer_ns);
+               err = mlx5_flow_namespace_set_peer(ns, peer_ns, peer_idx);
                if (err)
                        return err;
 
-               err = mlx5_flow_namespace_set_peer(peer_ns, ns);
+               err = mlx5_flow_namespace_set_peer(peer_ns, ns, idx);
                if (err) {
-                       mlx5_flow_namespace_set_peer(ns, NULL);
+                       mlx5_flow_namespace_set_peer(ns, NULL, peer_idx);
                        return err;
                }
        } else {
-               mlx5_flow_namespace_set_peer(ns, NULL);
-               mlx5_flow_namespace_set_peer(peer_ns, NULL);
+               mlx5_flow_namespace_set_peer(ns, NULL, peer_idx);
+               mlx5_flow_namespace_set_peer(peer_ns, NULL, idx);
        }
 
        return 0;
@@ -2792,18 +2836,23 @@ static int mlx5_esw_offloads_devcom_event(int event,
 
                esw->paired[mlx5_get_dev_index(peer_esw->dev)] = true;
                peer_esw->paired[mlx5_get_dev_index(esw->dev)] = true;
-               mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true);
+               esw->num_peers++;
+               peer_esw->num_peers++;
+               mlx5_devcom_comp_set_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true);
                break;
 
        case ESW_OFFLOADS_DEVCOM_UNPAIR:
                if (!esw->paired[mlx5_get_dev_index(peer_esw->dev)])
                        break;
 
-               mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false);
+               peer_esw->num_peers--;
+               esw->num_peers--;
+               if (!esw->num_peers && !peer_esw->num_peers)
+                       mlx5_devcom_comp_set_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false);
                esw->paired[mlx5_get_dev_index(peer_esw->dev)] = false;
                peer_esw->paired[mlx5_get_dev_index(esw->dev)] = false;
-               mlx5_esw_offloads_unpair(peer_esw);
-               mlx5_esw_offloads_unpair(esw);
+               mlx5_esw_offloads_unpair(peer_esw, esw);
+               mlx5_esw_offloads_unpair(esw, peer_esw);
                mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
                break;
        }
@@ -2811,7 +2860,7 @@ static int mlx5_esw_offloads_devcom_event(int event,
        return 0;
 
 err_pair:
-       mlx5_esw_offloads_unpair(esw);
+       mlx5_esw_offloads_unpair(esw, peer_esw);
 err_peer:
        mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
 err_out:
@@ -2823,8 +2872,10 @@ err_out:
 void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw)
 {
        struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+       int i;
 
-       INIT_LIST_HEAD(&esw->offloads.peer_flows);
+       for (i = 0; i < MLX5_MAX_PORTS; i++)
+               INIT_LIST_HEAD(&esw->offloads.peer_flows[i]);
        mutex_init(&esw->offloads.peer_mutex);
 
        if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
@@ -2838,9 +2889,11 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw)
                                       mlx5_esw_offloads_devcom_event,
                                       esw);
 
+       esw->num_peers = 0;
        mlx5_devcom_send_event(devcom,
                               MLX5_DEVCOM_ESW_OFFLOADS,
-                              ESW_OFFLOADS_DEVCOM_PAIR, esw);
+                              ESW_OFFLOADS_DEVCOM_PAIR,
+                              ESW_OFFLOADS_DEVCOM_UNPAIR, esw);
 }
 
 void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
@@ -2854,6 +2907,7 @@ void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
                return;
 
        mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
+                              ESW_OFFLOADS_DEVCOM_UNPAIR,
                               ESW_OFFLOADS_DEVCOM_UNPAIR, esw);
 
        mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 144e594..11374c3 100644
@@ -139,7 +139,8 @@ static void mlx5_cmd_stub_modify_header_dealloc(struct mlx5_flow_root_namespace
 }
 
 static int mlx5_cmd_stub_set_peer(struct mlx5_flow_root_namespace *ns,
-                                 struct mlx5_flow_root_namespace *peer_ns)
+                                 struct mlx5_flow_root_namespace *peer_ns,
+                                 u8 peer_idx)
 {
        return 0;
 }
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 8ef4254..b6b9a5a 100644
@@ -93,7 +93,8 @@ struct mlx5_flow_cmds {
                                      struct mlx5_modify_hdr *modify_hdr);
 
        int (*set_peer)(struct mlx5_flow_root_namespace *ns,
-                       struct mlx5_flow_root_namespace *peer_ns);
+                       struct mlx5_flow_root_namespace *peer_ns,
+                       u8 peer_idx);
 
        int (*create_ns)(struct mlx5_flow_root_namespace *ns);
        int (*destroy_ns)(struct mlx5_flow_root_namespace *ns);
index 19da02c..4ef04aa 100644
@@ -3620,7 +3620,8 @@ void mlx5_destroy_match_definer(struct mlx5_core_dev *dev,
 }
 
 int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
-                                struct mlx5_flow_root_namespace *peer_ns)
+                                struct mlx5_flow_root_namespace *peer_ns,
+                                u8 peer_idx)
 {
        if (peer_ns && ns->mode != peer_ns->mode) {
                mlx5_core_err(ns->dev,
@@ -3628,7 +3629,7 @@ int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
                return -EINVAL;
        }
 
-       return ns->cmds->set_peer(ns, peer_ns);
+       return ns->cmds->set_peer(ns, peer_ns, peer_idx);
 }
 
 /* This function should be called only at init stage of the namespace.
index f137a06..200ec94 100644
@@ -295,7 +295,8 @@ void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev,
 const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void);
 
 int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
-                                struct mlx5_flow_root_namespace *peer_ns);
+                                struct mlx5_flow_root_namespace *peer_ns,
+                                u8 peer_idx);
 
 int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
                                 enum mlx5_flow_steering_mode mode);
index 5d331b9..c820f7d 100644
@@ -550,6 +550,29 @@ char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
        }
 }
 
+static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
+{
+       struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+       struct mlx5_eswitch *master_esw = dev0->priv.eswitch;
+       int err;
+       int i;
+
+       for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) {
+               struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch;
+
+               err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
+                                                              slave_esw, ldev->ports);
+               if (err)
+                       goto err;
+       }
+       return 0;
+err:
+       for (; i > MLX5_LAG_P1; i--)
+               mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
+                                                        ldev->pf[i].dev->priv.eswitch);
+       return err;
+}
+
 static int mlx5_create_lag(struct mlx5_lag *ldev,
                           struct lag_tracker *tracker,
                           enum mlx5_lag_mode mode,
@@ -557,7 +580,6 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
 {
        bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
-       struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
        u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
        int err;
 
@@ -575,8 +597,7 @@ static int mlx5_create_lag(struct mlx5_lag *ldev,
        }
 
        if (shared_fdb) {
-               err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
-                                                             dev1->priv.eswitch);
+               err = mlx5_lag_create_single_fdb(ldev);
                if (err)
                        mlx5_core_err(dev0, "Can't enable single FDB mode\n");
                else
@@ -647,19 +668,21 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
 int mlx5_deactivate_lag(struct mlx5_lag *ldev)
 {
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
-       struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+       struct mlx5_eswitch *master_esw = dev0->priv.eswitch;
        u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
        bool roce_lag = __mlx5_lag_is_roce(ldev);
        unsigned long flags = ldev->mode_flags;
        int err;
+       int i;
 
        ldev->mode = MLX5_LAG_MODE_NONE;
        ldev->mode_flags = 0;
        mlx5_lag_mp_reset(ldev);
 
        if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
-               mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch,
-                                                        dev1->priv.eswitch);
+               for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++)
+                       mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
+                                                                ldev->pf[i].dev->priv.eswitch);
                clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
        }
 
@@ -801,8 +824,8 @@ bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
            is_mdev_switchdev_mode(dev1) &&
            mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
            mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
-           mlx5_devcom_is_paired(dev0->priv.devcom,
-                                 MLX5_DEVCOM_ESW_OFFLOADS) &&
+           mlx5_devcom_comp_is_ready(dev0->priv.devcom,
+                                     MLX5_DEVCOM_ESW_OFFLOADS) &&
            MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
            MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
            MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
index b7d779d..8472bbb 100644
@@ -19,7 +19,7 @@ struct mlx5_devcom_component {
 
        mlx5_devcom_event_handler_t handler;
        struct rw_semaphore sem;
-       bool paired;
+       bool ready;
 };
 
 struct mlx5_devcom_list {
@@ -193,7 +193,7 @@ void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
 
 int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
                           enum mlx5_devcom_components id,
-                          int event,
+                          int event, int rollback_event,
                           void *event_data)
 {
        struct mlx5_devcom_component *comp;
@@ -210,84 +210,134 @@ int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
 
                if (i != devcom->idx && data) {
                        err = comp->handler(event, data, event_data);
-                       break;
+                       if (err)
+                               goto rollback;
                }
        }
 
        up_write(&comp->sem);
+       return 0;
+
+rollback:
+       while (i--) {
+               void *data = rcu_dereference_protected(comp->device[i].data,
+                                                      lockdep_is_held(&comp->sem));
+
+               if (i != devcom->idx && data)
+                       comp->handler(rollback_event, data, event_data);
+       }
+
+       up_write(&comp->sem);
        return err;
 }
 
-void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
-                           enum mlx5_devcom_components id,
-                           bool paired)
+void mlx5_devcom_comp_set_ready(struct mlx5_devcom *devcom,
+                               enum mlx5_devcom_components id,
+                               bool ready)
 {
        struct mlx5_devcom_component *comp;
 
        comp = &devcom->priv->components[id];
        WARN_ON(!rwsem_is_locked(&comp->sem));
 
-       WRITE_ONCE(comp->paired, paired);
+       WRITE_ONCE(comp->ready, ready);
 }
 
-bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
-                          enum mlx5_devcom_components id)
+bool mlx5_devcom_comp_is_ready(struct mlx5_devcom *devcom,
+                              enum mlx5_devcom_components id)
 {
        if (IS_ERR_OR_NULL(devcom))
                return false;
 
-       return READ_ONCE(devcom->priv->components[id].paired);
+       return READ_ONCE(devcom->priv->components[id].ready);
 }
 
-void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
-                               enum mlx5_devcom_components id)
+bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom *devcom,
+                                    enum mlx5_devcom_components id)
 {
        struct mlx5_devcom_component *comp;
-       int i;
 
        if (IS_ERR_OR_NULL(devcom))
-               return NULL;
+               return false;
 
        comp = &devcom->priv->components[id];
        down_read(&comp->sem);
-       if (!READ_ONCE(comp->paired)) {
+       if (!READ_ONCE(comp->ready)) {
                up_read(&comp->sem);
-               return NULL;
+               return false;
        }
 
-       for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
-               if (i != devcom->idx)
-                       break;
+       return true;
+}
+
+void mlx5_devcom_for_each_peer_end(struct mlx5_devcom *devcom,
+                                  enum mlx5_devcom_components id)
+{
+       struct mlx5_devcom_component *comp = &devcom->priv->components[id];
 
-       return rcu_dereference_protected(comp->device[i].data, lockdep_is_held(&comp->sem));
+       up_read(&comp->sem);
 }
 
-void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id)
+void *mlx5_devcom_get_next_peer_data(struct mlx5_devcom *devcom,
+                                    enum mlx5_devcom_components id,
+                                    int *i)
 {
        struct mlx5_devcom_component *comp;
-       int i;
+       void *ret;
+       int idx;
 
-       if (IS_ERR_OR_NULL(devcom))
-               return NULL;
+       comp = &devcom->priv->components[id];
 
-       for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
-               if (i != devcom->idx)
-                       break;
+       if (*i == MLX5_DEVCOM_PORTS_SUPPORTED)
+               return NULL;
+       for (idx = *i; idx < MLX5_DEVCOM_PORTS_SUPPORTED; idx++) {
+               if (idx != devcom->idx) {
+                       ret = rcu_dereference_protected(comp->device[idx].data,
+                                                       lockdep_is_held(&comp->sem));
+                       if (ret)
+                               break;
+               }
+       }
 
-       comp = &devcom->priv->components[id];
-       /* This can change concurrently, however 'data' pointer will remain
-        * valid for the duration of RCU read section.
-        */
-       if (!READ_ONCE(comp->paired))
+       if (idx == MLX5_DEVCOM_PORTS_SUPPORTED) {
+               *i = idx;
                return NULL;
+       }
+       *i = idx + 1;
 
-       return rcu_dereference(comp->device[i].data);
+       return ret;
 }
 
-void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
-                                  enum mlx5_devcom_components id)
+void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom *devcom,
+                                        enum mlx5_devcom_components id,
+                                        int *i)
 {
-       struct mlx5_devcom_component *comp = &devcom->priv->components[id];
+       struct mlx5_devcom_component *comp;
+       void *ret;
+       int idx;
 
-       up_read(&comp->sem);
+       comp = &devcom->priv->components[id];
+
+       if (*i == MLX5_DEVCOM_PORTS_SUPPORTED)
+               return NULL;
+       for (idx = *i; idx < MLX5_DEVCOM_PORTS_SUPPORTED; idx++) {
+               if (idx != devcom->idx) {
+                       /* This can change concurrently, however 'data' pointer will remain
+                        * valid for the duration of RCU read section.
+                        */
+                       if (!READ_ONCE(comp->ready))
+                               return NULL;
+                       ret = rcu_dereference(comp->device[idx].data);
+                       if (ret)
+                               break;
+               }
+       }
+
+       if (idx == MLX5_DEVCOM_PORTS_SUPPORTED) {
+               *i = idx;
+               return NULL;
+       }
+       *i = idx + 1;
+
+       return ret;
 }
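
The rollback_event argument added to mlx5_devcom_send_event() above gives the call transactional behaviour: if a later peer's handler fails, every peer that already handled 'event' is sent 'rollback_event' in reverse order. A stand-alone sketch of that control flow (plain C, not the kernel code; the handler, event values and peer count are invented, and the real loop also skips the sending device's own slot):

#include <stdio.h>

#define NPEERS 3

static int handler(int peer, int event)
{
        printf("peer %d: event %d\n", peer, event);
        return (peer == 2 && event == 1) ? -1 : 0;	/* pretend peer 2 rejects PAIR */
}

static int send_event(int event, int rollback_event)
{
        int err = 0;
        int i;

        for (i = 0; i < NPEERS; i++) {
                err = handler(i, event);
                if (err)
                        goto rollback;
        }
        return 0;

rollback:
        while (i--)	/* peers 1 and 0 get the rollback, the failing peer does not */
                handler(i, rollback_event);
        return err;
}

int main(void)
{
        return send_event(1 /* e.g. PAIR */, 2 /* e.g. UNPAIR */) ? 1 : 0;
}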
index 9a496f4..bb1970b 100644
@@ -30,20 +30,33 @@ void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
 
 int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
                           enum mlx5_devcom_components id,
-                          int event,
+                          int event, int rollback_event,
                           void *event_data);
 
-void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
-                           enum mlx5_devcom_components id,
-                           bool paired);
-bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
-                          enum mlx5_devcom_components id);
+void mlx5_devcom_comp_set_ready(struct mlx5_devcom *devcom,
+                               enum mlx5_devcom_components id,
+                               bool ready);
+bool mlx5_devcom_comp_is_ready(struct mlx5_devcom *devcom,
+                              enum mlx5_devcom_components id);
 
-void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
-                               enum mlx5_devcom_components id);
-void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id);
-void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
+bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom *devcom,
+                                    enum mlx5_devcom_components id);
+void mlx5_devcom_for_each_peer_end(struct mlx5_devcom *devcom,
                                   enum mlx5_devcom_components id);
+void *mlx5_devcom_get_next_peer_data(struct mlx5_devcom *devcom,
+                                    enum mlx5_devcom_components id, int *i);
 
-#endif
+#define mlx5_devcom_for_each_peer_entry(devcom, id, data, i)                   \
+       for (i = 0, data = mlx5_devcom_get_next_peer_data(devcom, id, &i);      \
+            data;                                                              \
+            data = mlx5_devcom_get_next_peer_data(devcom, id, &i))
+
+void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom *devcom,
+                                        enum mlx5_devcom_components id, int *i);
 
+#define mlx5_devcom_for_each_peer_entry_rcu(devcom, id, data, i)               \
+       for (i = 0, data = mlx5_devcom_get_next_peer_data_rcu(devcom, id, &i);  \
+            data;                                                              \
+            data = mlx5_devcom_get_next_peer_data_rcu(devcom, id, &i))
+
+#endif
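
A hypothetical caller of the new iteration API (function name and body invented for illustration; it assumes the peer data registered for MLX5_DEVCOM_ESW_OFFLOADS is a struct mlx5_eswitch pointer, as in the eswitch code earlier in this series). It is meant to replace the removed single-peer mlx5_devcom_get_peer_data()/mlx5_devcom_release_peer_data() pattern:

static void example_walk_peers(struct mlx5_devcom *devcom)
{
        struct mlx5_eswitch *peer_esw;
        int i;

        if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
                return;	/* component not ready, nothing to visit */

        mlx5_devcom_for_each_peer_entry(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
                                        peer_esw, i) {
                /* peer_esw is the data each peer registered; the component
                 * semaphore taken by _begin() keeps it valid in this scope.
                 */
        }

        mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
}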
index 0eb9a8d..4e9bc18 100644
@@ -2071,8 +2071,9 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn,
        struct mlx5dr_action *action;
        u8 peer_vport;
 
-       peer_vport = vhca_id_valid && (vhca_id != dmn->info.caps.gvmi);
-       vport_dmn = peer_vport ? dmn->peer_dmn : dmn;
+       peer_vport = vhca_id_valid && mlx5_core_is_pf(dmn->mdev) &&
+               (vhca_id != dmn->info.caps.gvmi);
+       vport_dmn = peer_vport ? dmn->peer_dmn[vhca_id] : dmn;
        if (!vport_dmn) {
                mlx5dr_dbg(dmn, "No peer vport domain for given vhca_id\n");
                return NULL;
index 9a2dfe6..75dc85d 100644
@@ -555,17 +555,18 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
 }
 
 void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
-                           struct mlx5dr_domain *peer_dmn)
+                           struct mlx5dr_domain *peer_dmn,
+                           u8 peer_idx)
 {
        mlx5dr_domain_lock(dmn);
 
-       if (dmn->peer_dmn)
-               refcount_dec(&dmn->peer_dmn->refcount);
+       if (dmn->peer_dmn[peer_idx])
+               refcount_dec(&dmn->peer_dmn[peer_idx]->refcount);
 
-       dmn->peer_dmn = peer_dmn;
+       dmn->peer_dmn[peer_idx] = peer_dmn;
 
-       if (dmn->peer_dmn)
-               refcount_inc(&dmn->peer_dmn->refcount);
+       if (dmn->peer_dmn[peer_idx])
+               refcount_inc(&dmn->peer_dmn[peer_idx]->refcount);
 
        mlx5dr_domain_unlock(dmn);
 }
index 2010d4a..69d7a8f 100644
@@ -1647,6 +1647,7 @@ dr_ste_v0_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
                                 u8 *tag)
 {
        struct mlx5dr_match_misc *misc = &value->misc;
+       int id = misc->source_eswitch_owner_vhca_id;
        struct mlx5dr_cmd_vport_cap *vport_cap;
        struct mlx5dr_domain *dmn = sb->dmn;
        struct mlx5dr_domain *vport_dmn;
@@ -1657,11 +1658,11 @@ dr_ste_v0_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
 
        if (sb->vhca_id_valid) {
                /* Find port GVMI based on the eswitch_owner_vhca_id */
-               if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi)
+               if (id == dmn->info.caps.gvmi)
                        vport_dmn = dmn;
-               else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id ==
-                                          dmn->peer_dmn->info.caps.gvmi))
-                       vport_dmn = dmn->peer_dmn;
+               else if (id < MLX5_MAX_PORTS && dmn->peer_dmn[id] &&
+                        (id == dmn->peer_dmn[id]->info.caps.gvmi))
+                       vport_dmn = dmn->peer_dmn[id];
                else
                        return -EINVAL;
 
index 4c0704a..f4ef0b2 100644
@@ -1979,6 +1979,7 @@ static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
                                            u8 *tag)
 {
        struct mlx5dr_match_misc *misc = &value->misc;
+       int id = misc->source_eswitch_owner_vhca_id;
        struct mlx5dr_cmd_vport_cap *vport_cap;
        struct mlx5dr_domain *dmn = sb->dmn;
        struct mlx5dr_domain *vport_dmn;
@@ -1988,11 +1989,11 @@ static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
 
        if (sb->vhca_id_valid) {
                /* Find port GVMI based on the eswitch_owner_vhca_id */
-               if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi)
+               if (id == dmn->info.caps.gvmi)
                        vport_dmn = dmn;
-               else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id ==
-                                          dmn->peer_dmn->info.caps.gvmi))
-                       vport_dmn = dmn->peer_dmn;
+               else if (id < MLX5_MAX_PORTS && dmn->peer_dmn[id] &&
+                        (id == dmn->peer_dmn[id]->info.caps.gvmi))
+                       vport_dmn = dmn->peer_dmn[id];
                else
                        return -EINVAL;
 
index 678a993..1622dbb 100644
@@ -935,7 +935,7 @@ struct mlx5dr_domain_info {
 };
 
 struct mlx5dr_domain {
-       struct mlx5dr_domain *peer_dmn;
+       struct mlx5dr_domain *peer_dmn[MLX5_MAX_PORTS];
        struct mlx5_core_dev *mdev;
        u32 pdn;
        struct mlx5_uars_page *uar;
index 9846537..c6fda1c 100644
@@ -770,14 +770,15 @@ restore_fte:
 }
 
 static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns,
-                               struct mlx5_flow_root_namespace *peer_ns)
+                               struct mlx5_flow_root_namespace *peer_ns,
+                               u8 peer_idx)
 {
        struct mlx5dr_domain *peer_domain = NULL;
 
        if (peer_ns)
                peer_domain = peer_ns->fs_dr_domain.dr_domain;
        mlx5dr_domain_set_peer(ns->fs_dr_domain.dr_domain,
-                              peer_domain);
+                              peer_domain, peer_idx);
        return 0;
 }
 
index 9afd268..5ba88f2 100644
@@ -48,7 +48,8 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *domain);
 int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags);
 
 void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
-                           struct mlx5dr_domain *peer_dmn);
+                           struct mlx5dr_domain *peer_dmn,
+                           u8 peer_idx);
 
 struct mlx5dr_table *
 mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags,