/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
#include "mlx5_core.h"

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);
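
/* Issue the CREATE_LAG firmware command: the lag context carries the TX
 * remap affinity for both ports and, when requested, selects shared
 * (single) FDB mode.
 */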
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
                               u8 remap_port2, bool shared_fdb)
{
        u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
        void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);

        MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);

        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
        MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);

        return mlx5_cmd_exec_in(dev, create_lag, in);
}

static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
                               u8 remap_port2)
{
        u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
        void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

        MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
        MLX5_SET(modify_lag_in, in, field_select, 0x1);

        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

        return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
        u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

        MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

        return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
        u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

        MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

        return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

static int mlx5_lag_netdev_event(struct notifier_block *this,
                                 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

static void mlx5_ldev_free(struct kref *ref)
{
        struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

        if (ldev->nb.notifier_call)
                unregister_netdevice_notifier_net(&init_net, &ldev->nb);
        mlx5_lag_mp_cleanup(ldev);
        cancel_delayed_work_sync(&ldev->bond_work);
        destroy_workqueue(ldev->wq);
        kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
        kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
        kref_get(&ldev->ref);
}

static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        int err;

        ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
        if (!ldev)
                return NULL;

        ldev->wq = create_singlethread_workqueue("mlx5_lag");
        if (!ldev->wq) {
                kfree(ldev);
                return NULL;
        }

        kref_init(&ldev->ref);
        INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

        ldev->nb.notifier_call = mlx5_lag_netdev_event;
        if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
                ldev->nb.notifier_call = NULL;
                mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
        }

        err = mlx5_lag_mp_init(ldev);
        if (err)
                mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
                              err);

        return ldev;
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
                                struct net_device *ndev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++)
                if (ldev->pf[i].netdev == ndev)
                        return i;

        return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
        return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
        return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
}
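
/* Infer the logical-to-physical TX port mapping from the tracker state:
 * by default each port maps to itself; if exactly one port is link-up and
 * TX-enabled, both are remapped to that port.
 */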
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
                                           u8 *port1, u8 *port2)
{
        bool p1en;
        bool p2en;

        p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled &&
               tracker->netdev_state[MLX5_LAG_P1].link_up;

        p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled &&
               tracker->netdev_state[MLX5_LAG_P2].link_up;

        *port1 = 1;
        *port2 = 2;
        if ((!p1en && !p2en) || (p1en && p2en))
                return;

        if (p1en)
                *port2 = 1;
        else
                *port1 = 2;
}
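
/* Recompute the port affinity from the tracker state and, if it changed,
 * push the new mapping to firmware with MODIFY_LAG.
 */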
void mlx5_modify_lag(struct mlx5_lag *ldev,
                     struct lag_tracker *tracker)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        u8 v2p_port1, v2p_port2;
        int err;

        mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
                                       &v2p_port2);

        if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
            v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
                ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
                ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;

                mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
                               ldev->v2p_map[MLX5_LAG_P1],
                               ldev->v2p_map[MLX5_LAG_P2]);

                err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
                if (err)
                        mlx5_core_err(dev0,
                                      "Failed to modify LAG (%d)\n",
                                      err);
        }
}
static int mlx5_create_lag(struct mlx5_lag *ldev,
                           struct lag_tracker *tracker,
                           bool shared_fdb)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
        u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
        int err;

        mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
                                       &ldev->v2p_map[MLX5_LAG_P2]);

        mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d",
                       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
                       shared_fdb);

        err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
                                  ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
        if (err) {
                mlx5_core_err(dev0,
                              "Failed to create LAG (%d)\n",
                              err);
                return err;
        }

        if (shared_fdb) {
                err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
                                                              dev1->priv.eswitch);
                if (err)
                        mlx5_core_err(dev0, "Can't enable single FDB mode\n");
                else
                        mlx5_core_info(dev0, "Operation mode is single FDB\n");
        }

        if (err) {
                MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
                if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
                        mlx5_core_err(dev0,
                                      "Failed to deactivate RoCE LAG; driver restart required\n");
        }

        return err;
}
int mlx5_activate_lag(struct mlx5_lag *ldev,
                      struct lag_tracker *tracker,
                      u8 flags,
                      bool shared_fdb)
{
        bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        int err;

        err = mlx5_create_lag(ldev, tracker, shared_fdb);
        if (err) {
                if (roce_lag)
                        mlx5_core_err(dev0,
                                      "Failed to activate RoCE LAG\n");
                else
                        mlx5_core_err(dev0,
                                      "Failed to activate VF LAG\n"
                                      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
                return err;
        }

        ldev->flags |= flags;
        ldev->shared_fdb = shared_fdb;
        return 0;
}
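
/* Tear the LAG down: clear the mode flags, undo the shared FDB
 * configuration if it was enabled, and issue DESTROY_LAG to firmware.
 */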
static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
        bool roce_lag = __mlx5_lag_is_roce(ldev);
        int err;

        ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
        mlx5_lag_mp_reset(ldev);

        if (ldev->shared_fdb) {
                mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
                                                         ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
                ldev->shared_fdb = false;
        }

        MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
        err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
        if (err) {
                if (roce_lag)
                        mlx5_core_err(dev0,
                                      "Failed to deactivate RoCE LAG; driver restart required\n");
                else
                        mlx5_core_err(dev0,
                                      "Failed to deactivate VF LAG; driver restart required\n"
                                      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
        }

        return err;
}

static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
        if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
                return false;

#ifdef CONFIG_MLX5_ESWITCH
        return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
                                   ldev->pf[MLX5_LAG_P2].dev);
#else
        return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
                !mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
#endif
}
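
/* Enable (add) or disable (remove) the IB auxiliary device of each PF and
 * rescan its drivers, skipping PFs that have all auxiliary devices
 * disabled.
 */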
static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++) {
                if (!ldev->pf[i].dev)
                        continue;

                if (ldev->pf[i].dev->priv.flags &
                    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
                        continue;

                ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                mlx5_rescan_drivers_locked(ldev->pf[i].dev);
        }
}

static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++) {
                if (!ldev->pf[i].dev)
                        continue;

                if (ldev->pf[i].dev->priv.flags &
                    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
                        continue;

                ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                mlx5_rescan_drivers_locked(ldev->pf[i].dev);
        }
}
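
/* Disable an active LAG: detach auxiliary/IB devices as required by the
 * current mode, deactivate the LAG in firmware, then restore the devices
 * and reload the eswitch representors where needed.
 */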
static void mlx5_disable_lag(struct mlx5_lag *ldev)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
        bool shared_fdb = ldev->shared_fdb;
        bool roce_lag;
        int err;

        roce_lag = __mlx5_lag_is_roce(ldev);

        if (shared_fdb) {
                mlx5_lag_remove_devices(ldev);
        } else if (roce_lag) {
                if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
                        dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                        mlx5_rescan_drivers_locked(dev0);
                }
                mlx5_nic_vport_disable_roce(dev1);
        }

        err = mlx5_deactivate_lag(ldev);
        if (err)
                return;

        if (shared_fdb || roce_lag)
                mlx5_lag_add_devices(ldev);

        if (shared_fdb) {
                if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
                        mlx5_eswitch_reload_reps(dev0->priv.eswitch);
                if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
                        mlx5_eswitch_reload_reps(dev1->priv.eswitch);
        }
}
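
/* Shared FDB requires both devices in switchdev mode with vport match
 * metadata enabled, eswitches paired over devcom, and firmware support for
 * native FDB selection, root FT on the other eswitch and a shared ingress
 * ACL.
 */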
static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;

        if (is_mdev_switchdev_mode(dev0) &&
            is_mdev_switchdev_mode(dev1) &&
            mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
            mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
            mlx5_devcom_is_paired(dev0->priv.devcom,
                                  MLX5_DEVCOM_ESW_OFFLOADS) &&
            MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
            MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
            MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
                return true;

        return false;
}
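
/* Core bonding state machine: based on the tracker state and prerequisites,
 * either activate LAG (RoCE or SR-IOV mode, optionally with shared FDB),
 * update the port mapping of an already active LAG, or disable it.
 */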
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
        struct lag_tracker tracker;
        bool do_bond, roce_lag;
        int err;

        if (!mlx5_lag_is_ready(ldev)) {
                do_bond = false;
        } else {
                tracker = ldev->tracker;

                do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
        }

        if (do_bond && !__mlx5_lag_is_active(ldev)) {
                bool shared_fdb = mlx5_shared_fdb_supported(ldev);

                roce_lag = !mlx5_sriov_is_enabled(dev0) &&
                           !mlx5_sriov_is_enabled(dev1);

#ifdef CONFIG_MLX5_ESWITCH
                roce_lag = roce_lag &&
                           dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
                           dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif

                if (shared_fdb || roce_lag)
                        mlx5_lag_remove_devices(ldev);

                err = mlx5_activate_lag(ldev, &tracker,
                                        roce_lag ? MLX5_LAG_FLAG_ROCE :
                                                   MLX5_LAG_FLAG_SRIOV,
                                        shared_fdb);
                if (err) {
                        if (shared_fdb || roce_lag)
                                mlx5_lag_add_devices(ldev);

                        return;
                } else if (roce_lag) {
                        dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                        mlx5_rescan_drivers_locked(dev0);
                        mlx5_nic_vport_enable_roce(dev1);
                } else if (shared_fdb) {
                        dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                        mlx5_rescan_drivers_locked(dev0);

                        err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
                        if (!err)
                                err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);

                        if (err) {
                                dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                                mlx5_rescan_drivers_locked(dev0);
                                mlx5_deactivate_lag(ldev);
                                mlx5_lag_add_devices(ldev);
                                mlx5_eswitch_reload_reps(dev0->priv.eswitch);
                                mlx5_eswitch_reload_reps(dev1->priv.eswitch);
                                mlx5_core_err(dev0, "Failed to enable lag\n");
                                return;
                        }
                }
        } else if (do_bond && __mlx5_lag_is_active(ldev)) {
                mlx5_modify_lag(ldev, &tracker);
        } else if (!do_bond && __mlx5_lag_is_active(ldev)) {
                mlx5_disable_lag(ldev);
        }
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
        queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}
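
/* The eswitch locks are always taken in dev0-then-dev1 order and released
 * in reverse order; a port whose device is not present is skipped.
 */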
static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
                                    struct mlx5_core_dev *dev1)
{
        if (dev0)
                mlx5_esw_lock(dev0->priv.eswitch);
        if (dev1)
                mlx5_esw_lock(dev1->priv.eswitch);
}

static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
                                      struct mlx5_core_dev *dev1)
{
        if (dev1)
                mlx5_esw_unlock(dev1->priv.eswitch);
        if (dev0)
                mlx5_esw_unlock(dev0->priv.eswitch);
}
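
/* Delayed work handler: run mlx5_do_bond() under the device list lock and
 * both eswitch locks, backing off (requeue after HZ) if the list lock is
 * contended or a mode change is in progress.
 */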
static void mlx5_do_bond_work(struct work_struct *work)
{
        struct delayed_work *delayed_work = to_delayed_work(work);
        struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
                                             bond_work);
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
        int status;

        status = mlx5_dev_list_trylock();
        if (!status) {
                mlx5_queue_bond_work(ldev, HZ);
                return;
        }

        if (ldev->mode_changes_in_progress) {
                mlx5_dev_list_unlock();
                mlx5_queue_bond_work(ldev, HZ);
                return;
        }

        mlx5_lag_lock_eswitches(dev0, dev1);
        mlx5_do_bond(ldev);
        mlx5_lag_unlock_eswitches(dev0, dev1);
        mlx5_dev_list_unlock();
}
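
/* CHANGEUPPER handler: walk the bond's slaves to determine how many of our
 * netdevs are enslaved to it, validate the TX type, and update the tracker.
 * Returns 1 if the bonding state changed and the bond work should run.
 */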
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
                                         struct lag_tracker *tracker,
                                         struct net_device *ndev,
                                         struct netdev_notifier_changeupper_info *info)
{
        struct net_device *upper = info->upper_dev, *ndev_tmp;
        struct netdev_lag_upper_info *lag_upper_info = NULL;
        bool is_bonded, is_in_lag, mode_supported;
        int bond_status = 0;
        int num_slaves = 0;
        int idx;

        if (!netif_is_lag_master(upper))
                return 0;

        if (info->linking)
                lag_upper_info = info->upper_info;

        /* The event may still be of interest if the slave does not belong to
         * us, but is enslaved to a master which has one or more of our netdevs
         * as slaves (e.g., if a new slave is added to a master that bonds two
         * of our netdevs, we should unbond).
         */
        rcu_read_lock();
        for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
                idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
                if (idx >= 0)
                        bond_status |= (1 << idx);

                num_slaves++;
        }
        rcu_read_unlock();

        /* None of this lagdev's netdevs are slaves of this master. */
        if (!(bond_status & 0x3))
                return 0;

        if (lag_upper_info)
                tracker->tx_type = lag_upper_info->tx_type;

        /* Determine bonding status:
         * A device is considered bonded if both its physical ports are slaves
         * of the same lag master, and only them.
         */
        is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;

        if (!mlx5_lag_is_ready(ldev) && is_in_lag) {
                NL_SET_ERR_MSG_MOD(info->info.extack,
                                   "Can't activate LAG offload, PF is configured with more than 64 VFs");
                return 0;
        }

        /* Lag mode must be activebackup or hash. */
        mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
                         tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

        if (is_in_lag && !mode_supported)
                NL_SET_ERR_MSG_MOD(info->info.extack,
                                   "Can't activate LAG offload, TX type isn't supported");

        is_bonded = is_in_lag && mode_supported;
        if (tracker->is_bonded != is_bonded) {
                tracker->is_bonded = is_bonded;
                return 1;
        }

        return 0;
}

static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
                                              struct lag_tracker *tracker,
                                              struct net_device *ndev,
                                              struct netdev_notifier_changelowerstate_info *info)
{
        struct netdev_lag_lower_state_info *lag_lower_info;
        int idx;

        if (!netif_is_lag_port(ndev))
                return 0;

        idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
        if (idx < 0)
                return 0;

        /* This information is used to determine virtual to physical
         * port mapping.
         */
        lag_lower_info = info->lower_state_info;
        if (!lag_lower_info)
                return 0;

        tracker->netdev_state[idx] = *lag_lower_info;

        return 1;
}

static int mlx5_lag_netdev_event(struct notifier_block *this,
                                 unsigned long event, void *ptr)
{
        struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
        struct lag_tracker tracker;
        struct mlx5_lag *ldev;
        int changed = 0;

        if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
                return NOTIFY_DONE;

        ldev = container_of(this, struct mlx5_lag, nb);

        if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE)
                return NOTIFY_DONE;

        tracker = ldev->tracker;

        switch (event) {
        case NETDEV_CHANGEUPPER:
                changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
                                                        ptr);
                break;
        case NETDEV_CHANGELOWERSTATE:
                changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
                                                             ndev, ptr);
                break;
        }

        ldev->tracker = tracker;

        if (changed)
                mlx5_queue_bond_work(ldev, 0);

        return NOTIFY_DONE;
}

static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
                                 struct mlx5_core_dev *dev,
                                 struct net_device *netdev)
{
        unsigned int fn = PCI_FUNC(dev->pdev->devfn);

        if (fn >= MLX5_MAX_PORTS)
                return;

        spin_lock(&lag_lock);
        ldev->pf[fn].netdev = netdev;
        ldev->tracker.netdev_state[fn].link_up = 0;
        ldev->tracker.netdev_state[fn].tx_enabled = 0;
        spin_unlock(&lag_lock);
}

static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
                                    struct net_device *netdev)
{
        int i;

        spin_lock(&lag_lock);
        for (i = 0; i < MLX5_MAX_PORTS; i++) {
                if (ldev->pf[i].netdev == netdev) {
                        ldev->pf[i].netdev = NULL;
                        break;
                }
        }
        spin_unlock(&lag_lock);
}

static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
                               struct mlx5_core_dev *dev)
{
        unsigned int fn = PCI_FUNC(dev->pdev->devfn);

        if (fn >= MLX5_MAX_PORTS)
                return;

        ldev->pf[fn].dev = dev;
        dev->priv.lag = ldev;
}

/* Must be called with intf_mutex held */
static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
                                  struct mlx5_core_dev *dev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++)
                if (ldev->pf[i].dev == dev)
                        break;

        if (i == MLX5_MAX_PORTS)
                return;

        ldev->pf[i].dev = NULL;
        dev->priv.lag = NULL;
}

/* Must be called with intf_mutex held */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev = NULL;
        struct mlx5_core_dev *tmp_dev;

        if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
            !MLX5_CAP_GEN(dev, lag_master) ||
            MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
                return 0;

        tmp_dev = mlx5_get_next_phys_dev(dev);
        if (tmp_dev)
                ldev = tmp_dev->priv.lag;

        if (!ldev) {
                ldev = mlx5_lag_dev_alloc(dev);
                if (!ldev) {
                        mlx5_core_err(dev, "Failed to alloc lag dev\n");
                        return 0;
                }
        } else {
                if (ldev->mode_changes_in_progress)
                        return -EAGAIN;
                mlx5_ldev_get(ldev);
        }

        mlx5_ldev_add_mdev(ldev, dev);

        return 0;
}

void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;

        ldev = mlx5_lag_dev(dev);
        if (!ldev)
                return;

recheck:
        mlx5_dev_list_lock();
        if (ldev->mode_changes_in_progress) {
                mlx5_dev_list_unlock();
                msleep(100);
                goto recheck;
        }
        mlx5_ldev_remove_mdev(ldev, dev);
        mlx5_dev_list_unlock();
        mlx5_ldev_put(ldev);
}

void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
        int err;

recheck:
        mlx5_dev_list_lock();
        err = __mlx5_lag_dev_add_mdev(dev);
        if (err) {
                mlx5_dev_list_unlock();
                msleep(100);
                goto recheck;
        }
        mlx5_dev_list_unlock();
}

/* Must be called with intf_mutex held */
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
                            struct net_device *netdev)
{
        struct mlx5_lag *ldev;

        ldev = mlx5_lag_dev(dev);
        if (!ldev)
                return;

        mlx5_ldev_remove_netdev(ldev, netdev);
        ldev->flags &= ~MLX5_LAG_FLAG_READY;

        if (__mlx5_lag_is_active(ldev))
                mlx5_queue_bond_work(ldev, 0);
}

/* Must be called with intf_mutex held */
void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
                         struct net_device *netdev)
{
        struct mlx5_lag *ldev;
        int i;

        ldev = mlx5_lag_dev(dev);
        if (!ldev)
                return;

        mlx5_ldev_add_netdev(ldev, dev, netdev);

        for (i = 0; i < MLX5_MAX_PORTS; i++)
                if (!ldev->pf[i].dev)
                        break;

        if (i >= MLX5_MAX_PORTS)
                ldev->flags |= MLX5_LAG_FLAG_READY;
        mlx5_queue_bond_work(ldev, 0);
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        res = ldev && __mlx5_lag_is_roce(ldev);
        spin_unlock(&lag_lock);

        return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        res = ldev && __mlx5_lag_is_active(ldev);
        spin_unlock(&lag_lock);

        return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        res = ldev && __mlx5_lag_is_active(ldev) &&
              dev == ldev->pf[MLX5_LAG_P1].dev;
        spin_unlock(&lag_lock);

        return res;
}
EXPORT_SYMBOL(mlx5_lag_is_master);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        res = ldev && __mlx5_lag_is_sriov(ldev);
        spin_unlock(&lag_lock);

        return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
        spin_unlock(&lag_lock);

        return res;
}
EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
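
/* Temporarily forbid LAG mode changes: bump mode_changes_in_progress and
 * disable an active LAG. mlx5_lag_enable_change() drops the counter and
 * re-triggers the bond work.
 */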
void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
        struct mlx5_core_dev *dev0;
        struct mlx5_core_dev *dev1;
        struct mlx5_lag *ldev;

        ldev = mlx5_lag_dev(dev);
        if (!ldev)
                return;

        mlx5_dev_list_lock();

        dev0 = ldev->pf[MLX5_LAG_P1].dev;
        dev1 = ldev->pf[MLX5_LAG_P2].dev;

        ldev->mode_changes_in_progress++;
        if (__mlx5_lag_is_active(ldev)) {
                mlx5_lag_lock_eswitches(dev0, dev1);
                mlx5_disable_lag(ldev);
                mlx5_lag_unlock_eswitches(dev0, dev1);
        }
        mlx5_dev_list_unlock();
}

void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;

        ldev = mlx5_lag_dev(dev);
        if (!ldev)
                return;

        mlx5_dev_list_lock();
        ldev->mode_changes_in_progress--;
        mlx5_dev_list_unlock();
        mlx5_queue_bond_work(ldev, 0);
}

struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
        struct net_device *ndev = NULL;
        struct mlx5_lag *ldev;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);

        if (!(ldev && __mlx5_lag_is_roce(ldev)))
                goto unlock;

        if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
                ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
                       ldev->pf[MLX5_LAG_P1].netdev :
                       ldev->pf[MLX5_LAG_P2].netdev;
        } else {
                ndev = ldev->pf[MLX5_LAG_P1].netdev;
        }
        if (ndev)
                dev_hold(ndev);

unlock:
        spin_unlock(&lag_lock);

        return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
                           struct net_device *slave)
{
        struct mlx5_lag *ldev;
        u8 port = 0;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        if (!(ldev && __mlx5_lag_is_roce(ldev)))
                goto unlock;

        if (ldev->pf[MLX5_LAG_P1].netdev == slave)
                port = MLX5_LAG_P1;
        else
                port = MLX5_LAG_P2;

        port = ldev->v2p_map[port];

unlock:
        spin_unlock(&lag_lock);
        return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
        struct mlx5_core_dev *peer_dev = NULL;
        struct mlx5_lag *ldev;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        if (!ldev)
                goto unlock;

        peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
                   ldev->pf[MLX5_LAG_P2].dev :
                   ldev->pf[MLX5_LAG_P1].dev;

unlock:
        spin_unlock(&lag_lock);
        return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
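
/* Query congestion counters from this device, or from both LAG members when
 * the LAG is active, accumulating the results into the caller's buffer.
 */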
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
                                 u64 *values,
                                 int num_counters,
                                 size_t *offsets)
{
        int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
        struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
        struct mlx5_lag *ldev;
        int num_ports;
        int ret, i, j;
        void *out;

        out = kvzalloc(outlen, GFP_KERNEL);
        if (!out)
                return -ENOMEM;

        memset(values, 0, sizeof(*values) * num_counters);

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        if (ldev && __mlx5_lag_is_active(ldev)) {
                num_ports = MLX5_MAX_PORTS;
                mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
                mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
        } else {
                num_ports = 1;
                mdev[MLX5_LAG_P1] = dev;
        }
        spin_unlock(&lag_lock);

        for (i = 0; i < num_ports; ++i) {
                u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

                MLX5_SET(query_cong_statistics_in, in, opcode,
                         MLX5_CMD_OP_QUERY_CONG_STATISTICS);
                ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
                                          out);
                if (ret)
                        goto free;

                for (j = 0; j < num_counters; ++j)
                        values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
        }

free:
        kvfree(out);
        return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);