From 9464a3d68ea99ccac7e3518e1dfd366ac80bbc90 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 22 Jun 2023 15:33:09 +0200 Subject: [PATCH] mlxsw: spectrum_router: Track next hops at CRIFs Move the list of next hops from struct mlxsw_sp_rif to mlxsw_sp_crif. The reason is that eventually, next hops for mlxsw uppers should be offloaded and unoffloaded on demand as a netdevice becomes an upper, or stops being one. Currently, next hops are tracked at RIFs, but RIFs do not exist when a netdevice is not an mlxsw uppers. CRIFs are kept track of throughout the netdevice lifetime. Correspondingly, track at each next hop not its RIF, but its CRIF (from which a RIF can always be deduced). Note that now that next hops are tracked at a CRIF, it is not necessary to move each over to a new RIF when it is necessary to edit a RIF. Therefore drop mlxsw_sp_nexthop_rif_migrate() and have mlxsw_sp_rif_migrate_destroy() call mlxsw_sp_nexthop_rif_update() directly. Signed-off-by: Petr Machata Reviewed-by: Danielle Ratson Link: https://lore.kernel.org/r/e7c1c0a7dd13883b0f09aeda12c4fcf4d63a70e3.1687438411.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 127 ++++++++++++--------- 1 file changed, 75 insertions(+), 52 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 6c9244c..445ba7f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -59,6 +59,7 @@ struct mlxsw_sp_crif { struct mlxsw_sp_crif_key key; struct rhash_head ht_node; bool can_destroy; + struct list_head nexthop_list; struct mlxsw_sp_rif *rif; }; @@ -70,7 +71,6 @@ static const struct rhashtable_params mlxsw_sp_crif_ht_params = { struct mlxsw_sp_rif { struct mlxsw_sp_crif *crif; /* NULL for underlay RIF */ - struct list_head nexthop_list; struct list_head neigh_list; struct mlxsw_sp_fid *fid; unsigned char addr[ETH_ALEN]; @@ -1083,6 +1083,7 @@ static void mlxsw_sp_crif_init(struct mlxsw_sp_crif *crif, struct net_device *dev) { crif->key.dev = dev; + INIT_LIST_HEAD(&crif->nexthop_list); } static struct mlxsw_sp_crif * @@ -1103,6 +1104,7 @@ static void mlxsw_sp_crif_free(struct mlxsw_sp_crif *crif) if (WARN_ON(crif->rif)) return; + WARN_ON(!list_empty(&crif->nexthop_list)); kfree(crif); } @@ -1720,17 +1722,26 @@ static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry); } -static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *old_rif, - struct mlxsw_sp_rif *new_rif); +static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif); + static void mlxsw_sp_rif_migrate_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *old_rif, struct mlxsw_sp_rif *new_rif, bool migrate_nhs) { + struct mlxsw_sp_crif *crif = old_rif->crif; + struct mlxsw_sp_crif mock_crif = {}; + if (migrate_nhs) - mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, old_rif, new_rif); + mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif); + /* Plant a mock CRIF so that destroying the old RIF doesn't unoffload + * our nexthops and IPIP tunnels, and doesn't sever the crif->rif link. + */ + mlxsw_sp_crif_init(&mock_crif, crif->key.dev); + old_rif->crif = &mock_crif; + mock_crif.rif = old_rif; mlxsw_sp_rif_destroy(old_rif); } @@ -1756,9 +1767,6 @@ mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif); - /** * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry. * @mlxsw_sp: mlxsw_sp. @@ -2987,7 +2995,7 @@ struct mlxsw_sp_nexthop_key { struct mlxsw_sp_nexthop { struct list_head neigh_list_node; /* member of neigh entry list */ - struct list_head rif_list_node; + struct list_head crif_list_node; struct list_head router_list_node; struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group * this nexthop belongs to @@ -3000,7 +3008,7 @@ struct mlxsw_sp_nexthop { int nh_weight; int norm_nh_weight; int num_adj_entries; - struct mlxsw_sp_rif *rif; + struct mlxsw_sp_crif *crif; u8 should_offload:1, /* set indicates this nexthop should be written * to the adjacency table. */ @@ -3023,9 +3031,9 @@ struct mlxsw_sp_nexthop { static struct net_device * mlxsw_sp_nexthop_dev(const struct mlxsw_sp_nexthop *nh) { - if (nh->rif) - return mlxsw_sp_rif_dev(nh->rif); - return NULL; + if (!nh->crif) + return NULL; + return nh->crif->key.dev; } enum mlxsw_sp_nexthop_group_type { @@ -3050,7 +3058,11 @@ struct mlxsw_sp_nexthop_group_info { static struct mlxsw_sp_rif * mlxsw_sp_nhgi_rif(const struct mlxsw_sp_nexthop_group_info *nhgi) { - return nhgi->nexthops[0].rif; + struct mlxsw_sp_crif *crif = nhgi->nexthops[0].crif; + + if (!crif) + return NULL; + return crif->rif; } struct mlxsw_sp_nexthop_group_vr_key { @@ -3174,7 +3186,9 @@ int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh) { - return nh->rif; + if (WARN_ON(!nh->crif)) + return NULL; + return nh->crif->rif; } bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh) @@ -3559,11 +3573,12 @@ static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, bool force, char *ratr_pl) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; + struct mlxsw_sp_rif *rif = mlxsw_sp_nexthop_rif(nh); enum mlxsw_reg_ratr_op op; u16 rif_index; - rif_index = nh->rif ? nh->rif->rif_index : - mlxsw_sp->router->lb_crif->rif->rif_index; + rif_index = rif ? rif->rif_index : + mlxsw_sp->router->lb_crif->rif->rif_index; op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY : MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY; mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET, @@ -4181,23 +4196,23 @@ mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, } } -static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh, - struct mlxsw_sp_rif *rif) +static void mlxsw_sp_nexthop_crif_init(struct mlxsw_sp_nexthop *nh, + struct mlxsw_sp_crif *crif) { - if (nh->rif) + if (nh->crif) return; - nh->rif = rif; - list_add(&nh->rif_list_node, &rif->nexthop_list); + nh->crif = crif; + list_add(&nh->crif_list_node, &crif->nexthop_list); } -static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh) +static void mlxsw_sp_nexthop_crif_fini(struct mlxsw_sp_nexthop *nh) { - if (!nh->rif) + if (!nh->crif) return; - list_del(&nh->rif_list_node); - nh->rif = NULL; + list_del(&nh->crif_list_node); + nh->crif = NULL; } static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, @@ -4209,6 +4224,9 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, u8 nud_state, dead; int err; + if (WARN_ON(!nh->crif->rif)) + return 0; + if (!nh->nhgi->gateway || nh->neigh_entry) return 0; dev = mlxsw_sp_nexthop_dev(nh); @@ -4299,15 +4317,20 @@ static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, struct mlxsw_sp_ipip_entry *ipip_entry) { + struct mlxsw_sp_crif *crif; bool removing; if (!nh->nhgi->gateway || nh->ipip_entry) return; + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, ipip_entry->ol_dev); + if (WARN_ON(!crif)) + return; + nh->ipip_entry = ipip_entry; removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev); __mlxsw_sp_nexthop_neigh_update(nh, removing); - mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common); + mlxsw_sp_nexthop_crif_init(nh, crif); } static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp, @@ -4339,7 +4362,7 @@ static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp, { const struct mlxsw_sp_ipip_ops *ipip_ops; struct mlxsw_sp_ipip_entry *ipip_entry; - struct mlxsw_sp_rif *rif; + struct mlxsw_sp_crif *crif; int err; ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev); @@ -4353,11 +4376,15 @@ static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp, } nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!rif) + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, dev); + if (!crif) + return 0; + + mlxsw_sp_nexthop_crif_init(nh, crif); + + if (!crif->rif) return 0; - mlxsw_sp_nexthop_rif_init(nh, rif); err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); if (err) goto err_neigh_init; @@ -4365,7 +4392,7 @@ static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp, return 0; err_neigh_init: - mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop_crif_fini(nh); return err; } @@ -4386,7 +4413,7 @@ static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh); - mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop_crif_fini(nh); } static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, @@ -4479,7 +4506,7 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh; bool removing; - list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) { + list_for_each_entry(nh, &rif->crif->nexthop_list, crif_list_node) { switch (nh->type) { case MLXSW_SP_NEXTHOP_TYPE_ETH: removing = false; @@ -4497,25 +4524,14 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, } } -static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *old_rif, - struct mlxsw_sp_rif *new_rif) -{ - struct mlxsw_sp_nexthop *nh; - - list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list); - list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node) - nh->rif = new_rif; - mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif); -} - static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { struct mlxsw_sp_nexthop *nh, *tmp; - list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) { - mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); + list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list, + crif_list_node) { + mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh); mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); } } @@ -4857,13 +4873,13 @@ static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp, * via an egress RIF, they still need to be programmed using a * valid RIF, so use the loopback RIF created during init. */ - nh->rif = mlxsw_sp->router->lb_crif->rif; + nh->crif = mlxsw_sp->router->lb_crif; } static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { - nh->rif = NULL; + nh->crif = NULL; nh->should_offload = 0; } @@ -7871,6 +7887,9 @@ static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { + /* Signal to nexthop cleanup that the RIF is going away. */ + rif->crif->rif = NULL; + mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index); mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif); mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); @@ -7989,7 +8008,6 @@ static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, if (!rif) return NULL; - INIT_LIST_HEAD(&rif->nexthop_list); INIT_LIST_HEAD(&rif->neigh_list); if (l3_dev) { ether_addr_copy(rif->addr, l3_dev->dev_addr); @@ -8008,7 +8026,6 @@ static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, static void mlxsw_sp_rif_free(struct mlxsw_sp_rif *rif) { WARN_ON(!list_empty(&rif->neigh_list)); - WARN_ON(!list_empty(&rif->nexthop_list)); if (rif->crif) rif->crif->rif = NULL; @@ -9290,7 +9307,13 @@ err_netdev_insert: static void mlxsw_sp_crif_unregister(struct mlxsw_sp_router *router, struct mlxsw_sp_crif *crif) { + struct mlxsw_sp_nexthop *nh, *tmp; + mlxsw_sp_crif_remove(router, crif); + + list_for_each_entry_safe(nh, tmp, &crif->nexthop_list, crif_list_node) + mlxsw_sp_nexthop_type_fini(router->mlxsw_sp, nh); + if (crif->rif) crif->can_destroy = true; else -- 2.7.4