1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
4 #include <linux/kernel.h>
5 #include <linux/types.h>
6 #include <linux/rhashtable.h>
7 #include <linux/bitops.h>
9 #include <linux/notifier.h>
10 #include <linux/inetdevice.h>
11 #include <linux/netdevice.h>
12 #include <linux/if_bridge.h>
13 #include <linux/socket.h>
14 #include <linux/route.h>
15 #include <linux/gcd.h>
16 #include <linux/if_macvlan.h>
17 #include <linux/refcount.h>
18 #include <linux/jhash.h>
19 #include <linux/net_namespace.h>
20 #include <linux/mutex.h>
21 #include <linux/genalloc.h>
22 #include <net/netevent.h>
23 #include <net/neighbour.h>
25 #include <net/inet_dscp.h>
26 #include <net/ip_fib.h>
27 #include <net/ip6_fib.h>
28 #include <net/nexthop.h>
29 #include <net/fib_rules.h>
30 #include <net/ip_tunnels.h>
31 #include <net/l3mdev.h>
32 #include <net/addrconf.h>
33 #include <net/ndisc.h>
35 #include <net/fib_notifier.h>
36 #include <net/switchdev.h>
41 #include "spectrum_cnt.h"
42 #include "spectrum_dpipe.h"
43 #include "spectrum_ipip.h"
44 #include "spectrum_mr.h"
45 #include "spectrum_mr_tcam.h"
46 #include "spectrum_router.h"
47 #include "spectrum_span.h"
51 struct mlxsw_sp_lpm_tree;
52 struct mlxsw_sp_rif_ops;
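/* A CRIF ("candidate RIF") tracks a netdevice on top of which a RIF is, or
 * could later be, configured. It is keyed by the netdevice and keeps the
 * nexthops that resolve through that device, plus a back-pointer to the RIF
 * when one exists.
 */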
54 struct mlxsw_sp_crif_key {
55 struct net_device *dev;
58 struct mlxsw_sp_crif {
59 struct mlxsw_sp_crif_key key;
60 struct rhash_head ht_node;
62 struct list_head nexthop_list;
63 struct mlxsw_sp_rif *rif;
66 static const struct rhashtable_params mlxsw_sp_crif_ht_params = {
67 .key_offset = offsetof(struct mlxsw_sp_crif, key),
68 .key_len = sizeof_field(struct mlxsw_sp_crif, key),
69 .head_offset = offsetof(struct mlxsw_sp_crif, ht_node),
73 struct mlxsw_sp_crif *crif; /* NULL for underlay RIF */
74 netdevice_tracker dev_tracker;
75 struct list_head neigh_list;
76 struct mlxsw_sp_fid *fid;
77 unsigned char addr[ETH_ALEN];
83 const struct mlxsw_sp_rif_ops *ops;
84 struct mlxsw_sp *mlxsw_sp;
86 unsigned int counter_ingress;
87 bool counter_ingress_valid;
88 unsigned int counter_egress;
89 bool counter_egress_valid;
92 static struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
96 return rif->crif->key.dev;
99 struct mlxsw_sp_rif_params {
100 struct net_device *dev;
110 struct mlxsw_sp_rif_subport {
111 struct mlxsw_sp_rif common;
112 refcount_t ref_count;
121 struct mlxsw_sp_rif_ipip_lb {
122 struct mlxsw_sp_rif common;
123 struct mlxsw_sp_rif_ipip_lb_config lb_config;
124 u16 ul_vr_id; /* Spectrum-1. */
125 u16 ul_rif_id; /* Spectrum-2+. */
128 struct mlxsw_sp_rif_params_ipip_lb {
129 struct mlxsw_sp_rif_params common;
130 struct mlxsw_sp_rif_ipip_lb_config lb_config;
133 struct mlxsw_sp_rif_ops {
134 enum mlxsw_sp_rif_type type;
137 void (*setup)(struct mlxsw_sp_rif *rif,
138 const struct mlxsw_sp_rif_params *params);
139 int (*configure)(struct mlxsw_sp_rif *rif,
140 struct netlink_ext_ack *extack);
141 void (*deconfigure)(struct mlxsw_sp_rif *rif);
142 struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
143 const struct mlxsw_sp_rif_params *params,
144 struct netlink_ext_ack *extack);
145 void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
148 struct mlxsw_sp_rif_mac_profile {
149 unsigned char mac_prefix[ETH_ALEN];
150 refcount_t ref_count;
154 struct mlxsw_sp_router_ops {
155 int (*init)(struct mlxsw_sp *mlxsw_sp);
156 int (*ipips_init)(struct mlxsw_sp *mlxsw_sp);
159 static struct mlxsw_sp_rif *
160 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
161 const struct net_device *dev);
162 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
163 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
164 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
165 struct mlxsw_sp_lpm_tree *lpm_tree);
166 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
167 const struct mlxsw_sp_fib *fib,
169 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
170 const struct mlxsw_sp_fib *fib);
172 static unsigned int *
173 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
174 enum mlxsw_sp_rif_counter_dir dir)
177 case MLXSW_SP_RIF_COUNTER_EGRESS:
178 return &rif->counter_egress;
179 case MLXSW_SP_RIF_COUNTER_INGRESS:
180 return &rif->counter_ingress;
186 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
187 enum mlxsw_sp_rif_counter_dir dir)
190 case MLXSW_SP_RIF_COUNTER_EGRESS:
191 return rif->counter_egress_valid;
192 case MLXSW_SP_RIF_COUNTER_INGRESS:
193 return rif->counter_ingress_valid;
199 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
200 enum mlxsw_sp_rif_counter_dir dir,
204 case MLXSW_SP_RIF_COUNTER_EGRESS:
205 rif->counter_egress_valid = valid;
207 case MLXSW_SP_RIF_COUNTER_INGRESS:
208 rif->counter_ingress_valid = valid;
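/* Read-modify-write of the RITR register: query the RIF's current
 * configuration and update only the counter binding for the given direction,
 * leaving the other RIF attributes untouched.
 */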
213 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
214 unsigned int counter_index, bool enable,
215 enum mlxsw_sp_rif_counter_dir dir)
217 char ritr_pl[MLXSW_REG_RITR_LEN];
218 bool is_egress = false;
221 if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
223 mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
224 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
228 mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
230 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
233 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
234 struct mlxsw_sp_rif *rif,
235 enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
237 char ricnt_pl[MLXSW_REG_RICNT_LEN];
238 unsigned int *p_counter_index;
242 valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
246 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
247 if (!p_counter_index)
249 mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
250 MLXSW_REG_RICNT_OPCODE_NOP);
251 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
254 *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
258 struct mlxsw_sp_rif_counter_set_basic {
259 u64 good_unicast_packets;
260 u64 good_multicast_packets;
261 u64 good_broadcast_packets;
262 u64 good_unicast_bytes;
263 u64 good_multicast_bytes;
264 u64 good_broadcast_bytes;
272 mlxsw_sp_rif_counter_fetch_clear(struct mlxsw_sp_rif *rif,
273 enum mlxsw_sp_rif_counter_dir dir,
274 struct mlxsw_sp_rif_counter_set_basic *set)
276 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
277 char ricnt_pl[MLXSW_REG_RICNT_LEN];
278 unsigned int *p_counter_index;
281 if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
284 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
285 if (!p_counter_index)
288 mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
289 MLXSW_REG_RICNT_OPCODE_CLEAR);
290 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
297 #define MLXSW_SP_RIF_COUNTER_EXTRACT(NAME) \
298 (set->NAME = mlxsw_reg_ricnt_ ## NAME ## _get(ricnt_pl))
300 MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_packets);
301 MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_packets);
302 MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_packets);
303 MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_bytes);
304 MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_bytes);
305 MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_bytes);
306 MLXSW_SP_RIF_COUNTER_EXTRACT(error_packets);
307 MLXSW_SP_RIF_COUNTER_EXTRACT(discard_packets);
308 MLXSW_SP_RIF_COUNTER_EXTRACT(error_bytes);
309 MLXSW_SP_RIF_COUNTER_EXTRACT(discard_bytes);
311 #undef MLXSW_SP_RIF_COUNTER_EXTRACT
316 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
317 unsigned int counter_index)
319 char ricnt_pl[MLXSW_REG_RICNT_LEN];
321 mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
322 MLXSW_REG_RICNT_OPCODE_CLEAR);
323 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
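/* Allocate a flow counter from the RIF sub-pool, clear it and bind it to the
 * RIF in the requested direction. A no-op if a valid counter already exists
 * for that direction.
 */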
326 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp_rif *rif,
327 enum mlxsw_sp_rif_counter_dir dir)
329 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
330 unsigned int *p_counter_index;
333 if (mlxsw_sp_rif_counter_valid_get(rif, dir))
336 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
337 if (!p_counter_index)
340 err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
345 err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
347 goto err_counter_clear;
349 err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
350 *p_counter_index, true, dir);
352 goto err_counter_edit;
353 mlxsw_sp_rif_counter_valid_set(rif, dir, true);
358 mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
363 void mlxsw_sp_rif_counter_free(struct mlxsw_sp_rif *rif,
364 enum mlxsw_sp_rif_counter_dir dir)
366 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
367 unsigned int *p_counter_index;
369 if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
372 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
373 if (WARN_ON(!p_counter_index))
375 mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
376 *p_counter_index, false, dir);
377 mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
379 mlxsw_sp_rif_counter_valid_set(rif, dir, false);
382 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
384 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
385 struct devlink *devlink;
387 devlink = priv_to_devlink(mlxsw_sp->core);
388 if (!devlink_dpipe_table_counter_enabled(devlink,
389 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
391 mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
394 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
396 mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
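/* Prefix lengths 0..128 are possible, hence 129 distinct values. The same
 * bitmap is used for IPv4, which only uses lengths 0..32.
 */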
399 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
401 struct mlxsw_sp_prefix_usage {
402 DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
405 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
406 for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
409 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
410 struct mlxsw_sp_prefix_usage *prefix_usage2)
412 return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
416 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
417 struct mlxsw_sp_prefix_usage *prefix_usage2)
419 memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
423 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
424 unsigned char prefix_len)
426 set_bit(prefix_len, prefix_usage->b);
430 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
431 unsigned char prefix_len)
433 clear_bit(prefix_len, prefix_usage->b);
436 struct mlxsw_sp_fib_key {
437 unsigned char addr[sizeof(struct in6_addr)];
438 unsigned char prefix_len;
441 enum mlxsw_sp_fib_entry_type {
442 MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
443 MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
444 MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
445 MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,
446 MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE,
448 /* This is a special case of local delivery, where a packet should be
449 * decapsulated on reception. Note that there is no corresponding ENCAP,
450 * because that's a type of next hop, not of FIB entry. (There can be
451 * several next hops in a REMOTE entry, and some of them may be
452 * encapsulating entries.)
454 MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
455 MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
458 struct mlxsw_sp_nexthop_group_info;
459 struct mlxsw_sp_nexthop_group;
460 struct mlxsw_sp_fib_entry;
462 struct mlxsw_sp_fib_node {
463 struct mlxsw_sp_fib_entry *fib_entry;
464 struct list_head list;
465 struct rhash_head ht_node;
466 struct mlxsw_sp_fib *fib;
467 struct mlxsw_sp_fib_key key;
470 struct mlxsw_sp_fib_entry_decap {
471 struct mlxsw_sp_ipip_entry *ipip_entry;
475 struct mlxsw_sp_fib_entry {
476 struct mlxsw_sp_fib_node *fib_node;
477 enum mlxsw_sp_fib_entry_type type;
478 struct list_head nexthop_group_node;
479 struct mlxsw_sp_nexthop_group *nh_group;
480 struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
483 struct mlxsw_sp_fib4_entry {
484 struct mlxsw_sp_fib_entry common;
491 struct mlxsw_sp_fib6_entry {
492 struct mlxsw_sp_fib_entry common;
493 struct list_head rt6_list;
497 struct mlxsw_sp_rt6 {
498 struct list_head list;
499 struct fib6_info *rt;
502 struct mlxsw_sp_lpm_tree {
504 unsigned int ref_count;
505 enum mlxsw_sp_l3proto proto;
506 unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
507 struct mlxsw_sp_prefix_usage prefix_usage;
510 struct mlxsw_sp_fib {
511 struct rhashtable ht;
512 struct list_head node_list;
513 struct mlxsw_sp_vr *vr;
514 struct mlxsw_sp_lpm_tree *lpm_tree;
515 enum mlxsw_sp_l3proto proto;
519 u16 id; /* virtual router ID */
520 u32 tb_id; /* kernel fib table id */
521 unsigned int rif_count;
522 struct mlxsw_sp_fib *fib4;
523 struct mlxsw_sp_fib *fib6;
524 struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
525 struct mlxsw_sp_rif *ul_rif;
526 refcount_t ul_rif_refcnt;
529 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
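/* A FIB is instantiated per virtual router and protocol. On creation it takes
 * a reference on the per-protocol default LPM tree and binds the virtual
 * router to that tree.
 */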
531 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
532 struct mlxsw_sp_vr *vr,
533 enum mlxsw_sp_l3proto proto)
535 struct mlxsw_sp_lpm_tree *lpm_tree;
536 struct mlxsw_sp_fib *fib;
539 lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
540 fib = kzalloc(sizeof(*fib), GFP_KERNEL);
542 return ERR_PTR(-ENOMEM);
543 err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
545 goto err_rhashtable_init;
546 INIT_LIST_HEAD(&fib->node_list);
549 fib->lpm_tree = lpm_tree;
550 mlxsw_sp_lpm_tree_hold(lpm_tree);
551 err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
553 goto err_lpm_tree_bind;
557 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
563 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
564 struct mlxsw_sp_fib *fib)
566 mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
567 mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
568 WARN_ON(!list_empty(&fib->node_list));
569 rhashtable_destroy(&fib->ht);
573 static struct mlxsw_sp_lpm_tree *
574 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
576 struct mlxsw_sp_lpm_tree *lpm_tree;
579 for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
580 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
581 if (lpm_tree->ref_count == 0)
587 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
588 struct mlxsw_sp_lpm_tree *lpm_tree)
590 char ralta_pl[MLXSW_REG_RALTA_LEN];
592 mlxsw_reg_ralta_pack(ralta_pl, true,
593 (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
595 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
598 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
599 struct mlxsw_sp_lpm_tree *lpm_tree)
601 char ralta_pl[MLXSW_REG_RALTA_LEN];
603 mlxsw_reg_ralta_pack(ralta_pl, false,
604 (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
606 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
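/* Program the tree structure (RALST register) from the set of prefix lengths
 * in use: the longest used prefix becomes the root bin, and each used bin is
 * chained to the previously registered, shorter one.
 */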
610 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
611 struct mlxsw_sp_prefix_usage *prefix_usage,
612 struct mlxsw_sp_lpm_tree *lpm_tree)
614 char ralst_pl[MLXSW_REG_RALST_LEN];
617 u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
619 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
622 mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
623 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
626 mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
627 MLXSW_REG_RALST_BIN_NO_CHILD);
628 last_prefix = prefix;
630 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
633 static struct mlxsw_sp_lpm_tree *
634 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
635 struct mlxsw_sp_prefix_usage *prefix_usage,
636 enum mlxsw_sp_l3proto proto)
638 struct mlxsw_sp_lpm_tree *lpm_tree;
641 lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
643 return ERR_PTR(-EBUSY);
644 lpm_tree->proto = proto;
645 err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
649 err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
652 goto err_left_struct_set;
653 memcpy(&lpm_tree->prefix_usage, prefix_usage,
654 sizeof(lpm_tree->prefix_usage));
655 memset(&lpm_tree->prefix_ref_count, 0,
656 sizeof(lpm_tree->prefix_ref_count));
657 lpm_tree->ref_count = 1;
661 mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
665 static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
666 struct mlxsw_sp_lpm_tree *lpm_tree)
668 mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
671 static struct mlxsw_sp_lpm_tree *
672 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
673 struct mlxsw_sp_prefix_usage *prefix_usage,
674 enum mlxsw_sp_l3proto proto)
676 struct mlxsw_sp_lpm_tree *lpm_tree;
679 for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
680 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
681 if (lpm_tree->ref_count != 0 &&
682 lpm_tree->proto == proto &&
683 mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
685 mlxsw_sp_lpm_tree_hold(lpm_tree);
689 return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
692 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
694 lpm_tree->ref_count++;
697 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
698 struct mlxsw_sp_lpm_tree *lpm_tree)
700 if (--lpm_tree->ref_count == 0)
701 mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
704 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
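/* Tree 0 is reserved, so only MAX_LPM_TREES - MLXSW_SP_LPM_TREE_MIN trees are
 * managed here. An initially empty default tree is pre-created for each of
 * IPv4 and IPv6.
 */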
706 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
708 struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
709 struct mlxsw_sp_lpm_tree *lpm_tree;
713 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
716 max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
717 mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
718 mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
719 sizeof(struct mlxsw_sp_lpm_tree),
721 if (!mlxsw_sp->router->lpm.trees)
724 for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
725 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
726 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
729 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
730 MLXSW_SP_L3_PROTO_IPV4);
731 if (IS_ERR(lpm_tree)) {
732 err = PTR_ERR(lpm_tree);
733 goto err_ipv4_tree_get;
735 mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
737 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
738 MLXSW_SP_L3_PROTO_IPV6);
739 if (IS_ERR(lpm_tree)) {
740 err = PTR_ERR(lpm_tree);
741 goto err_ipv6_tree_get;
743 mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
748 lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
749 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
751 kfree(mlxsw_sp->router->lpm.trees);
755 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
757 struct mlxsw_sp_lpm_tree *lpm_tree;
759 lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
760 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
762 lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
763 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
765 kfree(mlxsw_sp->router->lpm.trees);
768 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
770 return !!vr->fib4 || !!vr->fib6 ||
771 !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
772 !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
775 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
777 int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
778 struct mlxsw_sp_vr *vr;
781 for (i = 0; i < max_vrs; i++) {
782 vr = &mlxsw_sp->router->vrs[i];
783 if (!mlxsw_sp_vr_is_used(vr))
789 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
790 const struct mlxsw_sp_fib *fib, u8 tree_id)
792 char raltb_pl[MLXSW_REG_RALTB_LEN];
794 mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
795 (enum mlxsw_reg_ralxx_protocol) fib->proto,
797 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
800 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
801 const struct mlxsw_sp_fib *fib)
803 char raltb_pl[MLXSW_REG_RALTB_LEN];
805 /* Bind to tree 0, which is the default */
806 mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
807 (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
808 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
811 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
813 /* For our purposes, squash the main, default and local tables into one */
814 if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
815 tb_id = RT_TABLE_MAIN;
819 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
822 int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
823 struct mlxsw_sp_vr *vr;
826 tb_id = mlxsw_sp_fix_tb_id(tb_id);
828 for (i = 0; i < max_vrs; i++) {
829 vr = &mlxsw_sp->router->vrs[i];
830 if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
836 int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
839 struct mlxsw_sp_vr *vr;
842 mutex_lock(&mlxsw_sp->router->lock);
843 vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
850 mutex_unlock(&mlxsw_sp->router->lock);
854 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
855 enum mlxsw_sp_l3proto proto)
858 case MLXSW_SP_L3_PROTO_IPV4:
860 case MLXSW_SP_L3_PROTO_IPV6:
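/* Creating a virtual router instantiates its IPv4 and IPv6 FIBs and the
 * multicast routing tables for both protocols; the VR counts as used for as
 * long as any of these exist (see mlxsw_sp_vr_is_used()).
 */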
866 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
868 struct netlink_ext_ack *extack)
870 struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
871 struct mlxsw_sp_fib *fib4;
872 struct mlxsw_sp_fib *fib6;
873 struct mlxsw_sp_vr *vr;
876 vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
878 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
879 return ERR_PTR(-EBUSY);
881 fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
883 return ERR_CAST(fib4);
884 fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
887 goto err_fib6_create;
889 mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
890 MLXSW_SP_L3_PROTO_IPV4);
891 if (IS_ERR(mr4_table)) {
892 err = PTR_ERR(mr4_table);
893 goto err_mr4_table_create;
895 mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
896 MLXSW_SP_L3_PROTO_IPV6);
897 if (IS_ERR(mr6_table)) {
898 err = PTR_ERR(mr6_table);
899 goto err_mr6_table_create;
904 vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
905 vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
909 err_mr6_table_create:
910 mlxsw_sp_mr_table_destroy(mr4_table);
911 err_mr4_table_create:
912 mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
914 mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
918 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
919 struct mlxsw_sp_vr *vr)
921 mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
922 vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
923 mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
924 vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
925 mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
927 mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
931 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
932 struct netlink_ext_ack *extack)
934 struct mlxsw_sp_vr *vr;
936 tb_id = mlxsw_sp_fix_tb_id(tb_id);
937 vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
939 vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
943 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
945 if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
946 list_empty(&vr->fib6->node_list) &&
947 mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
948 mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
949 mlxsw_sp_vr_destroy(mlxsw_sp, vr);
953 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
954 enum mlxsw_sp_l3proto proto, u8 tree_id)
956 struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
958 if (!mlxsw_sp_vr_is_used(vr))
960 if (fib->lpm_tree->id == tree_id)
965 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
966 struct mlxsw_sp_fib *fib,
967 struct mlxsw_sp_lpm_tree *new_tree)
969 struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
972 fib->lpm_tree = new_tree;
973 mlxsw_sp_lpm_tree_hold(new_tree);
974 err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
977 mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
981 mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
982 fib->lpm_tree = old_tree;
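/* Rebind every virtual router that currently uses the per-protocol default
 * tree to new_tree and then make new_tree the new default. On failure, the
 * routers converted so far are rolled back to the old tree.
 */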
986 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
987 struct mlxsw_sp_fib *fib,
988 struct mlxsw_sp_lpm_tree *new_tree)
990 int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
991 enum mlxsw_sp_l3proto proto = fib->proto;
992 struct mlxsw_sp_lpm_tree *old_tree;
993 u8 old_id, new_id = new_tree->id;
994 struct mlxsw_sp_vr *vr;
997 old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
998 old_id = old_tree->id;
1000 for (i = 0; i < max_vrs; i++) {
1001 vr = &mlxsw_sp->router->vrs[i];
1002 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
1004 err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
1005 mlxsw_sp_vr_fib(vr, proto),
1008 goto err_tree_replace;
1011 memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
1012 sizeof(new_tree->prefix_ref_count));
1013 mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
1014 mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
1019 for (i--; i >= 0; i--) {
1020 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
1022 mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
1023 mlxsw_sp_vr_fib(vr, proto),
1029 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
1031 struct mlxsw_sp_vr *vr;
1035 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
1038 max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
1039 mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
1041 if (!mlxsw_sp->router->vrs)
1044 for (i = 0; i < max_vrs; i++) {
1045 vr = &mlxsw_sp->router->vrs[i];
1052 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
1054 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
1056 /* At this stage we're guaranteed not to have new incoming
1057 * FIB notifications and the work queue is free from FIBs
1058 * sitting on top of mlxsw netdevs. However, we can still
1059 * have other FIBs queued. Flush the queue before flushing
1060 * the device's tables. No need for locks, as we're the only writer.
1063 mlxsw_core_flush_owq();
1064 mlxsw_sp_router_fib_flush(mlxsw_sp);
1065 kfree(mlxsw_sp->router->vrs);
1068 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
1070 struct net_device *d;
1074 d = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1076 tb_id = l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
1078 tb_id = RT_TABLE_MAIN;
1085 mlxsw_sp_crif_init(struct mlxsw_sp_crif *crif, struct net_device *dev)
1087 crif->key.dev = dev;
1088 INIT_LIST_HEAD(&crif->nexthop_list);
1091 static struct mlxsw_sp_crif *
1092 mlxsw_sp_crif_alloc(struct net_device *dev)
1094 struct mlxsw_sp_crif *crif;
1096 crif = kzalloc(sizeof(*crif), GFP_KERNEL);
1100 mlxsw_sp_crif_init(crif, dev);
1104 static void mlxsw_sp_crif_free(struct mlxsw_sp_crif *crif)
1106 if (WARN_ON(crif->rif))
1109 WARN_ON(!list_empty(&crif->nexthop_list));
1113 static int mlxsw_sp_crif_insert(struct mlxsw_sp_router *router,
1114 struct mlxsw_sp_crif *crif)
1116 return rhashtable_insert_fast(&router->crif_ht, &crif->ht_node,
1117 mlxsw_sp_crif_ht_params);
1120 static void mlxsw_sp_crif_remove(struct mlxsw_sp_router *router,
1121 struct mlxsw_sp_crif *crif)
1123 rhashtable_remove_fast(&router->crif_ht, &crif->ht_node,
1124 mlxsw_sp_crif_ht_params);
1127 static struct mlxsw_sp_crif *
1128 mlxsw_sp_crif_lookup(struct mlxsw_sp_router *router,
1129 const struct net_device *dev)
1131 struct mlxsw_sp_crif_key key = {
1132 .dev = (struct net_device *)dev,
1135 return rhashtable_lookup_fast(&router->crif_ht, &key,
1136 mlxsw_sp_crif_ht_params);
1139 static struct mlxsw_sp_rif *
1140 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
1141 const struct mlxsw_sp_rif_params *params,
1142 struct netlink_ext_ack *extack);
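/* Create the loopback RIF that backs an IPIP tunnel's overlay device. The
 * loopback configuration is provided by the tunnel-type specific ops.
 */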
1144 static struct mlxsw_sp_rif_ipip_lb *
1145 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1146 enum mlxsw_sp_ipip_type ipipt,
1147 struct net_device *ol_dev,
1148 struct netlink_ext_ack *extack)
1150 struct mlxsw_sp_rif_params_ipip_lb lb_params;
1151 const struct mlxsw_sp_ipip_ops *ipip_ops;
1152 struct mlxsw_sp_rif *rif;
1154 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1155 lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1156 .common.dev = ol_dev,
1157 .common.lag = false,
1158 .common.double_entry = ipip_ops->double_rif_entry,
1159 .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1162 rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1164 return ERR_CAST(rif);
1165 return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1168 static struct mlxsw_sp_ipip_entry *
1169 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1170 enum mlxsw_sp_ipip_type ipipt,
1171 struct net_device *ol_dev)
1173 const struct mlxsw_sp_ipip_ops *ipip_ops;
1174 struct mlxsw_sp_ipip_entry *ipip_entry;
1175 struct mlxsw_sp_ipip_entry *ret = NULL;
1178 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1179 ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1181 return ERR_PTR(-ENOMEM);
1183 ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1185 if (IS_ERR(ipip_entry->ol_lb)) {
1186 ret = ERR_CAST(ipip_entry->ol_lb);
1187 goto err_ol_ipip_lb_create;
1190 ipip_entry->ipipt = ipipt;
1191 ipip_entry->ol_dev = ol_dev;
1192 ipip_entry->parms = ipip_ops->parms_init(ol_dev);
1194 err = ipip_ops->rem_ip_addr_set(mlxsw_sp, ipip_entry);
1197 goto err_rem_ip_addr_set;
1202 err_rem_ip_addr_set:
1203 mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1204 err_ol_ipip_lb_create:
1209 static void mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp *mlxsw_sp,
1210 struct mlxsw_sp_ipip_entry *ipip_entry)
1212 const struct mlxsw_sp_ipip_ops *ipip_ops =
1213 mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1215 ipip_ops->rem_ip_addr_unset(mlxsw_sp, ipip_entry);
1216 mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1221 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1222 const enum mlxsw_sp_l3proto ul_proto,
1223 union mlxsw_sp_l3addr saddr,
1225 struct mlxsw_sp_ipip_entry *ipip_entry)
1227 u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1228 enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1229 union mlxsw_sp_l3addr tun_saddr;
1231 if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1234 tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1235 return tun_ul_tb_id == ul_tb_id &&
1236 mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1239 static int mlxsw_sp_ipip_decap_parsing_depth_inc(struct mlxsw_sp *mlxsw_sp,
1240 enum mlxsw_sp_ipip_type ipipt)
1242 const struct mlxsw_sp_ipip_ops *ipip_ops;
1244 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1246 /* Not all tunnels need to increase the default parsing depth
1249 if (ipip_ops->inc_parsing_depth)
1250 return mlxsw_sp_parsing_depth_inc(mlxsw_sp);
1255 static void mlxsw_sp_ipip_decap_parsing_depth_dec(struct mlxsw_sp *mlxsw_sp,
1256 enum mlxsw_sp_ipip_type ipipt)
1258 const struct mlxsw_sp_ipip_ops *ipip_ops =
1259 mlxsw_sp->router->ipip_ops_arr[ipipt];
1261 if (ipip_ops->inc_parsing_depth)
1262 mlxsw_sp_parsing_depth_dec(mlxsw_sp);
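/* Prepare a local route for IPIP decapsulation: allocate a KVDL adjacency
 * entry to serve as the tunnel pointer, increase the parsing depth if the
 * tunnel type requires it, and cross-link the FIB entry with the IPIP entry.
 */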
1266 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1267 struct mlxsw_sp_fib_entry *fib_entry,
1268 struct mlxsw_sp_ipip_entry *ipip_entry)
1273 err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1278 err = mlxsw_sp_ipip_decap_parsing_depth_inc(mlxsw_sp,
1281 goto err_parsing_depth_inc;
1283 ipip_entry->decap_fib_entry = fib_entry;
1284 fib_entry->decap.ipip_entry = ipip_entry;
1285 fib_entry->decap.tunnel_index = tunnel_index;
1289 err_parsing_depth_inc:
1290 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
1291 fib_entry->decap.tunnel_index);
1295 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1296 struct mlxsw_sp_fib_entry *fib_entry)
1298 enum mlxsw_sp_ipip_type ipipt = fib_entry->decap.ipip_entry->ipipt;
1300 /* Unlink this node from the IPIP entry that it's the decap entry of. */
1301 fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1302 fib_entry->decap.ipip_entry = NULL;
1303 mlxsw_sp_ipip_decap_parsing_depth_dec(mlxsw_sp, ipipt);
1304 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1305 1, fib_entry->decap.tunnel_index);
1308 static struct mlxsw_sp_fib_node *
1309 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1310 size_t addr_len, unsigned char prefix_len);
1311 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1312 struct mlxsw_sp_fib_entry *fib_entry);
1315 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1316 struct mlxsw_sp_ipip_entry *ipip_entry)
1318 struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1320 mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1321 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1323 mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1327 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1328 struct mlxsw_sp_ipip_entry *ipip_entry,
1329 struct mlxsw_sp_fib_entry *decap_fib_entry)
1331 if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1334 decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1336 if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1337 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
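/* Look up the host route (/32 or /128) for the given address in the table
 * identified by tb_id, and return it only if it is of the requested entry
 * type.
 */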
1340 static struct mlxsw_sp_fib_entry *
1341 mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
1342 enum mlxsw_sp_l3proto proto,
1343 const union mlxsw_sp_l3addr *addr,
1344 enum mlxsw_sp_fib_entry_type type)
1346 struct mlxsw_sp_fib_node *fib_node;
1347 unsigned char addr_prefix_len;
1348 struct mlxsw_sp_fib *fib;
1349 struct mlxsw_sp_vr *vr;
1354 vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
1357 fib = mlxsw_sp_vr_fib(vr, proto);
1360 case MLXSW_SP_L3_PROTO_IPV4:
1361 addr4 = be32_to_cpu(addr->addr4);
1364 addr_prefix_len = 32;
1366 case MLXSW_SP_L3_PROTO_IPV6:
1367 addrp = &addr->addr6;
1369 addr_prefix_len = 128;
1376 fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
1378 if (!fib_node || fib_node->fib_entry->type != type)
1381 return fib_node->fib_entry;
1384 /* Given an IPIP entry, find the corresponding decap route. */
1385 static struct mlxsw_sp_fib_entry *
1386 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1387 struct mlxsw_sp_ipip_entry *ipip_entry)
1389 struct mlxsw_sp_fib_node *fib_node;
1390 const struct mlxsw_sp_ipip_ops *ipip_ops;
1391 unsigned char saddr_prefix_len;
1392 union mlxsw_sp_l3addr saddr;
1393 struct mlxsw_sp_fib *ul_fib;
1394 struct mlxsw_sp_vr *ul_vr;
1400 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1402 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1403 ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1407 ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1408 saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1409 ipip_entry->ol_dev);
1411 switch (ipip_ops->ul_proto) {
1412 case MLXSW_SP_L3_PROTO_IPV4:
1413 saddr4 = be32_to_cpu(saddr.addr4);
1416 saddr_prefix_len = 32;
1418 case MLXSW_SP_L3_PROTO_IPV6:
1419 saddrp = &saddr.addr6;
1421 saddr_prefix_len = 128;
1428 fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1431 fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1434 return fib_node->fib_entry;
1437 static struct mlxsw_sp_ipip_entry *
1438 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1439 enum mlxsw_sp_ipip_type ipipt,
1440 struct net_device *ol_dev)
1442 struct mlxsw_sp_ipip_entry *ipip_entry;
1444 ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1445 if (IS_ERR(ipip_entry))
1448 list_add_tail(&ipip_entry->ipip_list_node,
1449 &mlxsw_sp->router->ipip_list);
1455 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1456 struct mlxsw_sp_ipip_entry *ipip_entry)
1458 list_del(&ipip_entry->ipip_list_node);
1459 mlxsw_sp_ipip_entry_dealloc(mlxsw_sp, ipip_entry);
1463 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1464 const struct net_device *ul_dev,
1465 enum mlxsw_sp_l3proto ul_proto,
1466 union mlxsw_sp_l3addr ul_dip,
1467 struct mlxsw_sp_ipip_entry *ipip_entry)
1469 u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1470 enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1472 if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1475 return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1476 ul_tb_id, ipip_entry);
1479 /* Given decap parameters, find the corresponding IPIP entry. */
1480 static struct mlxsw_sp_ipip_entry *
1481 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, int ul_dev_ifindex,
1482 enum mlxsw_sp_l3proto ul_proto,
1483 union mlxsw_sp_l3addr ul_dip)
1485 struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1486 struct net_device *ul_dev;
1490 ul_dev = dev_get_by_index_rcu(mlxsw_sp_net(mlxsw_sp), ul_dev_ifindex);
1494 list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1496 if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1510 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1511 const struct net_device *dev,
1512 enum mlxsw_sp_ipip_type *p_type)
1514 struct mlxsw_sp_router *router = mlxsw_sp->router;
1515 const struct mlxsw_sp_ipip_ops *ipip_ops;
1516 enum mlxsw_sp_ipip_type ipipt;
1518 for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1519 ipip_ops = router->ipip_ops_arr[ipipt];
1520 if (dev->type == ipip_ops->dev_type) {
1529 static bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1530 const struct net_device *dev)
1532 return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1535 static struct mlxsw_sp_ipip_entry *
1536 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1537 const struct net_device *ol_dev)
1539 struct mlxsw_sp_ipip_entry *ipip_entry;
1541 list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1543 if (ipip_entry->ol_dev == ol_dev)
1549 static struct mlxsw_sp_ipip_entry *
1550 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1551 const struct net_device *ul_dev,
1552 struct mlxsw_sp_ipip_entry *start)
1554 struct mlxsw_sp_ipip_entry *ipip_entry;
1556 ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1558 list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1560 struct net_device *ol_dev = ipip_entry->ol_dev;
1561 struct net_device *ipip_ul_dev;
1564 ipip_ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1567 if (ipip_ul_dev == ul_dev)
1574 static bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp,
1575 const struct net_device *dev)
1577 return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1580 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1581 const struct net_device *ol_dev,
1582 enum mlxsw_sp_ipip_type ipipt)
1584 const struct mlxsw_sp_ipip_ops *ops
1585 = mlxsw_sp->router->ipip_ops_arr[ipipt];
1587 return ops->can_offload(mlxsw_sp, ol_dev);
1590 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1591 struct net_device *ol_dev)
1593 enum mlxsw_sp_ipip_type ipipt = MLXSW_SP_IPIP_TYPE_MAX;
1594 struct mlxsw_sp_ipip_entry *ipip_entry;
1595 enum mlxsw_sp_l3proto ul_proto;
1596 union mlxsw_sp_l3addr saddr;
1599 mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1600 if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1601 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1602 ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1603 saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1604 if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1607 ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1609 if (IS_ERR(ipip_entry))
1610 return PTR_ERR(ipip_entry);
1617 static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
1618 struct net_device *ol_dev)
1620 struct mlxsw_sp_ipip_entry *ipip_entry;
1622 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1624 mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1628 mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1629 struct mlxsw_sp_ipip_entry *ipip_entry)
1631 struct mlxsw_sp_fib_entry *decap_fib_entry;
1633 decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
1634 if (decap_fib_entry)
1635 mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
1640 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
1641 u16 ul_rif_id, bool enable)
1643 struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1644 struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common);
1645 enum mlxsw_reg_ritr_loopback_ipip_options ipip_options;
1646 struct mlxsw_sp_rif *rif = &lb_rif->common;
1647 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1648 char ritr_pl[MLXSW_REG_RITR_LEN];
1649 struct in6_addr *saddr6;
1652 ipip_options = MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET;
1653 switch (lb_cf.ul_protocol) {
1654 case MLXSW_SP_L3_PROTO_IPV4:
1655 saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1656 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1657 rif->rif_index, rif->vr_id, dev->mtu);
1658 mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1659 ipip_options, ul_vr_id,
1664 case MLXSW_SP_L3_PROTO_IPV6:
1665 saddr6 = &lb_cf.saddr.addr6;
1666 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1667 rif->rif_index, rif->vr_id, dev->mtu);
1668 mlxsw_reg_ritr_loopback_ipip6_pack(ritr_pl, lb_cf.lb_ipipt,
1669 ipip_options, ul_vr_id,
1675 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1678 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1679 struct net_device *ol_dev)
1681 struct mlxsw_sp_ipip_entry *ipip_entry;
1682 struct mlxsw_sp_rif_ipip_lb *lb_rif;
1685 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1687 lb_rif = ipip_entry->ol_lb;
1688 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
1689 lb_rif->ul_rif_id, true);
1692 lb_rif->common.mtu = ol_dev->mtu;
1699 static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1700 struct net_device *ol_dev)
1702 struct mlxsw_sp_ipip_entry *ipip_entry;
1704 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1706 mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1710 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1711 struct mlxsw_sp_ipip_entry *ipip_entry)
1713 if (ipip_entry->decap_fib_entry)
1714 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1717 static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1718 struct net_device *ol_dev)
1720 struct mlxsw_sp_ipip_entry *ipip_entry;
1722 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1724 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1727 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1728 struct mlxsw_sp_rif *rif);
1730 static void mlxsw_sp_rif_migrate_destroy(struct mlxsw_sp *mlxsw_sp,
1731 struct mlxsw_sp_rif *old_rif,
1732 struct mlxsw_sp_rif *new_rif,
1735 struct mlxsw_sp_crif *crif = old_rif->crif;
1736 struct mlxsw_sp_crif mock_crif = {};
1739 mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
1741 /* Plant a mock CRIF so that destroying the old RIF doesn't unoffload
1742 * our nexthops and IPIP tunnels, and doesn't sever the crif->rif link.
1744 mlxsw_sp_crif_init(&mock_crif, crif->key.dev);
1745 old_rif->crif = &mock_crif;
1746 mock_crif.rif = old_rif;
1747 mlxsw_sp_rif_destroy(old_rif);
1751 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1752 struct mlxsw_sp_ipip_entry *ipip_entry,
1754 struct netlink_ext_ack *extack)
1756 struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1757 struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1759 new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1763 if (IS_ERR(new_lb_rif))
1764 return PTR_ERR(new_lb_rif);
1765 ipip_entry->ol_lb = new_lb_rif;
1767 mlxsw_sp_rif_migrate_destroy(mlxsw_sp, &old_lb_rif->common,
1768 &new_lb_rif->common, keep_encap);
1773 * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry.
1774 * @mlxsw_sp: mlxsw_sp.
1775 * @ipip_entry: IPIP entry.
1776 * @recreate_loopback: Recreates the associated loopback RIF.
1777 * @keep_encap: Updates next hops that use the tunnel netdevice. This is only
1778 * relevant when recreate_loopback is true.
1779 * @update_nexthops: Updates next hops, keeping the current loopback RIF. This
1780 * is only relevant when recreate_loopback is false.
1783 * Return: 0 on success, a negative error code on failure.
1785 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1786 struct mlxsw_sp_ipip_entry *ipip_entry,
1787 bool recreate_loopback,
1789 bool update_nexthops,
1790 struct netlink_ext_ack *extack)
1794 /* RIFs can't be edited, so to update loopback, we need to destroy and
1795 * recreate it. That creates a window of opportunity where RALUE and
1796 * RATR registers end up referencing a RIF that's already gone. RATRs
1797 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1798 * of RALUE, demote the decap route back.
1800 if (ipip_entry->decap_fib_entry)
1801 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1803 if (recreate_loopback) {
1804 err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1805 keep_encap, extack);
1808 } else if (update_nexthops) {
1809 mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1810 &ipip_entry->ol_lb->common);
1813 if (ipip_entry->ol_dev->flags & IFF_UP)
1814 mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1819 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1820 struct net_device *ol_dev,
1821 struct netlink_ext_ack *extack)
1823 struct mlxsw_sp_ipip_entry *ipip_entry =
1824 mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1829 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1830 true, false, false, extack);
1834 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1835 struct mlxsw_sp_ipip_entry *ipip_entry,
1836 struct net_device *ul_dev,
1838 struct netlink_ext_ack *extack)
1840 u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1841 enum mlxsw_sp_l3proto ul_proto;
1842 union mlxsw_sp_l3addr saddr;
1844 /* Moving the underlay to a different VRF might cause a local address
1845 * conflict, and the conflicting tunnels need to be demoted.
1847 ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1848 saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1849 if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1852 *demote_this = true;
1856 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1857 true, true, false, extack);
1861 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1862 struct mlxsw_sp_ipip_entry *ipip_entry,
1863 struct net_device *ul_dev)
1865 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1866 false, false, true, NULL);
1870 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1871 struct mlxsw_sp_ipip_entry *ipip_entry,
1872 struct net_device *ul_dev)
1874 /* A down underlay device causes encapsulated packets to not be
1875 * forwarded, but decap still works. So refresh next hops without
1876 * touching anything else.
1878 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1879 false, false, true, NULL);
1883 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1884 struct net_device *ol_dev,
1885 struct netlink_ext_ack *extack)
1887 const struct mlxsw_sp_ipip_ops *ipip_ops;
1888 struct mlxsw_sp_ipip_entry *ipip_entry;
1891 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1893 /* A change might make a tunnel eligible for offloading, but
1894 * that is currently not implemented. What falls to slow path stays there.
1899 /* A change might make a tunnel not eligible for offloading. */
1900 if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1901 ipip_entry->ipipt)) {
1902 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1906 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1907 err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1911 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1912 struct mlxsw_sp_ipip_entry *ipip_entry)
1914 struct net_device *ol_dev = ipip_entry->ol_dev;
1916 if (ol_dev->flags & IFF_UP)
1917 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1918 mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1921 /* The configuration where several tunnels have the same local address in the
1922 * same underlay table needs special treatment in the HW. That is currently not
1923 * implemented in the driver. This function finds and demotes the first tunnel
1924 * with a given source address, except the one passed in the argument `except'.
1928 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1929 enum mlxsw_sp_l3proto ul_proto,
1930 union mlxsw_sp_l3addr saddr,
1932 const struct mlxsw_sp_ipip_entry *except)
1934 struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1936 list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1938 if (ipip_entry != except &&
1939 mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1940 ul_tb_id, ipip_entry)) {
1941 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1949 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1950 struct net_device *ul_dev)
1952 struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1954 list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1956 struct net_device *ol_dev = ipip_entry->ol_dev;
1957 struct net_device *ipip_ul_dev;
1960 ipip_ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1962 if (ipip_ul_dev == ul_dev)
1963 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1967 static int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1968 struct net_device *ol_dev,
1969 unsigned long event,
1970 struct netdev_notifier_info *info)
1972 struct netdev_notifier_changeupper_info *chup;
1973 struct netlink_ext_ack *extack;
1977 case NETDEV_REGISTER:
1978 err = mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1980 case NETDEV_UNREGISTER:
1981 mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1984 mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1987 mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1989 case NETDEV_CHANGEUPPER:
1990 chup = container_of(info, typeof(*chup), info);
1991 extack = info->extack;
1992 if (netif_is_l3_master(chup->upper_dev))
1993 err = mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1998 extack = info->extack;
1999 err = mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
2002 case NETDEV_CHANGEMTU:
2003 err = mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
2010 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
2011 struct mlxsw_sp_ipip_entry *ipip_entry,
2012 struct net_device *ul_dev,
2014 unsigned long event,
2015 struct netdev_notifier_info *info)
2017 struct netdev_notifier_changeupper_info *chup;
2018 struct netlink_ext_ack *extack;
2021 case NETDEV_CHANGEUPPER:
2022 chup = container_of(info, typeof(*chup), info);
2023 extack = info->extack;
2024 if (netif_is_l3_master(chup->upper_dev))
2025 return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
2033 return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
2036 return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
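/* Deliver an underlay netdevice event to every tunnel whose underlay device is
 * ul_dev. If handling the event requires demoting the current tunnel, the
 * previous list entry is remembered first so that iteration can resume safely
 * after the removal.
 */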
2044 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
2045 struct net_device *ul_dev,
2046 unsigned long event,
2047 struct netdev_notifier_info *info)
2049 struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
2052 while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
2055 struct mlxsw_sp_ipip_entry *prev;
2056 bool demote_this = false;
2058 err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
2059 ul_dev, &demote_this,
2062 mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
2068 if (list_is_first(&ipip_entry->ipip_list_node,
2069 &mlxsw_sp->router->ipip_list))
2072 /* This can't be cached from previous iteration,
2073 * because that entry could be gone now.
2075 prev = list_prev_entry(ipip_entry,
2077 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
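/* Called by the NVE (e.g. VXLAN) code: record the decap configuration and, if
 * a matching local route already exists, turn it into an NVE decap entry that
 * points at tunnel_index. The route may also only be created later on.
 */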
2085 int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
2086 enum mlxsw_sp_l3proto ul_proto,
2087 const union mlxsw_sp_l3addr *ul_sip,
2090 enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2091 struct mlxsw_sp_router *router = mlxsw_sp->router;
2092 struct mlxsw_sp_fib_entry *fib_entry;
2095 mutex_lock(&mlxsw_sp->router->lock);
2097 if (WARN_ON_ONCE(router->nve_decap_config.valid)) {
2102 router->nve_decap_config.ul_tb_id = ul_tb_id;
2103 router->nve_decap_config.tunnel_index = tunnel_index;
2104 router->nve_decap_config.ul_proto = ul_proto;
2105 router->nve_decap_config.ul_sip = *ul_sip;
2106 router->nve_decap_config.valid = true;
2108 /* It is valid to create a tunnel with a local IP and only later
2109 * assign this IP address to a local interface
2111 fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
2117 fib_entry->decap.tunnel_index = tunnel_index;
2118 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
2120 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2122 goto err_fib_entry_update;
2126 err_fib_entry_update:
2127 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2128 mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2130 mutex_unlock(&mlxsw_sp->router->lock);
2134 void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
2135 enum mlxsw_sp_l3proto ul_proto,
2136 const union mlxsw_sp_l3addr *ul_sip)
2138 enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
2139 struct mlxsw_sp_router *router = mlxsw_sp->router;
2140 struct mlxsw_sp_fib_entry *fib_entry;
2142 mutex_lock(&mlxsw_sp->router->lock);
2144 if (WARN_ON_ONCE(!router->nve_decap_config.valid))
2147 router->nve_decap_config.valid = false;
2149 fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
2155 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2156 mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2158 mutex_unlock(&mlxsw_sp->router->lock);
2161 static bool mlxsw_sp_router_nve_is_decap(struct mlxsw_sp *mlxsw_sp,
2163 enum mlxsw_sp_l3proto ul_proto,
2164 const union mlxsw_sp_l3addr *ul_sip)
2166 struct mlxsw_sp_router *router = mlxsw_sp->router;
2168 return router->nve_decap_config.valid &&
2169 router->nve_decap_config.ul_tb_id == ul_tb_id &&
2170 router->nve_decap_config.ul_proto == ul_proto &&
2171 !memcmp(&router->nve_decap_config.ul_sip, ul_sip,
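/* A neigh entry mirrors one offloaded kernel neighbour: it caches the resolved
 * hardware address, the nexthops that resolve through it, and an optional
 * activity counter.
 */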
2175 struct mlxsw_sp_neigh_key {
2176 struct neighbour *n;
2179 struct mlxsw_sp_neigh_entry {
2180 struct list_head rif_list_node;
2181 struct rhash_head ht_node;
2182 struct mlxsw_sp_neigh_key key;
2185 unsigned char ha[ETH_ALEN];
2186 struct list_head nexthop_list; /* list of nexthops using
2189 struct list_head nexthop_neighs_list_node;
2190 unsigned int counter_index;
2194 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
2195 .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
2196 .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
2197 .key_len = sizeof(struct mlxsw_sp_neigh_key),
2200 struct mlxsw_sp_neigh_entry *
2201 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
2202 struct mlxsw_sp_neigh_entry *neigh_entry)
2205 if (list_empty(&rif->neigh_list))
2208 return list_first_entry(&rif->neigh_list,
2209 typeof(*neigh_entry),
2212 if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
2214 return list_next_entry(neigh_entry, rif_list_node);
2217 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
2219 return neigh_entry->key.n->tbl->family;
2223 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
2225 return neigh_entry->ha;
2228 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
2230 struct neighbour *n;
2232 n = neigh_entry->key.n;
2233 return ntohl(*((__be32 *) n->primary_key));
2237 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
2239 struct neighbour *n;
2241 n = neigh_entry->key.n;
2242 return (struct in6_addr *) &n->primary_key;
2245 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
2246 struct mlxsw_sp_neigh_entry *neigh_entry,
2249 if (!neigh_entry->counter_valid)
2252 return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
2256 static struct mlxsw_sp_neigh_entry *
2257 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
2260 struct mlxsw_sp_neigh_entry *neigh_entry;
2262 neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
2266 neigh_entry->key.n = n;
2267 neigh_entry->rif = rif;
2268 INIT_LIST_HEAD(&neigh_entry->nexthop_list);
2273 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
2279 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
2280 struct mlxsw_sp_neigh_entry *neigh_entry)
2282 return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
2283 &neigh_entry->ht_node,
2284 mlxsw_sp_neigh_ht_params);
2288 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
2289 struct mlxsw_sp_neigh_entry *neigh_entry)
2291 rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
2292 &neigh_entry->ht_node,
2293 mlxsw_sp_neigh_ht_params);
2297 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
2298 struct mlxsw_sp_neigh_entry *neigh_entry)
2300 struct devlink *devlink;
2301 const char *table_name;
2303 switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2305 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2308 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2315 devlink = priv_to_devlink(mlxsw_sp->core);
2316 return devlink_dpipe_table_counter_enabled(devlink, table_name);
2320 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2321 struct mlxsw_sp_neigh_entry *neigh_entry)
2323 if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2326 if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2329 neigh_entry->counter_valid = true;
2333 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2334 struct mlxsw_sp_neigh_entry *neigh_entry)
2336 if (!neigh_entry->counter_valid)
2338 mlxsw_sp_flow_counter_free(mlxsw_sp,
2339 neigh_entry->counter_index);
2340 neigh_entry->counter_valid = false;
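/* Create a neighbour entry for @n: resolve the RIF of its device, allocate
 * the entry, insert it into the hash table, optionally attach an activity
 * counter and link the entry on the RIF's neighbour list.
 */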
2343 static struct mlxsw_sp_neigh_entry *
2344 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2346 struct mlxsw_sp_neigh_entry *neigh_entry;
2347 struct mlxsw_sp_rif *rif;
2350 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2352 return ERR_PTR(-EINVAL);
2354 neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2356 return ERR_PTR(-ENOMEM);
2358 err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2360 goto err_neigh_entry_insert;
2362 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2363 atomic_inc(&mlxsw_sp->router->neighs_update.neigh_count);
2364 list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2368 err_neigh_entry_insert:
2369 mlxsw_sp_neigh_entry_free(neigh_entry);
2370 return ERR_PTR(err);
2374 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2375 struct mlxsw_sp_neigh_entry *neigh_entry)
2377 list_del(&neigh_entry->rif_list_node);
2378 atomic_dec(&mlxsw_sp->router->neighs_update.neigh_count);
2379 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2380 mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2381 mlxsw_sp_neigh_entry_free(neigh_entry);
2384 static struct mlxsw_sp_neigh_entry *
2385 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2387 struct mlxsw_sp_neigh_key key;
2390 return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2391 &key, mlxsw_sp_neigh_ht_params);
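/* The polling interval for neighbour activity dumps follows the kernel's
 * DELAY_PROBE_TIME; with IPv6 enabled the shorter of the ARP and ND values
 * is used.
 */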
2395 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2397 unsigned long interval;
2399 #if IS_ENABLED(CONFIG_IPV6)
2400 interval = min_t(unsigned long,
2401 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2402 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2404 interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2406 mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
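/* Handle one IPv4 entry from a RAUHTD dump: map the reported RIF to its
 * netdev, look the address up in the ARP table and send a neighbour event
 * so the kernel treats the entry as active.
 */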
2409 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2413 u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
2414 struct net_device *dev;
2415 struct neighbour *n;
2420 mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2422 if (WARN_ON_ONCE(rif >= max_rifs))
2424 if (!mlxsw_sp->router->rifs[rif]) {
2425 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2430 dev = mlxsw_sp_rif_dev(mlxsw_sp->router->rifs[rif]);
2431 n = neigh_lookup(&arp_tbl, &dipn, dev);
2435 netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2436 neigh_event_send(n, NULL);
2440 #if IS_ENABLED(CONFIG_IPV6)
2441 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2445 struct net_device *dev;
2446 struct neighbour *n;
2447 struct in6_addr dip;
2450 mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2453 if (!mlxsw_sp->router->rifs[rif]) {
2454 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2458 dev = mlxsw_sp_rif_dev(mlxsw_sp->router->rifs[rif]);
2459 n = neigh_lookup(&nd_tbl, &dip, dev);
2463 netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2464 neigh_event_send(n, NULL);
2468 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2475 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2482 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2484 /* Hardware starts counting at 0, so add 1. */
2487 /* Each record consists of several neighbour entries. */
2488 for (i = 0; i < num_entries; i++) {
2491 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2492 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2498 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2502 /* One record contains one entry. */
2503 mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
2507 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2508 char *rauhtd_pl, int rec_index)
2510 switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2511 case MLXSW_REG_RAUHTD_TYPE_IPV4:
2512 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2515 case MLXSW_REG_RAUHTD_TYPE_IPV6:
2516 mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
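/* A RAUHTD dump is considered full when the maximum number of records was
 * returned and the last record is completely populated; in that case the
 * caller issues another query to fetch the remaining entries.
 */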
2522 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2524 u8 num_rec, last_rec_index, num_entries;
2526 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2527 last_rec_index = num_rec - 1;
2529 if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2531 if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2532 MLXSW_REG_RAUHTD_TYPE_IPV6)
2535 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2537 if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2543 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2545 enum mlxsw_reg_rauhtd_type type)
2550 /* Ensure the RIF we read from the device does not change mid-dump. */
2551 mutex_lock(&mlxsw_sp->router->lock);
2553 mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2554 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2557 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2560 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2561 for (i = 0; i < num_rec; i++)
2562 mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2564 } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2565 mutex_unlock(&mlxsw_sp->router->lock);
2570 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2572 enum mlxsw_reg_rauhtd_type type;
2576 if (!atomic_read(&mlxsw_sp->router->neighs_update.neigh_count))
2579 rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2583 type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2584 err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2588 type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2589 err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2595 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2597 struct mlxsw_sp_neigh_entry *neigh_entry;
2599 mutex_lock(&mlxsw_sp->router->lock);
2600 list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2601 nexthop_neighs_list_node)
2602 /* If this neigh has nexthops, make the kernel think this neigh
2603 * is active regardless of the traffic.
2605 neigh_event_send(neigh_entry->key.n, NULL);
2606 mutex_unlock(&mlxsw_sp->router->lock);
2610 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2612 unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2614 mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2615 msecs_to_jiffies(interval));
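/* Periodic work: dump neighbour activity from the device, nudge the kernel
 * about neighbours used by nexthops and re-arm the delayed work at the
 * configured interval.
 */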
2618 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2620 struct mlxsw_sp_router *router;
2623 router = container_of(work, struct mlxsw_sp_router,
2624 neighs_update.dw.work);
2625 err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2627 dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2629 mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2631 mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2634 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2636 struct mlxsw_sp_neigh_entry *neigh_entry;
2637 struct mlxsw_sp_router *router;
2639 router = container_of(work, struct mlxsw_sp_router,
2640 nexthop_probe_dw.work);
2641 /* Iterate over the nexthop neighbours, find those that are unresolved
2642 * and send a probe for them. This solves a chicken-and-egg problem:
2643 * the nexthop would not get offloaded until its neighbour is resolved,
2644 * but the neighbour would never get resolved if traffic only flows in
2645 * hardware via a different nexthop.
2647 mutex_lock(&router->lock);
2648 list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2649 nexthop_neighs_list_node)
2650 if (!neigh_entry->connected)
2651 neigh_event_send(neigh_entry->key.n, NULL);
2652 mutex_unlock(&router->lock);
2654 mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2655 MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2659 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2660 struct mlxsw_sp_neigh_entry *neigh_entry,
2661 bool removing, bool dead);
2663 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2665 return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2666 MLXSW_REG_RAUHT_OP_WRITE_DELETE;
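/* Add or delete an IPv4 host entry via the RAUHT register, attaching the
 * activity counter when one was allocated for the entry.
 */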
2670 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2671 struct mlxsw_sp_neigh_entry *neigh_entry,
2672 enum mlxsw_reg_rauht_op op)
2674 struct neighbour *n = neigh_entry->key.n;
2675 u32 dip = ntohl(*((__be32 *) n->primary_key));
2676 char rauht_pl[MLXSW_REG_RAUHT_LEN];
2678 mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2680 if (neigh_entry->counter_valid)
2681 mlxsw_reg_rauht_pack_counter(rauht_pl,
2682 neigh_entry->counter_index);
2683 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2687 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2688 struct mlxsw_sp_neigh_entry *neigh_entry,
2689 enum mlxsw_reg_rauht_op op)
2691 struct neighbour *n = neigh_entry->key.n;
2692 char rauht_pl[MLXSW_REG_RAUHT_LEN];
2693 const char *dip = n->primary_key;
2695 mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2697 if (neigh_entry->counter_valid)
2698 mlxsw_reg_rauht_pack_counter(rauht_pl,
2699 neigh_entry->counter_index);
2700 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2703 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2705 struct neighbour *n = neigh_entry->key.n;
2707 /* Packets with a link-local destination address are trapped
2708 * after LPM lookup and never reach the neighbour table, so
2709 * there is no need to program such neighbours to the device.
2711 if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2712 IPV6_ADDR_LINKLOCAL)
2718 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2719 struct mlxsw_sp_neigh_entry *neigh_entry,
2722 enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
2725 if (!adding && !neigh_entry->connected)
2727 neigh_entry->connected = adding;
2728 if (neigh_entry->key.n->tbl->family == AF_INET) {
2729 err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2733 } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2734 if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2736 err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2746 neigh_entry->key.n->flags |= NTF_OFFLOADED;
2748 neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
2752 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2753 struct mlxsw_sp_neigh_entry *neigh_entry,
2757 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2759 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2760 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
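/* Context carried from the atomic netevent notifier to the process-context
 * work items handled below.
 */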
2763 struct mlxsw_sp_netevent_work {
2764 struct work_struct work;
2765 struct mlxsw_sp *mlxsw_sp;
2766 struct neighbour *n;
2769 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2771 struct mlxsw_sp_netevent_work *net_work =
2772 container_of(work, struct mlxsw_sp_netevent_work, work);
2773 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2774 struct mlxsw_sp_neigh_entry *neigh_entry;
2775 struct neighbour *n = net_work->n;
2776 unsigned char ha[ETH_ALEN];
2777 bool entry_connected;
2780 /* If these parameters are changed after we release the lock,
2781 * then we are guaranteed to receive another event letting us know about it.
2784 read_lock_bh(&n->lock);
2785 memcpy(ha, n->ha, ETH_ALEN);
2786 nud_state = n->nud_state;
2788 read_unlock_bh(&n->lock);
2790 mutex_lock(&mlxsw_sp->router->lock);
2791 mlxsw_sp_span_respin(mlxsw_sp);
2793 entry_connected = nud_state & NUD_VALID && !dead;
2794 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2795 if (!entry_connected && !neigh_entry)
2798 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2799 if (IS_ERR(neigh_entry))
2803 if (neigh_entry->connected && entry_connected &&
2804 !memcmp(neigh_entry->ha, ha, ETH_ALEN))
2807 memcpy(neigh_entry->ha, ha, ETH_ALEN);
2808 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2809 mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
2812 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2813 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2816 mutex_unlock(&mlxsw_sp->router->lock);
2821 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2823 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2825 struct mlxsw_sp_netevent_work *net_work =
2826 container_of(work, struct mlxsw_sp_netevent_work, work);
2827 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2829 mlxsw_sp_mp_hash_init(mlxsw_sp);
2833 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2835 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2837 struct mlxsw_sp_netevent_work *net_work =
2838 container_of(work, struct mlxsw_sp_netevent_work, work);
2839 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2841 __mlxsw_sp_router_init(mlxsw_sp);
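/* Allocate a work item and queue it on the mlxsw workqueue; events that
 * originate in a foreign network namespace are ignored.
 */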
2845 static int mlxsw_sp_router_schedule_work(struct net *net,
2846 struct mlxsw_sp_router *router,
2847 struct neighbour *n,
2848 void (*cb)(struct work_struct *))
2850 struct mlxsw_sp_netevent_work *net_work;
2852 if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp)))
2855 net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2859 INIT_WORK(&net_work->work, cb);
2860 net_work->mlxsw_sp = router->mlxsw_sp;
2862 mlxsw_core_schedule_work(&net_work->work);
2866 static bool mlxsw_sp_dev_lower_is_port(struct net_device *dev)
2868 struct mlxsw_sp_port *mlxsw_sp_port;
2871 mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev);
2873 return !!mlxsw_sp_port;
2876 static int mlxsw_sp_router_schedule_neigh_work(struct mlxsw_sp_router *router,
2877 struct neighbour *n)
2881 net = neigh_parms_net(n->parms);
2883 /* Take a reference to ensure the neighbour won't be destructed until we
2884 * drop the reference in delayed work.
2887 return mlxsw_sp_router_schedule_work(net, router, n,
2888 mlxsw_sp_router_neigh_event_work);
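/* Netevent notifier: runs in atomic context, so apart from recording
 * DELAY_PROBE_TIME changes all handling is deferred to work items.
 */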
2891 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2892 unsigned long event, void *ptr)
2894 struct mlxsw_sp_router *router;
2895 unsigned long interval;
2896 struct neigh_parms *p;
2897 struct neighbour *n;
2899 router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2902 case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2905 /* We don't care about changes in the default table. */
2906 if (!p->dev || (p->tbl->family != AF_INET &&
2907 p->tbl->family != AF_INET6))
2910 /* We are in atomic context and can't take RTNL mutex,
2911 * so use RCU variant to walk the device chain.
2913 if (!mlxsw_sp_dev_lower_is_port(p->dev))
2916 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2917 router->neighs_update.interval = interval;
2919 case NETEVENT_NEIGH_UPDATE:
2922 if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2925 if (!mlxsw_sp_dev_lower_is_port(n->dev))
2928 return mlxsw_sp_router_schedule_neigh_work(router, n);
2930 case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2931 case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2932 return mlxsw_sp_router_schedule_work(ptr, router, NULL,
2933 mlxsw_sp_router_mp_hash_event_work);
2935 case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2936 return mlxsw_sp_router_schedule_work(ptr, router, NULL,
2937 mlxsw_sp_router_update_priority_work);
2943 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2947 err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2948 &mlxsw_sp_neigh_ht_params);
2952 /* Initialize the polling interval according to the default table. */
2955 mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2957 /* Create the delayed works for the activity_update */
2958 INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2959 mlxsw_sp_router_neighs_update_work);
2960 INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2961 mlxsw_sp_router_probe_unresolved_nexthops);
2962 atomic_set(&mlxsw_sp->router->neighs_update.neigh_count, 0);
2963 mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2964 mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2968 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2970 cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2971 cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2972 rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
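/* A RIF is going away: unprogram and destroy every neighbour entry that was
 * using it.
 */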
2975 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2976 struct mlxsw_sp_rif *rif)
2978 struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2980 list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2982 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2983 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2987 struct mlxsw_sp_neigh_rif_made_sync {
2988 struct mlxsw_sp *mlxsw_sp;
2989 struct mlxsw_sp_rif *rif;
2993 static void mlxsw_sp_neigh_rif_made_sync_each(struct neighbour *n, void *data)
2995 struct mlxsw_sp_neigh_rif_made_sync *rms = data;
3000 if (n->dev != mlxsw_sp_rif_dev(rms->rif))
3002 rc = mlxsw_sp_router_schedule_neigh_work(rms->mlxsw_sp->router, n);
3003 if (rc != NOTIFY_DONE)
3007 static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
3008 struct mlxsw_sp_rif *rif)
3010 struct mlxsw_sp_neigh_rif_made_sync rms = {
3011 .mlxsw_sp = mlxsw_sp,
3015 neigh_for_each(&arp_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
3019 #if IS_ENABLED(CONFIG_IPV6)
3020 neigh_for_each(&nd_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
3029 mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
3033 enum mlxsw_sp_nexthop_type {
3034 MLXSW_SP_NEXTHOP_TYPE_ETH,
3035 MLXSW_SP_NEXTHOP_TYPE_IPIP,
3038 enum mlxsw_sp_nexthop_action {
3039 /* Nexthop forwards packets to an egress RIF */
3040 MLXSW_SP_NEXTHOP_ACTION_FORWARD,
3041 /* Nexthop discards packets */
3042 MLXSW_SP_NEXTHOP_ACTION_DISCARD,
3043 /* Nexthop traps packets */
3044 MLXSW_SP_NEXTHOP_ACTION_TRAP,
3047 struct mlxsw_sp_nexthop_key {
3048 struct fib_nh *fib_nh;
3051 struct mlxsw_sp_nexthop {
3052 struct list_head neigh_list_node; /* member of neigh entry list */
3053 struct list_head crif_list_node;
3054 struct list_head router_list_node;
3055 struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group
3056 * this nexthop belongs to
3058 struct rhash_head ht_node;
3059 struct neigh_table *neigh_tbl;
3060 struct mlxsw_sp_nexthop_key key;
3061 unsigned char gw_addr[sizeof(struct in6_addr)];
3065 int num_adj_entries;
3066 struct mlxsw_sp_crif *crif;
3067 u8 should_offload:1, /* set indicates this nexthop should be written
3068 * to the adjacency table.
3070 offloaded:1, /* set indicates this nexthop was written to the
3073 update:1; /* set indicates this nexthop should be updated in the
3074 * adjacency table (e.g., its MAC changed).
3076 enum mlxsw_sp_nexthop_action action;
3077 enum mlxsw_sp_nexthop_type type;
3079 struct mlxsw_sp_neigh_entry *neigh_entry;
3080 struct mlxsw_sp_ipip_entry *ipip_entry;
3082 unsigned int counter_index;
3086 static struct net_device *
3087 mlxsw_sp_nexthop_dev(const struct mlxsw_sp_nexthop *nh)
3091 return nh->crif->key.dev;
3094 enum mlxsw_sp_nexthop_group_type {
3095 MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4,
3096 MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6,
3097 MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ,
3100 struct mlxsw_sp_nexthop_group_info {
3101 struct mlxsw_sp_nexthop_group *nh_grp;
3105 int sum_norm_weight;
3106 u8 adj_index_valid:1,
3107 gateway:1, /* routes using the group use a gateway */
3109 struct list_head list; /* member in nh_res_grp_list */
3110 struct mlxsw_sp_nexthop nexthops[];
3113 static struct mlxsw_sp_rif *
3114 mlxsw_sp_nhgi_rif(const struct mlxsw_sp_nexthop_group_info *nhgi)
3116 struct mlxsw_sp_crif *crif = nhgi->nexthops[0].crif;
3123 struct mlxsw_sp_nexthop_group_vr_key {
3125 enum mlxsw_sp_l3proto proto;
3128 struct mlxsw_sp_nexthop_group_vr_entry {
3129 struct list_head list; /* member in vr_list */
3130 struct rhash_head ht_node; /* member in vr_ht */
3131 refcount_t ref_count;
3132 struct mlxsw_sp_nexthop_group_vr_key key;
3135 struct mlxsw_sp_nexthop_group {
3136 struct rhash_head ht_node;
3137 struct list_head fib_list; /* list of fib entries that use this group */
3140 struct fib_info *fi;
3146 struct mlxsw_sp_nexthop_group_info *nhgi;
3147 struct list_head vr_list;
3148 struct rhashtable vr_ht;
3149 enum mlxsw_sp_nexthop_group_type type;
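/* Nexthop (adjacency) counters are only allocated when counters are enabled
 * for the dpipe adjacency table.
 */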
3153 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
3154 struct mlxsw_sp_nexthop *nh)
3156 struct devlink *devlink;
3158 devlink = priv_to_devlink(mlxsw_sp->core);
3159 if (!devlink_dpipe_table_counter_enabled(devlink,
3160 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
3163 if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
3166 nh->counter_valid = true;
3169 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
3170 struct mlxsw_sp_nexthop *nh)
3172 if (!nh->counter_valid)
3174 mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
3175 nh->counter_valid = false;
3178 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
3179 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
3181 if (!nh->counter_valid)
3184 return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
3188 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
3189 struct mlxsw_sp_nexthop *nh)
3192 if (list_empty(&router->nexthop_list))
3195 return list_first_entry(&router->nexthop_list,
3196 typeof(*nh), router_list_node);
3198 if (list_is_last(&nh->router_list_node, &router->nexthop_list))
3200 return list_next_entry(nh, router_list_node);
3203 bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh)
3205 return nh->offloaded && nh->action == MLXSW_SP_NEXTHOP_ACTION_FORWARD;
3208 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
3210 if (nh->type != MLXSW_SP_NEXTHOP_TYPE_ETH ||
3211 !mlxsw_sp_nexthop_is_forward(nh))
3213 return nh->neigh_entry->ha;
3216 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
3217 u32 *p_adj_size, u32 *p_adj_hash_index)
3219 struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
3220 u32 adj_hash_index = 0;
3223 if (!nh->offloaded || !nhgi->adj_index_valid)
3226 *p_adj_index = nhgi->adj_index;
3227 *p_adj_size = nhgi->ecmp_size;
3229 for (i = 0; i < nhgi->count; i++) {
3230 struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3234 if (nh_iter->offloaded)
3235 adj_hash_index += nh_iter->num_adj_entries;
3238 *p_adj_hash_index = adj_hash_index;
3242 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
3244 if (WARN_ON(!nh->crif))
3246 return nh->crif->rif;
3249 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
3251 struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
3254 for (i = 0; i < nhgi->count; i++) {
3255 struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3257 if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
3263 static const struct rhashtable_params mlxsw_sp_nexthop_group_vr_ht_params = {
3264 .key_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, key),
3265 .head_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, ht_node),
3266 .key_len = sizeof(struct mlxsw_sp_nexthop_group_vr_key),
3267 .automatic_shrinking = true,
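/* Each nexthop group tracks, with reference counts, the virtual routers and
 * protocols whose FIB entries use it, so that adjacency index changes can
 * later be propagated per virtual router.
 */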
3270 static struct mlxsw_sp_nexthop_group_vr_entry *
3271 mlxsw_sp_nexthop_group_vr_entry_lookup(struct mlxsw_sp_nexthop_group *nh_grp,
3272 const struct mlxsw_sp_fib *fib)
3274 struct mlxsw_sp_nexthop_group_vr_key key;
3276 memset(&key, 0, sizeof(key));
3277 key.vr_id = fib->vr->id;
3278 key.proto = fib->proto;
3279 return rhashtable_lookup_fast(&nh_grp->vr_ht, &key,
3280 mlxsw_sp_nexthop_group_vr_ht_params);
3284 mlxsw_sp_nexthop_group_vr_entry_create(struct mlxsw_sp_nexthop_group *nh_grp,
3285 const struct mlxsw_sp_fib *fib)
3287 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3290 vr_entry = kzalloc(sizeof(*vr_entry), GFP_KERNEL);
3294 vr_entry->key.vr_id = fib->vr->id;
3295 vr_entry->key.proto = fib->proto;
3296 refcount_set(&vr_entry->ref_count, 1);
3298 err = rhashtable_insert_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3299 mlxsw_sp_nexthop_group_vr_ht_params);
3301 goto err_hashtable_insert;
3303 list_add(&vr_entry->list, &nh_grp->vr_list);
3307 err_hashtable_insert:
3313 mlxsw_sp_nexthop_group_vr_entry_destroy(struct mlxsw_sp_nexthop_group *nh_grp,
3314 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry)
3316 list_del(&vr_entry->list);
3317 rhashtable_remove_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3318 mlxsw_sp_nexthop_group_vr_ht_params);
3323 mlxsw_sp_nexthop_group_vr_link(struct mlxsw_sp_nexthop_group *nh_grp,
3324 const struct mlxsw_sp_fib *fib)
3326 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3328 vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3330 refcount_inc(&vr_entry->ref_count);
3334 return mlxsw_sp_nexthop_group_vr_entry_create(nh_grp, fib);
3338 mlxsw_sp_nexthop_group_vr_unlink(struct mlxsw_sp_nexthop_group *nh_grp,
3339 const struct mlxsw_sp_fib *fib)
3341 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3343 vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3344 if (WARN_ON_ONCE(!vr_entry))
3347 if (!refcount_dec_and_test(&vr_entry->ref_count))
3350 mlxsw_sp_nexthop_group_vr_entry_destroy(nh_grp, vr_entry);
3353 struct mlxsw_sp_nexthop_group_cmp_arg {
3354 enum mlxsw_sp_nexthop_group_type type;
3356 struct fib_info *fi;
3357 struct mlxsw_sp_fib6_entry *fib6_entry;
3363 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
3364 const struct in6_addr *gw, int ifindex,
3369 for (i = 0; i < nh_grp->nhgi->count; i++) {
3370 const struct mlxsw_sp_nexthop *nh;
3372 nh = &nh_grp->nhgi->nexthops[i];
3373 if (nh->ifindex == ifindex && nh->nh_weight == weight &&
3374 ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
3382 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
3383 const struct mlxsw_sp_fib6_entry *fib6_entry)
3385 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3387 if (nh_grp->nhgi->count != fib6_entry->nrt6)
3390 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3391 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3392 struct in6_addr *gw;
3393 int ifindex, weight;
3395 ifindex = fib6_nh->fib_nh_dev->ifindex;
3396 weight = fib6_nh->fib_nh_weight;
3397 gw = &fib6_nh->fib_nh_gw6;
3398 if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
3407 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
3409 const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
3410 const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
3412 if (nh_grp->type != cmp_arg->type)
3415 switch (cmp_arg->type) {
3416 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3417 return cmp_arg->fi != nh_grp->ipv4.fi;
3418 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3419 return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
3420 cmp_arg->fib6_entry);
3421 case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3422 return cmp_arg->id != nh_grp->obj.id;
3429 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
3431 const struct mlxsw_sp_nexthop_group *nh_grp = data;
3432 const struct mlxsw_sp_nexthop *nh;
3433 struct fib_info *fi;
3437 switch (nh_grp->type) {
3438 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3439 fi = nh_grp->ipv4.fi;
3440 return jhash(&fi, sizeof(fi), seed);
3441 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3442 val = nh_grp->nhgi->count;
3443 for (i = 0; i < nh_grp->nhgi->count; i++) {
3444 nh = &nh_grp->nhgi->nexthops[i];
3445 val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed);
3446 val ^= jhash(&nh->gw_addr, sizeof(nh->gw_addr), seed);
3448 return jhash(&val, sizeof(val), seed);
3449 case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3450 return jhash(&nh_grp->obj.id, sizeof(nh_grp->obj.id), seed);
3458 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
3460 unsigned int val = fib6_entry->nrt6;
3461 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3463 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3464 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3465 struct net_device *dev = fib6_nh->fib_nh_dev;
3466 struct in6_addr *gw = &fib6_nh->fib_nh_gw6;
3468 val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed);
3469 val ^= jhash(gw, sizeof(*gw), seed);
3472 return jhash(&val, sizeof(val), seed);
3476 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
3478 const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
3480 switch (cmp_arg->type) {
3481 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3482 return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
3483 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3484 return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
3485 case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3486 return jhash(&cmp_arg->id, sizeof(cmp_arg->id), seed);
3493 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
3494 .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
3495 .hashfn = mlxsw_sp_nexthop_group_hash,
3496 .obj_hashfn = mlxsw_sp_nexthop_group_hash_obj,
3497 .obj_cmpfn = mlxsw_sp_nexthop_group_cmp,
3500 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
3501 struct mlxsw_sp_nexthop_group *nh_grp)
3503 if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3504 !nh_grp->nhgi->gateway)
3507 return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
3509 mlxsw_sp_nexthop_group_ht_params);
3512 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
3513 struct mlxsw_sp_nexthop_group *nh_grp)
3515 if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3516 !nh_grp->nhgi->gateway)
3519 rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
3521 mlxsw_sp_nexthop_group_ht_params);
3524 static struct mlxsw_sp_nexthop_group *
3525 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3526 struct fib_info *fi)
3528 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3530 cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
3532 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3534 mlxsw_sp_nexthop_group_ht_params);
3537 static struct mlxsw_sp_nexthop_group *
3538 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3539 struct mlxsw_sp_fib6_entry *fib6_entry)
3541 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3543 cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
3544 cmp_arg.fib6_entry = fib6_entry;
3545 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3547 mlxsw_sp_nexthop_group_ht_params);
3550 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3551 .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3552 .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3553 .key_len = sizeof(struct mlxsw_sp_nexthop_key),
3556 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3557 struct mlxsw_sp_nexthop *nh)
3559 return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3560 &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3563 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3564 struct mlxsw_sp_nexthop *nh)
3566 rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3567 mlxsw_sp_nexthop_ht_params);
3570 static struct mlxsw_sp_nexthop *
3571 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3572 struct mlxsw_sp_nexthop_key key)
3574 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3575 mlxsw_sp_nexthop_ht_params);
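/* When an adjacency group moves in KVD linear memory, the RALEU register
 * rewrites all routes of a virtual router that point at the old adjacency
 * index so they reference the new one.
 */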
3578 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3579 enum mlxsw_sp_l3proto proto,
3581 u32 adj_index, u16 ecmp_size,
3585 char raleu_pl[MLXSW_REG_RALEU_LEN];
3587 mlxsw_reg_raleu_pack(raleu_pl,
3588 (enum mlxsw_reg_ralxx_protocol) proto, vr_id,
3589 adj_index, ecmp_size, new_adj_index,
3591 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3594 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3595 struct mlxsw_sp_nexthop_group *nh_grp,
3596 u32 old_adj_index, u16 old_ecmp_size)
3598 struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
3599 struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3602 list_for_each_entry(vr_entry, &nh_grp->vr_list, list) {
3603 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp,
3604 vr_entry->key.proto,
3605 vr_entry->key.vr_id,
3611 goto err_mass_update_vr;
3616 list_for_each_entry_continue_reverse(vr_entry, &nh_grp->vr_list, list)
3617 mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr_entry->key.proto,
3618 vr_entry->key.vr_id,
3621 old_adj_index, old_ecmp_size);
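/* Write one Ethernet adjacency entry via the RATR register, encoding the
 * nexthop action as forward (with the neighbour's MAC), discard or trap.
 */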
3625 static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp,
3627 struct mlxsw_sp_nexthop *nh,
3628 bool force, char *ratr_pl)
3630 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3631 struct mlxsw_sp_rif *rif = mlxsw_sp_nexthop_rif(nh);
3632 enum mlxsw_reg_ratr_op op;
3635 rif_index = rif ? rif->rif_index :
3636 mlxsw_sp->router->lb_crif->rif->rif_index;
3637 op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY :
3638 MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY;
3639 mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET,
3640 adj_index, rif_index);
3641 switch (nh->action) {
3642 case MLXSW_SP_NEXTHOP_ACTION_FORWARD:
3643 mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3645 case MLXSW_SP_NEXTHOP_ACTION_DISCARD:
3646 mlxsw_reg_ratr_trap_action_set(ratr_pl,
3647 MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS);
3649 case MLXSW_SP_NEXTHOP_ACTION_TRAP:
3650 mlxsw_reg_ratr_trap_action_set(ratr_pl,
3651 MLXSW_REG_RATR_TRAP_ACTION_TRAP);
3652 mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
3658 if (nh->counter_valid)
3659 mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3661 mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3663 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3666 int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3667 struct mlxsw_sp_nexthop *nh, bool force,
3672 for (i = 0; i < nh->num_adj_entries; i++) {
3675 err = __mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + i,
3676 nh, force, ratr_pl);
3684 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3686 struct mlxsw_sp_nexthop *nh,
3687 bool force, char *ratr_pl)
3689 const struct mlxsw_sp_ipip_ops *ipip_ops;
3691 ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3692 return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry,
3696 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3698 struct mlxsw_sp_nexthop *nh, bool force,
3703 for (i = 0; i < nh->num_adj_entries; i++) {
3706 err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3707 nh, force, ratr_pl);
3715 static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3716 struct mlxsw_sp_nexthop *nh, bool force,
3719 /* When action is discard or trap, the nexthop must be
3720 * programmed as an Ethernet nexthop.
3722 if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH ||
3723 nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD ||
3724 nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
3725 return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh,
3728 return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh,
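/* Rewrite the adjacency entries of all nexthops in the group that should be
 * offloaded, starting from the group's base adjacency index.
 */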
3733 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3734 struct mlxsw_sp_nexthop_group_info *nhgi,
3737 char ratr_pl[MLXSW_REG_RATR_LEN];
3738 u32 adj_index = nhgi->adj_index; /* base */
3739 struct mlxsw_sp_nexthop *nh;
3742 for (i = 0; i < nhgi->count; i++) {
3743 nh = &nhgi->nexthops[i];
3745 if (!nh->should_offload) {
3750 if (nh->update || reallocate) {
3753 err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh,
3760 adj_index += nh->num_adj_entries;
3766 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3767 struct mlxsw_sp_nexthop_group *nh_grp)
3769 struct mlxsw_sp_fib_entry *fib_entry;
3772 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3773 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3780 struct mlxsw_sp_adj_grp_size_range {
3781 u16 start; /* Inclusive */
3782 u16 end; /* Inclusive */
3785 /* Ordered by range start value */
3786 static const struct mlxsw_sp_adj_grp_size_range
3787 mlxsw_sp1_adj_grp_size_ranges[] = {
3788 { .start = 1, .end = 64 },
3789 { .start = 512, .end = 512 },
3790 { .start = 1024, .end = 1024 },
3791 { .start = 2048, .end = 2048 },
3792 { .start = 4096, .end = 4096 },
3795 /* Ordered by range start value */
3796 static const struct mlxsw_sp_adj_grp_size_range
3797 mlxsw_sp2_adj_grp_size_ranges[] = {
3798 { .start = 1, .end = 128 },
3799 { .start = 256, .end = 256 },
3800 { .start = 512, .end = 512 },
3801 { .start = 1024, .end = 1024 },
3802 { .start = 2048, .end = 2048 },
3803 { .start = 4096, .end = 4096 },
3806 static void mlxsw_sp_adj_grp_size_round_up(const struct mlxsw_sp *mlxsw_sp,
3807 u16 *p_adj_grp_size)
3811 for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
3812 const struct mlxsw_sp_adj_grp_size_range *size_range;
3814 size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
3816 if (*p_adj_grp_size >= size_range->start &&
3817 *p_adj_grp_size <= size_range->end)
3820 if (*p_adj_grp_size <= size_range->end) {
3821 *p_adj_grp_size = size_range->end;
3827 static void mlxsw_sp_adj_grp_size_round_down(const struct mlxsw_sp *mlxsw_sp,
3828 u16 *p_adj_grp_size,
3829 unsigned int alloc_size)
3833 for (i = mlxsw_sp->router->adj_grp_size_ranges_count - 1; i >= 0; i--) {
3834 const struct mlxsw_sp_adj_grp_size_range *size_range;
3836 size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
3838 if (alloc_size >= size_range->end) {
3839 *p_adj_grp_size = size_range->end;
3845 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3846 u16 *p_adj_grp_size)
3848 unsigned int alloc_size;
3851 /* Round up the requested group size to the next size supported
3852 * by the device and make sure the request can be satisfied.
3854 mlxsw_sp_adj_grp_size_round_up(mlxsw_sp, p_adj_grp_size);
3855 err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3856 MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3857 *p_adj_grp_size, &alloc_size);
3860 /* It is possible the allocation results in more allocated
3861 * entries than requested. Try to use as many of them as possible.
3864 mlxsw_sp_adj_grp_size_round_down(mlxsw_sp, p_adj_grp_size, alloc_size);
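/* Normalize nexthop weights by their greatest common divisor and compute the
 * sum of the normalized weights, which determines the minimal number of
 * adjacency entries the group needs.
 */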
3870 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group_info *nhgi)
3872 int i, g = 0, sum_norm_weight = 0;
3873 struct mlxsw_sp_nexthop *nh;
3875 for (i = 0; i < nhgi->count; i++) {
3876 nh = &nhgi->nexthops[i];
3878 if (!nh->should_offload)
3881 g = gcd(nh->nh_weight, g);
3886 for (i = 0; i < nhgi->count; i++) {
3887 nh = &nhgi->nexthops[i];
3889 if (!nh->should_offload)
3891 nh->norm_nh_weight = nh->nh_weight / g;
3892 sum_norm_weight += nh->norm_nh_weight;
3895 nhgi->sum_norm_weight = sum_norm_weight;
3899 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group_info *nhgi)
3901 int i, weight = 0, lower_bound = 0;
3902 int total = nhgi->sum_norm_weight;
3903 u16 ecmp_size = nhgi->ecmp_size;
3905 for (i = 0; i < nhgi->count; i++) {
3906 struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
3909 if (!nh->should_offload)
3911 weight += nh->norm_nh_weight;
3912 upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3913 nh->num_adj_entries = upper_bound - lower_bound;
3914 lower_bound = upper_bound;
3918 static struct mlxsw_sp_nexthop *
3919 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3920 const struct mlxsw_sp_rt6 *mlxsw_sp_rt6);
3923 mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3924 struct mlxsw_sp_nexthop_group *nh_grp)
3928 for (i = 0; i < nh_grp->nhgi->count; i++) {
3929 struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
3932 nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3934 nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3939 __mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp,
3940 struct mlxsw_sp_fib6_entry *fib6_entry)
3942 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3944 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3945 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3946 struct mlxsw_sp_nexthop *nh;
3948 nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3949 if (nh && nh->offloaded)
3950 fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3952 fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3957 mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3958 struct mlxsw_sp_nexthop_group *nh_grp)
3960 struct mlxsw_sp_fib6_entry *fib6_entry;
3962 /* Unfortunately, in IPv6 the route and the nexthop are described by
3963 * the same struct, so we need to iterate over all the routes using the
3964 * nexthop group and set / clear the offload indication for them.
3966 list_for_each_entry(fib6_entry, &nh_grp->fib_list,
3967 common.nexthop_group_node)
3968 __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
3972 mlxsw_sp_nexthop_bucket_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3973 const struct mlxsw_sp_nexthop *nh,
3976 struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp;
3977 bool offload = false, trap = false;
3979 if (nh->offloaded) {
3980 if (nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
3985 nexthop_bucket_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
3986 bucket_index, offload, trap);
3990 mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3991 struct mlxsw_sp_nexthop_group *nh_grp)
3995 /* Do not update the flags if the nexthop group is being destroyed
3997 * 1. The nexthop object is being deleted, in which case the flags are
3999 * 2. The nexthop group was replaced by a newer group, in which case
4000 * the flags of the nexthop object were already updated based on the
4003 if (nh_grp->can_destroy)
4006 nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
4007 nh_grp->nhgi->adj_index_valid, false);
4009 /* Update flags of individual nexthop buckets in case of a resilient nexthop group. */
4012 if (!nh_grp->nhgi->is_resilient)
4015 for (i = 0; i < nh_grp->nhgi->count; i++) {
4016 struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
4018 mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, i);
4023 mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
4024 struct mlxsw_sp_nexthop_group *nh_grp)
4026 switch (nh_grp->type) {
4027 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
4028 mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp);
4030 case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
4031 mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp);
4033 case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
4034 mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, nh_grp);
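/* Re-evaluate the whole nexthop group: allocate or resize its block of
 * adjacency entries, rewrite them, update the FIB entries using the group
 * and, on failure, fall back to trapping traffic to the CPU.
 */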
4040 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
4041 struct mlxsw_sp_nexthop_group *nh_grp)
4043 struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
4044 u16 ecmp_size, old_ecmp_size;
4045 struct mlxsw_sp_nexthop *nh;
4046 bool offload_change = false;
4048 bool old_adj_index_valid;
4053 return mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
4055 for (i = 0; i < nhgi->count; i++) {
4056 nh = &nhgi->nexthops[i];
4058 if (nh->should_offload != nh->offloaded) {
4059 offload_change = true;
4060 if (nh->should_offload)
4064 if (!offload_change) {
4065 /* Nothing was added or removed, so no need to reallocate. Just
4066 * update MAC on existing adjacency indexes.
4068 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, false);
4070 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
4073 /* Flags of individual nexthop buckets might need to be updated. */
4076 mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
4079 mlxsw_sp_nexthop_group_normalize(nhgi);
4080 if (!nhgi->sum_norm_weight) {
4081 /* No neigh of this group is connected so we just set
4082 * the trap and let everything flow through the kernel.
4088 ecmp_size = nhgi->sum_norm_weight;
4089 err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
4091 /* No valid allocation size available. */
4094 err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
4095 ecmp_size, &adj_index);
4097 /* We ran out of KVD linear space, just set the
4098 * trap and let everything flow through the kernel.
4100 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
4103 old_adj_index_valid = nhgi->adj_index_valid;
4104 old_adj_index = nhgi->adj_index;
4105 old_ecmp_size = nhgi->ecmp_size;
4106 nhgi->adj_index_valid = 1;
4107 nhgi->adj_index = adj_index;
4108 nhgi->ecmp_size = ecmp_size;
4109 mlxsw_sp_nexthop_group_rebalance(nhgi);
4110 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, true);
4112 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
4116 mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
4118 if (!old_adj_index_valid) {
4119 /* The trap was set for fib entries, so we have to call
4120 * fib entry update to unset it and use adjacency index.
4122 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
4124 dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
4130 err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
4131 old_adj_index, old_ecmp_size);
4132 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
4133 old_ecmp_size, old_adj_index);
4135 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
4142 old_adj_index_valid = nhgi->adj_index_valid;
4143 nhgi->adj_index_valid = 0;
4144 for (i = 0; i < nhgi->count; i++) {
4145 nh = &nhgi->nexthops[i];
4148 err2 = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
4150 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
4151 mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
4152 if (old_adj_index_valid)
4153 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
4154 nhgi->ecmp_size, nhgi->adj_index);
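/* Update a nexthop's offload state: forward when its neighbour is valid,
 * trap in resilient groups (the bucket must stay populated), otherwise stop
 * offloading it.
 */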
4158 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
4162 nh->action = MLXSW_SP_NEXTHOP_ACTION_FORWARD;
4163 nh->should_offload = 1;
4164 } else if (nh->nhgi->is_resilient) {
4165 nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
4166 nh->should_offload = 1;
4168 nh->should_offload = 0;
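/* The neighbour used by these nexthops was flushed as dead: look up or
 * create a new neighbour for the same gateway, re-key the entry to it and
 * refresh the affected nexthop groups.
 */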
4174 mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
4175 struct mlxsw_sp_neigh_entry *neigh_entry)
4177 struct neighbour *n, *old_n = neigh_entry->key.n;
4178 struct mlxsw_sp_nexthop *nh;
4179 struct net_device *dev;
4180 bool entry_connected;
4184 nh = list_first_entry(&neigh_entry->nexthop_list,
4185 struct mlxsw_sp_nexthop, neigh_list_node);
4186 dev = mlxsw_sp_nexthop_dev(nh);
4188 n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, dev);
4190 n = neigh_create(nh->neigh_tbl, &nh->gw_addr, dev);
4193 neigh_event_send(n, NULL);
4196 mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
4197 neigh_entry->key.n = n;
4198 err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
4200 goto err_neigh_entry_insert;
4202 read_lock_bh(&n->lock);
4203 nud_state = n->nud_state;
4205 read_unlock_bh(&n->lock);
4206 entry_connected = nud_state & NUD_VALID && !dead;
4208 list_for_each_entry(nh, &neigh_entry->nexthop_list,
4210 neigh_release(old_n);
4212 __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
4213 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4220 err_neigh_entry_insert:
4221 neigh_entry->key.n = old_n;
4222 mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
4228 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
4229 struct mlxsw_sp_neigh_entry *neigh_entry,
4230 bool removing, bool dead)
4232 struct mlxsw_sp_nexthop *nh;
4234 if (list_empty(&neigh_entry->nexthop_list))
4240 err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
4243 dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
4247 list_for_each_entry(nh, &neigh_entry->nexthop_list,
4249 __mlxsw_sp_nexthop_neigh_update(nh, removing);
4250 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4254 static void mlxsw_sp_nexthop_crif_init(struct mlxsw_sp_nexthop *nh,
4255 struct mlxsw_sp_crif *crif)
4261 list_add(&nh->crif_list_node, &crif->nexthop_list);
4264 static void mlxsw_sp_nexthop_crif_fini(struct mlxsw_sp_nexthop *nh)
4269 list_del(&nh->crif_list_node);
4273 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
4274 struct mlxsw_sp_nexthop *nh)
4276 struct mlxsw_sp_neigh_entry *neigh_entry;
4277 struct net_device *dev;
4278 struct neighbour *n;
4282 if (WARN_ON(!nh->crif->rif))
4285 if (!nh->nhgi->gateway || nh->neigh_entry)
4287 dev = mlxsw_sp_nexthop_dev(nh);
4289 /* Take a reference on the neighbour here to ensure it is not
4290 * destroyed before the nexthop entry is finished with it.
4291 * The reference is taken either in neigh_lookup() or
4292 * in neigh_create() in case n is not found.
4294 n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, dev);
4296 n = neigh_create(nh->neigh_tbl, &nh->gw_addr, dev);
4299 neigh_event_send(n, NULL);
4301 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
4303 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
4304 if (IS_ERR(neigh_entry)) {
4306 goto err_neigh_entry_create;
4310 /* If that is the first nexthop connected to that neigh, add to
4311 * nexthop_neighs_list
4313 if (list_empty(&neigh_entry->nexthop_list))
4314 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
4315 &mlxsw_sp->router->nexthop_neighs_list);
4317 nh->neigh_entry = neigh_entry;
4318 list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
4319 read_lock_bh(&n->lock);
4320 nud_state = n->nud_state;
4322 read_unlock_bh(&n->lock);
4323 __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
4327 err_neigh_entry_create:
4332 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
4333 struct mlxsw_sp_nexthop *nh)
4335 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
4336 struct neighbour *n;
4340 n = neigh_entry->key.n;
4342 __mlxsw_sp_nexthop_neigh_update(nh, true);
4343 list_del(&nh->neigh_list_node);
4344 nh->neigh_entry = NULL;
4346 /* If that is the last nexthop connected to that neigh, remove from
4347 * nexthop_neighs_list
4349 if (list_empty(&neigh_entry->nexthop_list))
4350 list_del(&neigh_entry->nexthop_neighs_list_node);
4352 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
4353 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
4358 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
4360 struct net_device *ul_dev;
4364 ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
4365 is_up = ul_dev ? (ul_dev->flags & IFF_UP) : true;
4371 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
4372 struct mlxsw_sp_nexthop *nh,
4373 struct mlxsw_sp_ipip_entry *ipip_entry)
4375 struct mlxsw_sp_crif *crif;
4378 if (!nh->nhgi->gateway || nh->ipip_entry)
4381 crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, ipip_entry->ol_dev);
4385 nh->ipip_entry = ipip_entry;
4386 removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
4387 __mlxsw_sp_nexthop_neigh_update(nh, removing);
4388 mlxsw_sp_nexthop_crif_init(nh, crif);
4391 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
4392 struct mlxsw_sp_nexthop *nh)
4394 struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
4399 __mlxsw_sp_nexthop_neigh_update(nh, true);
4400 nh->ipip_entry = NULL;
4403 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4404 const struct fib_nh *fib_nh,
4405 enum mlxsw_sp_ipip_type *p_ipipt)
4407 struct net_device *dev = fib_nh->fib_nh_dev;
4410 fib_nh->nh_parent->fib_type == RTN_UNICAST &&
4411 mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
4414 static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp,
4415 struct mlxsw_sp_nexthop *nh,
4416 const struct net_device *dev)
4418 const struct mlxsw_sp_ipip_ops *ipip_ops;
4419 struct mlxsw_sp_ipip_entry *ipip_entry;
4420 struct mlxsw_sp_crif *crif;
4423 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4425 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4426 if (ipip_ops->can_offload(mlxsw_sp, dev)) {
4427 nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4428 mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4433 nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4434 crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, dev);
4438 mlxsw_sp_nexthop_crif_init(nh, crif);
4443 err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4445 goto err_neigh_init;
4450 mlxsw_sp_nexthop_crif_fini(nh);
4454 static int mlxsw_sp_nexthop_type_rif_made(struct mlxsw_sp *mlxsw_sp,
4455 struct mlxsw_sp_nexthop *nh)
4458 case MLXSW_SP_NEXTHOP_TYPE_ETH:
4459 return mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4460 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4467 static void mlxsw_sp_nexthop_type_rif_gone(struct mlxsw_sp *mlxsw_sp,
4468 struct mlxsw_sp_nexthop *nh)
4471 case MLXSW_SP_NEXTHOP_TYPE_ETH:
4472 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
4474 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4475 mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
4480 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
4481 struct mlxsw_sp_nexthop *nh)
4483 mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh);
4484 mlxsw_sp_nexthop_crif_fini(nh);
4487 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
4488 struct mlxsw_sp_nexthop_group *nh_grp,
4489 struct mlxsw_sp_nexthop *nh,
4490 struct fib_nh *fib_nh)
4492 struct net_device *dev = fib_nh->fib_nh_dev;
4493 struct in_device *in_dev;
4496 nh->nhgi = nh_grp->nhgi;
4497 nh->key.fib_nh = fib_nh;
4498 #ifdef CONFIG_IP_ROUTE_MULTIPATH
4499 nh->nh_weight = fib_nh->fib_nh_weight;
4503 memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
4504 nh->neigh_tbl = &arp_tbl;
4505 err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
4509 mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4510 list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4514 nh->ifindex = dev->ifindex;
4517 in_dev = __in_dev_get_rcu(dev);
4518 if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
4519 fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
4525 err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
4527 goto err_nexthop_neigh_init;
4531 err_nexthop_neigh_init:
4532 list_del(&nh->router_list_node);
4533 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4534 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
4538 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
4539 struct mlxsw_sp_nexthop *nh)
4541 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4542 list_del(&nh->router_list_node);
4543 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4544 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
4547 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
4548 unsigned long event, struct fib_nh *fib_nh)
4550 struct mlxsw_sp_nexthop_key key;
4551 struct mlxsw_sp_nexthop *nh;
4553 key.fib_nh = fib_nh;
4554 nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
4559 case FIB_EVENT_NH_ADD:
4560 mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, fib_nh->fib_nh_dev);
4562 case FIB_EVENT_NH_DEL:
4563 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4567 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4570 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
4571 struct mlxsw_sp_rif *rif)
4573 struct net_device *dev = mlxsw_sp_rif_dev(rif);
4574 struct mlxsw_sp_nexthop *nh;
4577 list_for_each_entry(nh, &rif->crif->nexthop_list, crif_list_node) {
4579 case MLXSW_SP_NEXTHOP_TYPE_ETH:
4582 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4583 removing = !mlxsw_sp_ipip_netdev_ul_up(dev);
4590 __mlxsw_sp_nexthop_neigh_update(nh, removing);
4591 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4595 static int mlxsw_sp_nexthop_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
4596 struct mlxsw_sp_rif *rif)
4598 struct mlxsw_sp_nexthop *nh, *tmp;
4602 list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list,
4604 err = mlxsw_sp_nexthop_type_rif_made(mlxsw_sp, nh);
4606 goto err_nexthop_type_rif;
4607 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4613 err_nexthop_type_rif:
4614 list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list,
4618 mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh);
4619 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4624 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
4625 struct mlxsw_sp_rif *rif)
4627 struct mlxsw_sp_nexthop *nh, *tmp;
4629 list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list,
4631 mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh);
4632 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
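/* Program a single adjacency entry whose action is to trap to the CPU
 * (trap ID RTR_EGRESS0). It is created for the first nexthop group and
 * freed together with the last one; see mlxsw_sp_nexthop_group_inc() and
 * mlxsw_sp_nexthop_group_dec() below.
 */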
4636 static int mlxsw_sp_adj_trap_entry_init(struct mlxsw_sp *mlxsw_sp)
4638 enum mlxsw_reg_ratr_trap_action trap_action;
4639 char ratr_pl[MLXSW_REG_RATR_LEN];
4642 err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4643 &mlxsw_sp->router->adj_trap_index);
4647 trap_action = MLXSW_REG_RATR_TRAP_ACTION_TRAP;
4648 mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true,
4649 MLXSW_REG_RATR_TYPE_ETHERNET,
4650 mlxsw_sp->router->adj_trap_index,
4651 mlxsw_sp->router->lb_crif->rif->rif_index);
4652 mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action);
4653 mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
4654 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
4656 goto err_ratr_write;
4661 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4662 mlxsw_sp->router->adj_trap_index);
4666 static void mlxsw_sp_adj_trap_entry_fini(struct mlxsw_sp *mlxsw_sp)
4668 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4669 mlxsw_sp->router->adj_trap_index);
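/* The trap adjacency entry is reference counted by the number of nexthop
 * groups: the first group to be created allocates it and the last group
 * to be destroyed frees it.
 */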
4672 static int mlxsw_sp_nexthop_group_inc(struct mlxsw_sp *mlxsw_sp)
4676 if (refcount_inc_not_zero(&mlxsw_sp->router->num_groups))
4679 err = mlxsw_sp_adj_trap_entry_init(mlxsw_sp);
4683 refcount_set(&mlxsw_sp->router->num_groups, 1);
4688 static void mlxsw_sp_nexthop_group_dec(struct mlxsw_sp *mlxsw_sp)
4690 if (!refcount_dec_and_test(&mlxsw_sp->router->num_groups))
4693 mlxsw_sp_adj_trap_entry_fini(mlxsw_sp);
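/* Resilient nexthop groups: the activity of a group's buckets is read
 * (and cleared) from the device using the RATRAD register and reported
 * to the nexthop code, which uses it to decide when idle buckets may be
 * migrated. The query runs from a delayed work that is rescheduled every
 * MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL milliseconds for as long as
 * resilient groups exist.
 */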
4697 mlxsw_sp_nh_grp_activity_get(struct mlxsw_sp *mlxsw_sp,
4698 const struct mlxsw_sp_nexthop_group *nh_grp,
4699 unsigned long *activity)
4704 ratrad_pl = kmalloc(MLXSW_REG_RATRAD_LEN, GFP_KERNEL);
4708 mlxsw_reg_ratrad_pack(ratrad_pl, nh_grp->nhgi->adj_index,
4709 nh_grp->nhgi->count);
4710 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratrad), ratrad_pl);
4714 for (i = 0; i < nh_grp->nhgi->count; i++) {
4715 if (!mlxsw_reg_ratrad_activity_vector_get(ratrad_pl, i))
4716 continue;
4717 bitmap_set(activity, i, 1);
4724 #define MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL 1000 /* ms */
4727 mlxsw_sp_nh_grp_activity_update(struct mlxsw_sp *mlxsw_sp,
4728 const struct mlxsw_sp_nexthop_group *nh_grp)
4730 unsigned long *activity;
4732 activity = bitmap_zalloc(nh_grp->nhgi->count, GFP_KERNEL);
4736 mlxsw_sp_nh_grp_activity_get(mlxsw_sp, nh_grp, activity);
4737 nexthop_res_grp_activity_update(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
4738 nh_grp->nhgi->count, activity);
4740 bitmap_free(activity);
4744 mlxsw_sp_nh_grp_activity_work_schedule(struct mlxsw_sp *mlxsw_sp)
4746 unsigned int interval = MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL;
4748 mlxsw_core_schedule_dw(&mlxsw_sp->router->nh_grp_activity_dw,
4749 msecs_to_jiffies(interval));
4752 static void mlxsw_sp_nh_grp_activity_work(struct work_struct *work)
4754 struct mlxsw_sp_nexthop_group_info *nhgi;
4755 struct mlxsw_sp_router *router;
4756 bool reschedule = false;
4758 router = container_of(work, struct mlxsw_sp_router,
4759 nh_grp_activity_dw.work);
4761 mutex_lock(&router->lock);
4763 list_for_each_entry(nhgi, &router->nh_res_grp_list, list) {
4764 mlxsw_sp_nh_grp_activity_update(router->mlxsw_sp, nhgi->nh_grp);
4768 mutex_unlock(&router->lock);
4772 mlxsw_sp_nh_grp_activity_work_schedule(router->mlxsw_sp);
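/* Validation of nexthop object notifications: FDB and encapsulating
 * nexthops are rejected, and every group entry (or resilient group
 * bucket) must have a gateway, be a blackhole nexthop or be backed by an
 * IP-in-IP device, which is programmed as an encapsulating adjacency
 * entry.
 */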
4776 mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
4777 const struct nh_notifier_single_info *nh,
4778 struct netlink_ext_ack *extack)
4783 NL_SET_ERR_MSG_MOD(extack, "FDB nexthops are not supported");
4784 else if (nh->has_encap)
4785 NL_SET_ERR_MSG_MOD(extack, "Encapsulating nexthops are not supported");
4793 mlxsw_sp_nexthop_obj_group_entry_validate(struct mlxsw_sp *mlxsw_sp,
4794 const struct nh_notifier_single_info *nh,
4795 struct netlink_ext_ack *extack)
4799 err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh, extack);
4803 /* Device only nexthops with an IPIP device are programmed as
4804 * encapsulating adjacency entries.
4806 if (!nh->gw_family && !nh->is_reject &&
4807 !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) {
4808 NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
4816 mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
4817 const struct nh_notifier_grp_info *nh_grp,
4818 struct netlink_ext_ack *extack)
4822 if (nh_grp->is_fdb) {
4823 NL_SET_ERR_MSG_MOD(extack, "FDB nexthop groups are not supported");
4827 for (i = 0; i < nh_grp->num_nh; i++) {
4828 const struct nh_notifier_single_info *nh;
4831 nh = &nh_grp->nh_entries[i].nh;
4832 err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4842 mlxsw_sp_nexthop_obj_res_group_size_validate(struct mlxsw_sp *mlxsw_sp,
4843 const struct nh_notifier_res_table_info *nh_res_table,
4844 struct netlink_ext_ack *extack)
4846 unsigned int alloc_size;
4847 bool valid_size = false;
4850 if (nh_res_table->num_nh_buckets < 32) {
4851 NL_SET_ERR_MSG_MOD(extack, "Minimum number of buckets is 32");
4855 for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
4856 const struct mlxsw_sp_adj_grp_size_range *size_range;
4858 size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
4860 if (nh_res_table->num_nh_buckets >= size_range->start &&
4861 nh_res_table->num_nh_buckets <= size_range->end) {
4868 NL_SET_ERR_MSG_MOD(extack, "Invalid number of buckets");
4872 err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
4873 MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
4874 nh_res_table->num_nh_buckets,
4876 if (err || nh_res_table->num_nh_buckets != alloc_size) {
4877 NL_SET_ERR_MSG_MOD(extack, "Number of buckets does not fit allocation size of any KVDL partition");
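/* A resilient group is validated in two steps: the requested number of
 * buckets must be at least 32, fall in one of the supported adjacency
 * group size ranges and exactly match a KVDL allocation size; each
 * bucket's nexthop is then validated like a regular group entry.
 */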
4885 mlxsw_sp_nexthop_obj_res_group_validate(struct mlxsw_sp *mlxsw_sp,
4886 const struct nh_notifier_res_table_info *nh_res_table,
4887 struct netlink_ext_ack *extack)
4892 err = mlxsw_sp_nexthop_obj_res_group_size_validate(mlxsw_sp,
4898 for (i = 0; i < nh_res_table->num_nh_buckets; i++) {
4899 const struct nh_notifier_single_info *nh;
4902 nh = &nh_res_table->nhs[i];
4903 err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4912 static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp,
4913 unsigned long event,
4914 struct nh_notifier_info *info)
4916 struct nh_notifier_single_info *nh;
4918 if (event != NEXTHOP_EVENT_REPLACE &&
4919 event != NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE &&
4920 event != NEXTHOP_EVENT_BUCKET_REPLACE)
4923 switch (info->type) {
4924 case NH_NOTIFIER_INFO_TYPE_SINGLE:
4925 return mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, info->nh,
4927 case NH_NOTIFIER_INFO_TYPE_GRP:
4928 return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp,
4931 case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4932 return mlxsw_sp_nexthop_obj_res_group_validate(mlxsw_sp,
4935 case NH_NOTIFIER_INFO_TYPE_RES_BUCKET:
4936 nh = &info->nh_res_bucket->new_nh;
4937 return mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4940 NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type");
4945 static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp,
4946 const struct nh_notifier_info *info)
4948 const struct net_device *dev;
4950 switch (info->type) {
4951 case NH_NOTIFIER_INFO_TYPE_SINGLE:
4952 dev = info->nh->dev;
4953 return info->nh->gw_family || info->nh->is_reject ||
4954 mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
4955 case NH_NOTIFIER_INFO_TYPE_GRP:
4956 case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4957 /* Already validated earlier. */
4964 static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp,
4965 struct mlxsw_sp_nexthop *nh)
4967 nh->action = MLXSW_SP_NEXTHOP_ACTION_DISCARD;
4968 nh->should_offload = 1;
4969 /* While nexthops that discard packets do not forward packets
4970 * via an egress RIF, they still need to be programmed using a
4971 * valid RIF, so use the loopback RIF created during init.
4973 nh->crif = mlxsw_sp->router->lb_crif;
4976 static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp,
4977 struct mlxsw_sp_nexthop *nh)
4980 nh->should_offload = 0;
4984 mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
4985 struct mlxsw_sp_nexthop_group *nh_grp,
4986 struct mlxsw_sp_nexthop *nh,
4987 struct nh_notifier_single_info *nh_obj, int weight)
4989 struct net_device *dev = nh_obj->dev;
4992 nh->nhgi = nh_grp->nhgi;
4993 nh->nh_weight = weight;
4995 switch (nh_obj->gw_family) {
4997 memcpy(&nh->gw_addr, &nh_obj->ipv4, sizeof(nh_obj->ipv4));
4998 nh->neigh_tbl = &arp_tbl;
5001 memcpy(&nh->gw_addr, &nh_obj->ipv6, sizeof(nh_obj->ipv6));
5002 #if IS_ENABLED(CONFIG_IPV6)
5003 nh->neigh_tbl = &nd_tbl;
5008 mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
5009 list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
5010 nh->ifindex = dev->ifindex;
5012 err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
5016 if (nh_obj->is_reject)
5017 mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh);
5019 /* In a resilient nexthop group, all the nexthops must be written to
5020 * the adjacency table. Even if they do not have a valid neighbour or
5021 * RIF.
5022 */
5023 if (nh_grp->nhgi->is_resilient && !nh->should_offload) {
5024 nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
5025 nh->should_offload = 1;
5031 list_del(&nh->router_list_node);
5032 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
5036 static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
5037 struct mlxsw_sp_nexthop *nh)
5039 if (nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD)
5040 mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh);
5041 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
5042 list_del(&nh->router_list_node);
5043 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
5044 nh->should_offload = 0;
5048 mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
5049 struct mlxsw_sp_nexthop_group *nh_grp,
5050 struct nh_notifier_info *info)
5052 struct mlxsw_sp_nexthop_group_info *nhgi;
5053 struct mlxsw_sp_nexthop *nh;
5054 bool is_resilient = false;
5058 switch (info->type) {
5059 case NH_NOTIFIER_INFO_TYPE_SINGLE:
5062 case NH_NOTIFIER_INFO_TYPE_GRP:
5063 nhs = info->nh_grp->num_nh;
5065 case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
5066 nhs = info->nh_res_table->num_nh_buckets;
5067 is_resilient = true;
5073 nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
5076 nh_grp->nhgi = nhgi;
5077 nhgi->nh_grp = nh_grp;
5078 nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info);
5079 nhgi->is_resilient = is_resilient;
5081 for (i = 0; i < nhgi->count; i++) {
5082 struct nh_notifier_single_info *nh_obj;
5085 nh = &nhgi->nexthops[i];
5086 switch (info->type) {
5087 case NH_NOTIFIER_INFO_TYPE_SINGLE:
5091 case NH_NOTIFIER_INFO_TYPE_GRP:
5092 nh_obj = &info->nh_grp->nh_entries[i].nh;
5093 weight = info->nh_grp->nh_entries[i].weight;
5095 case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
5096 nh_obj = &info->nh_res_table->nhs[i];
5101 goto err_nexthop_obj_init;
5103 err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj,
5106 goto err_nexthop_obj_init;
5108 err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
5111 err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5113 NL_SET_ERR_MSG_MOD(info->extack, "Failed to write adjacency entries to the device");
5114 goto err_group_refresh;
5117 /* Add resilient nexthop groups to a list so that the activity of their
5118 * nexthop buckets will be periodically queried and cleared.
5120 if (nhgi->is_resilient) {
5121 if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
5122 mlxsw_sp_nh_grp_activity_work_schedule(mlxsw_sp);
5123 list_add(&nhgi->list, &mlxsw_sp->router->nh_res_grp_list);
5129 mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5132 err_nexthop_obj_init:
5133 for (i--; i >= 0; i--) {
5134 nh = &nhgi->nexthops[i];
5135 mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5142 mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp,
5143 struct mlxsw_sp_nexthop_group *nh_grp)
5145 struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
5146 struct mlxsw_sp_router *router = mlxsw_sp->router;
5149 if (nhgi->is_resilient) {
5150 list_del(&nhgi->list);
5151 if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
5152 cancel_delayed_work(&router->nh_grp_activity_dw);
5155 mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5156 for (i = nhgi->count - 1; i >= 0; i--) {
5157 struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
5159 mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5161 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5162 WARN_ON_ONCE(nhgi->adj_index_valid);
5166 static struct mlxsw_sp_nexthop_group *
5167 mlxsw_sp_nexthop_obj_group_create(struct mlxsw_sp *mlxsw_sp,
5168 struct nh_notifier_info *info)
5170 struct mlxsw_sp_nexthop_group *nh_grp;
5173 nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
5175 return ERR_PTR(-ENOMEM);
5176 INIT_LIST_HEAD(&nh_grp->vr_list);
5177 err = rhashtable_init(&nh_grp->vr_ht,
5178 &mlxsw_sp_nexthop_group_vr_ht_params);
5180 goto err_nexthop_group_vr_ht_init;
5181 INIT_LIST_HEAD(&nh_grp->fib_list);
5182 nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
5183 nh_grp->obj.id = info->id;
5185 err = mlxsw_sp_nexthop_obj_group_info_init(mlxsw_sp, nh_grp, info);
5187 goto err_nexthop_group_info_init;
5189 nh_grp->can_destroy = false;
5193 err_nexthop_group_info_init:
5194 rhashtable_destroy(&nh_grp->vr_ht);
5195 err_nexthop_group_vr_ht_init:
5197 return ERR_PTR(err);
5201 mlxsw_sp_nexthop_obj_group_destroy(struct mlxsw_sp *mlxsw_sp,
5202 struct mlxsw_sp_nexthop_group *nh_grp)
5204 if (!nh_grp->can_destroy)
5206 mlxsw_sp_nexthop_obj_group_info_fini(mlxsw_sp, nh_grp);
5207 WARN_ON_ONCE(!list_empty(&nh_grp->fib_list));
5208 WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
5209 rhashtable_destroy(&nh_grp->vr_ht);
5213 static struct mlxsw_sp_nexthop_group *
5214 mlxsw_sp_nexthop_obj_group_lookup(struct mlxsw_sp *mlxsw_sp, u32 id)
5216 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
5218 cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
5220 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
5222 mlxsw_sp_nexthop_group_ht_params);
5225 static int mlxsw_sp_nexthop_obj_group_add(struct mlxsw_sp *mlxsw_sp,
5226 struct mlxsw_sp_nexthop_group *nh_grp)
5228 return mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
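/* Replacement of an existing nexthop group object: the group info of the
 * new group and of the group currently used by routes is swapped, so the
 * routes start using the new nexthops; the device is then updated
 * according to which of the two adjacency indexes is valid and the
 * now-unused shell of the new group is destroyed. On failure the swap is
 * reverted and the routes keep using the old nexthops.
 */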
5232 mlxsw_sp_nexthop_obj_group_replace(struct mlxsw_sp *mlxsw_sp,
5233 struct mlxsw_sp_nexthop_group *nh_grp,
5234 struct mlxsw_sp_nexthop_group *old_nh_grp,
5235 struct netlink_ext_ack *extack)
5237 struct mlxsw_sp_nexthop_group_info *old_nhgi = old_nh_grp->nhgi;
5238 struct mlxsw_sp_nexthop_group_info *new_nhgi = nh_grp->nhgi;
5241 old_nh_grp->nhgi = new_nhgi;
5242 new_nhgi->nh_grp = old_nh_grp;
5243 nh_grp->nhgi = old_nhgi;
5244 old_nhgi->nh_grp = nh_grp;
5246 if (old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
5247 /* Both the old adjacency index and the new one are valid.
5248 * Routes are currently using the old one. Tell the device to
5249 * replace the old adjacency index with the new one.
5251 err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, old_nh_grp,
5252 old_nhgi->adj_index,
5253 old_nhgi->ecmp_size);
5255 NL_SET_ERR_MSG_MOD(extack, "Failed to replace old adjacency index with new one");
5258 } else if (old_nhgi->adj_index_valid && !new_nhgi->adj_index_valid) {
5259 /* The old adjacency index is valid, while the new one is not.
5260 * Iterate over all the routes using the group and change them
5261 * to trap packets to the CPU.
5263 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
5265 NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to trap packets");
5268 } else if (!old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
5269 /* The old adjacency index is invalid, while the new one is.
5270 * Iterate over all the routes using the group and change them
5271 * to forward packets using the new valid index.
5273 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
5275 NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to forward packets");
5280 /* Make sure the flags are set / cleared based on the new nexthop group
5281 * information.
5282 */
5283 mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, old_nh_grp);
5285 /* At this point 'nh_grp' is just a shell that is not used by anyone
5286 * and its nexthop group info is the old info that was just replaced
5287 * with the new one. Remove it.
5289 nh_grp->can_destroy = true;
5290 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5295 old_nhgi->nh_grp = old_nh_grp;
5296 nh_grp->nhgi = new_nhgi;
5297 new_nhgi->nh_grp = nh_grp;
5298 old_nh_grp->nhgi = old_nhgi;
5302 static int mlxsw_sp_nexthop_obj_new(struct mlxsw_sp *mlxsw_sp,
5303 struct nh_notifier_info *info)
5305 struct mlxsw_sp_nexthop_group *nh_grp, *old_nh_grp;
5306 struct netlink_ext_ack *extack = info->extack;
5309 nh_grp = mlxsw_sp_nexthop_obj_group_create(mlxsw_sp, info);
5311 return PTR_ERR(nh_grp);
5313 old_nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5315 err = mlxsw_sp_nexthop_obj_group_add(mlxsw_sp, nh_grp);
5317 err = mlxsw_sp_nexthop_obj_group_replace(mlxsw_sp, nh_grp,
5318 old_nh_grp, extack);
5321 nh_grp->can_destroy = true;
5322 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5328 static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp,
5329 struct nh_notifier_info *info)
5331 struct mlxsw_sp_nexthop_group *nh_grp;
5333 nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5337 nh_grp->can_destroy = true;
5338 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5340 /* If the group still has routes using it, then defer the delete
5341 * operation until the last route using it is deleted.
5343 if (!list_empty(&nh_grp->fib_list))
5345 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
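/* Replacement of a single bucket of a resilient group: the bucket's
 * nexthop is re-initialized from the notification and its adjacency
 * entry is rewritten. Unless the replacement was forced, the entry is
 * then read back and compared (ignoring the opcode and activity bit) to
 * detect the case where the device skipped the write because the bucket
 * was active.
 */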
5348 static int mlxsw_sp_nexthop_obj_bucket_query(struct mlxsw_sp *mlxsw_sp,
5349 u32 adj_index, char *ratr_pl)
5351 MLXSW_REG_ZERO(ratr, ratr_pl);
5352 mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
5353 mlxsw_reg_ratr_adjacency_index_low_set(ratr_pl, adj_index);
5354 mlxsw_reg_ratr_adjacency_index_high_set(ratr_pl, adj_index >> 16);
5356 return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
5359 static int mlxsw_sp_nexthop_obj_bucket_compare(char *ratr_pl, char *ratr_pl_new)
5361 /* Clear the opcode and activity on both the old and new payload as
5362 * they are irrelevant for the comparison.
5364 mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
5365 mlxsw_reg_ratr_a_set(ratr_pl, 0);
5366 mlxsw_reg_ratr_op_set(ratr_pl_new, MLXSW_REG_RATR_OP_QUERY_READ);
5367 mlxsw_reg_ratr_a_set(ratr_pl_new, 0);
5369 /* If the contents of the adjacency entry are consistent with the
5370 * replacement request, then replacement was successful.
5372 if (!memcmp(ratr_pl, ratr_pl_new, MLXSW_REG_RATR_LEN))
5379 mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp,
5380 struct mlxsw_sp_nexthop *nh,
5381 struct nh_notifier_info *info)
5383 u16 bucket_index = info->nh_res_bucket->bucket_index;
5384 struct netlink_ext_ack *extack = info->extack;
5385 bool force = info->nh_res_bucket->force;
5386 char ratr_pl_new[MLXSW_REG_RATR_LEN];
5387 char ratr_pl[MLXSW_REG_RATR_LEN];
5391 /* No point in trying an atomic replacement if the idle timer interval
5392 * is smaller than the interval in which we query and clear activity.
5394 if (!force && info->nh_res_bucket->idle_timer_ms <
5395 MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL)
5398 adj_index = nh->nhgi->adj_index + bucket_index;
5399 err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, force, ratr_pl);
5401 NL_SET_ERR_MSG_MOD(extack, "Failed to overwrite nexthop bucket");
5406 err = mlxsw_sp_nexthop_obj_bucket_query(mlxsw_sp, adj_index,
5409 NL_SET_ERR_MSG_MOD(extack, "Failed to query nexthop bucket state after replacement. State might be inconsistent");
5413 err = mlxsw_sp_nexthop_obj_bucket_compare(ratr_pl, ratr_pl_new);
5415 NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket was not replaced because it was active during replacement");
5422 mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, bucket_index);
5427 static int mlxsw_sp_nexthop_obj_bucket_replace(struct mlxsw_sp *mlxsw_sp,
5428 struct nh_notifier_info *info)
5430 u16 bucket_index = info->nh_res_bucket->bucket_index;
5431 struct netlink_ext_ack *extack = info->extack;
5432 struct mlxsw_sp_nexthop_group_info *nhgi;
5433 struct nh_notifier_single_info *nh_obj;
5434 struct mlxsw_sp_nexthop_group *nh_grp;
5435 struct mlxsw_sp_nexthop *nh;
5438 nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5440 NL_SET_ERR_MSG_MOD(extack, "Nexthop group was not found");
5444 nhgi = nh_grp->nhgi;
5446 if (bucket_index >= nhgi->count) {
5447 NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket index out of range");
5451 nh = &nhgi->nexthops[bucket_index];
5452 mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5454 nh_obj = &info->nh_res_bucket->new_nh;
5455 err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
5457 NL_SET_ERR_MSG_MOD(extack, "Failed to initialize nexthop object for nexthop bucket replacement");
5458 goto err_nexthop_obj_init;
5461 err = mlxsw_sp_nexthop_obj_bucket_adj_update(mlxsw_sp, nh, info);
5463 goto err_nexthop_obj_bucket_adj_update;
5467 err_nexthop_obj_bucket_adj_update:
5468 mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5469 err_nexthop_obj_init:
5470 nh_obj = &info->nh_res_bucket->old_nh;
5471 mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
5472 /* The old adjacency entry was not overwritten */
5478 static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
5479 unsigned long event, void *ptr)
5481 struct nh_notifier_info *info = ptr;
5482 struct mlxsw_sp_router *router;
5485 router = container_of(nb, struct mlxsw_sp_router, nexthop_nb);
5486 err = mlxsw_sp_nexthop_obj_validate(router->mlxsw_sp, event, info);
5490 mutex_lock(&router->lock);
5493 case NEXTHOP_EVENT_REPLACE:
5494 err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info);
5496 case NEXTHOP_EVENT_DEL:
5497 mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info);
5499 case NEXTHOP_EVENT_BUCKET_REPLACE:
5500 err = mlxsw_sp_nexthop_obj_bucket_replace(router->mlxsw_sp,
5507 mutex_unlock(&router->lock);
5510 return notifier_from_errno(err);
5513 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5514 struct fib_info *fi)
5516 const struct fib_nh *nh = fib_info_nh(fi, 0);
5518 return nh->fib_nh_gw_family ||
5519 mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
5523 mlxsw_sp_nexthop4_group_info_init(struct mlxsw_sp *mlxsw_sp,
5524 struct mlxsw_sp_nexthop_group *nh_grp)
5526 unsigned int nhs = fib_info_num_path(nh_grp->ipv4.fi);
5527 struct mlxsw_sp_nexthop_group_info *nhgi;
5528 struct mlxsw_sp_nexthop *nh;
5531 nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
5534 nh_grp->nhgi = nhgi;
5535 nhgi->nh_grp = nh_grp;
5536 nhgi->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, nh_grp->ipv4.fi);
5538 for (i = 0; i < nhgi->count; i++) {
5539 struct fib_nh *fib_nh;
5541 nh = &nhgi->nexthops[i];
5542 fib_nh = fib_info_nh(nh_grp->ipv4.fi, i);
5543 err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
5545 goto err_nexthop4_init;
5547 err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
5550 err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5552 goto err_group_refresh;
5557 mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5561 for (i--; i >= 0; i--) {
5562 nh = &nhgi->nexthops[i];
5563 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
5570 mlxsw_sp_nexthop4_group_info_fini(struct mlxsw_sp *mlxsw_sp,
5571 struct mlxsw_sp_nexthop_group *nh_grp)
5573 struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
5576 mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5577 for (i = nhgi->count - 1; i >= 0; i--) {
5578 struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
5580 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
5582 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5583 WARN_ON_ONCE(nhgi->adj_index_valid);
5587 static struct mlxsw_sp_nexthop_group *
5588 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
5590 struct mlxsw_sp_nexthop_group *nh_grp;
5593 nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
5595 return ERR_PTR(-ENOMEM);
5596 INIT_LIST_HEAD(&nh_grp->vr_list);
5597 err = rhashtable_init(&nh_grp->vr_ht,
5598 &mlxsw_sp_nexthop_group_vr_ht_params);
5600 goto err_nexthop_group_vr_ht_init;
5601 INIT_LIST_HEAD(&nh_grp->fib_list);
5602 nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
5603 nh_grp->ipv4.fi = fi;
5606 err = mlxsw_sp_nexthop4_group_info_init(mlxsw_sp, nh_grp);
5608 goto err_nexthop_group_info_init;
5610 err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5612 goto err_nexthop_group_insert;
5614 nh_grp->can_destroy = true;
5618 err_nexthop_group_insert:
5619 mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
5620 err_nexthop_group_info_init:
5622 rhashtable_destroy(&nh_grp->vr_ht);
5623 err_nexthop_group_vr_ht_init:
5625 return ERR_PTR(err);
5629 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
5630 struct mlxsw_sp_nexthop_group *nh_grp)
5632 if (!nh_grp->can_destroy)
5634 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5635 mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
5636 fib_info_put(nh_grp->ipv4.fi);
5637 WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
5638 rhashtable_destroy(&nh_grp->vr_ht);
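/* An IPv4 route either uses a nexthop object (fi->nh is set), in which
 * case the group already created for that object is looked up by its ID,
 * or legacy nexthops, in which case a group keyed by the fib_info is
 * created on demand and shared by identical routes.
 */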
5642 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
5643 struct mlxsw_sp_fib_entry *fib_entry,
5644 struct fib_info *fi)
5646 struct mlxsw_sp_nexthop_group *nh_grp;
5649 nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
5651 if (WARN_ON_ONCE(!nh_grp))
5656 nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
5658 nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
5660 return PTR_ERR(nh_grp);
5663 list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
5664 fib_entry->nh_group = nh_grp;
5668 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
5669 struct mlxsw_sp_fib_entry *fib_entry)
5671 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5673 list_del(&fib_entry->nexthop_group_node);
5674 if (!list_empty(&nh_grp->fib_list))
5677 if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
5678 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5682 mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
5686 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
5688 struct mlxsw_sp_fib4_entry *fib4_entry;
5690 fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5692 return !fib4_entry->dscp;
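/* An entry is reported as offloaded only if the device can actually
 * forward according to it: remote entries need a valid adjacency index,
 * local entries need a RIF, and blackhole / decap entries are always
 * considered offloaded. IPv4 routes that match on DSCP are never
 * offloaded. Entries that are not offloaded trap packets to the CPU
 * instead.
 */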
5696 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
5698 struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
5700 switch (fib_entry->fib_node->fib->proto) {
5701 case MLXSW_SP_L3_PROTO_IPV4:
5702 if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
5705 case MLXSW_SP_L3_PROTO_IPV6:
5709 switch (fib_entry->type) {
5710 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
5711 return !!nh_group->nhgi->adj_index_valid;
5712 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
5713 return !!mlxsw_sp_nhgi_rif(nh_group->nhgi);
5714 case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
5715 case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
5716 case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
5723 static struct mlxsw_sp_nexthop *
5724 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
5725 const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
5729 for (i = 0; i < nh_grp->nhgi->count; i++) {
5730 struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
5731 struct net_device *dev = mlxsw_sp_nexthop_dev(nh);
5732 struct fib6_info *rt = mlxsw_sp_rt6->rt;
5734 if (dev && dev == rt->fib6_nh->fib_nh_dev &&
5735 ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
5736 &rt->fib6_nh->fib_nh_gw6))
5744 mlxsw_sp_fib4_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5745 struct fib_entry_notifier_info *fen_info)
5747 u32 *p_dst = (u32 *) &fen_info->dst;
5748 struct fib_rt_info fri;
5750 fri.fi = fen_info->fi;
5751 fri.tb_id = fen_info->tb_id;
5752 fri.dst = cpu_to_be32(*p_dst);
5753 fri.dst_len = fen_info->dst_len;
5754 fri.dscp = fen_info->dscp;
5755 fri.type = fen_info->type;
5756 fri.offload = false;
5758 fri.offload_failed = true;
5759 fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5763 mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5764 struct mlxsw_sp_fib_entry *fib_entry)
5766 u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
5767 int dst_len = fib_entry->fib_node->key.prefix_len;
5768 struct mlxsw_sp_fib4_entry *fib4_entry;
5769 struct fib_rt_info fri;
5770 bool should_offload;
5772 should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
5773 fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5775 fri.fi = fib4_entry->fi;
5776 fri.tb_id = fib4_entry->tb_id;
5777 fri.dst = cpu_to_be32(*p_dst);
5778 fri.dst_len = dst_len;
5779 fri.dscp = fib4_entry->dscp;
5780 fri.type = fib4_entry->type;
5781 fri.offload = should_offload;
5782 fri.trap = !should_offload;
5783 fri.offload_failed = false;
5784 fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5788 mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5789 struct mlxsw_sp_fib_entry *fib_entry)
5791 u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
5792 int dst_len = fib_entry->fib_node->key.prefix_len;
5793 struct mlxsw_sp_fib4_entry *fib4_entry;
5794 struct fib_rt_info fri;
5796 fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5798 fri.fi = fib4_entry->fi;
5799 fri.tb_id = fib4_entry->tb_id;
5800 fri.dst = cpu_to_be32(*p_dst);
5801 fri.dst_len = dst_len;
5802 fri.dscp = fib4_entry->dscp;
5803 fri.type = fib4_entry->type;
5804 fri.offload = false;
5806 fri.offload_failed = false;
5807 fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5810 #if IS_ENABLED(CONFIG_IPV6)
5812 mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5813 struct fib6_info **rt_arr,
5818 /* In IPv6 a multipath route is represented using multiple routes, so
5819 * we need to set the flags on all of them.
5821 for (i = 0; i < nrt6; i++)
5822 fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), rt_arr[i],
5823 false, false, true);
5827 mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5828 struct fib6_info **rt_arr,
5834 #if IS_ENABLED(CONFIG_IPV6)
5836 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5837 struct mlxsw_sp_fib_entry *fib_entry)
5839 struct mlxsw_sp_fib6_entry *fib6_entry;
5840 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5841 bool should_offload;
5843 should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
5845 /* In IPv6 a multipath route is represented using multiple routes, so
5846 * we need to set the flags on all of them.
5848 fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
5850 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
5851 fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
5852 should_offload, !should_offload, false);
5856 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5857 struct mlxsw_sp_fib_entry *fib_entry)
5862 #if IS_ENABLED(CONFIG_IPV6)
5864 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5865 struct mlxsw_sp_fib_entry *fib_entry)
5867 struct mlxsw_sp_fib6_entry *fib6_entry;
5868 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5870 fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
5872 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
5873 fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
5874 false, false, false);
5878 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5879 struct mlxsw_sp_fib_entry *fib_entry)
5885 mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5886 struct mlxsw_sp_fib_entry *fib_entry)
5888 switch (fib_entry->fib_node->fib->proto) {
5889 case MLXSW_SP_L3_PROTO_IPV4:
5890 mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry);
5892 case MLXSW_SP_L3_PROTO_IPV6:
5893 mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry);
5899 mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5900 struct mlxsw_sp_fib_entry *fib_entry)
5902 switch (fib_entry->fib_node->fib->proto) {
5903 case MLXSW_SP_L3_PROTO_IPV4:
5904 mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5906 case MLXSW_SP_L3_PROTO_IPV6:
5907 mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5913 mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
5914 struct mlxsw_sp_fib_entry *fib_entry,
5915 enum mlxsw_reg_ralue_op op)
5918 case MLXSW_REG_RALUE_OP_WRITE_WRITE:
5919 mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry);
5921 case MLXSW_REG_RALUE_OP_WRITE_DELETE:
5922 mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry);
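/* Routes are programmed to the device with the RALUE register: the entry
 * is packed with its protocol, virtual router and prefix, and then given
 * an action matching its type. For example (illustrative, mirroring
 * mlxsw_sp_fib_entry_op_remote() below), a remote route is written as:
 *
 *	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
 *	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
 *					adjacency_index, ecmp_size);
 *	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
 */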
5930 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
5931 const struct mlxsw_sp_fib_entry *fib_entry,
5932 enum mlxsw_reg_ralue_op op)
5934 struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
5935 enum mlxsw_reg_ralxx_protocol proto;
5938 proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
5940 switch (fib->proto) {
5941 case MLXSW_SP_L3_PROTO_IPV4:
5942 p_dip = (u32 *) fib_entry->fib_node->key.addr;
5943 mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
5944 fib_entry->fib_node->key.prefix_len,
5947 case MLXSW_SP_L3_PROTO_IPV6:
5948 mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
5949 fib_entry->fib_node->key.prefix_len,
5950 fib_entry->fib_node->key.addr);
5955 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
5956 struct mlxsw_sp_fib_entry *fib_entry,
5957 enum mlxsw_reg_ralue_op op)
5959 struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
5960 struct mlxsw_sp_nexthop_group_info *nhgi = nh_group->nhgi;
5961 char ralue_pl[MLXSW_REG_RALUE_LEN];
5962 enum mlxsw_reg_ralue_trap_action trap_action;
5964 u32 adjacency_index = 0;
5967 /* In case the nexthop group adjacency index is valid, use it
5968 * with provided ECMP size. Otherwise, setup trap and pass
5969 * traffic to kernel.
5971 if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
5972 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5973 adjacency_index = nhgi->adj_index;
5974 ecmp_size = nhgi->ecmp_size;
5975 } else if (!nhgi->adj_index_valid && nhgi->count &&
5976 mlxsw_sp_nhgi_rif(nhgi)) {
5977 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5978 adjacency_index = mlxsw_sp->router->adj_trap_index;
5981 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
5982 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
5985 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5986 mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
5987 adjacency_index, ecmp_size);
5988 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5991 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
5992 struct mlxsw_sp_fib_entry *fib_entry,
5993 enum mlxsw_reg_ralue_op op)
5995 struct mlxsw_sp_rif *rif = mlxsw_sp_nhgi_rif(fib_entry->nh_group->nhgi);
5996 enum mlxsw_reg_ralue_trap_action trap_action;
5997 char ralue_pl[MLXSW_REG_RALUE_LEN];
6001 if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
6002 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
6003 rif_index = rif->rif_index;
6005 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
6006 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
6009 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6010 mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
6012 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6015 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
6016 struct mlxsw_sp_fib_entry *fib_entry,
6017 enum mlxsw_reg_ralue_op op)
6019 char ralue_pl[MLXSW_REG_RALUE_LEN];
6021 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6022 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
6023 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6026 static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
6027 struct mlxsw_sp_fib_entry *fib_entry,
6028 enum mlxsw_reg_ralue_op op)
6030 enum mlxsw_reg_ralue_trap_action trap_action;
6031 char ralue_pl[MLXSW_REG_RALUE_LEN];
6033 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
6034 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6035 mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0);
6036 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6040 mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp,
6041 struct mlxsw_sp_fib_entry *fib_entry,
6042 enum mlxsw_reg_ralue_op op)
6044 enum mlxsw_reg_ralue_trap_action trap_action;
6045 char ralue_pl[MLXSW_REG_RALUE_LEN];
6048 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
6049 trap_id = MLXSW_TRAP_ID_RTR_INGRESS1;
6051 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6052 mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, 0);
6053 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6057 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
6058 struct mlxsw_sp_fib_entry *fib_entry,
6059 enum mlxsw_reg_ralue_op op)
6061 struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
6062 const struct mlxsw_sp_ipip_ops *ipip_ops;
6063 char ralue_pl[MLXSW_REG_RALUE_LEN];
6066 if (WARN_ON(!ipip_entry))
6069 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
6070 err = ipip_ops->decap_config(mlxsw_sp, ipip_entry,
6071 fib_entry->decap.tunnel_index);
6075 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6076 mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
6077 fib_entry->decap.tunnel_index);
6078 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6081 static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
6082 struct mlxsw_sp_fib_entry *fib_entry,
6083 enum mlxsw_reg_ralue_op op)
6085 char ralue_pl[MLXSW_REG_RALUE_LEN];
6087 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6088 mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
6089 fib_entry->decap.tunnel_index);
6090 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6093 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
6094 struct mlxsw_sp_fib_entry *fib_entry,
6095 enum mlxsw_reg_ralue_op op)
6097 switch (fib_entry->type) {
6098 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
6099 return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
6100 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
6101 return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
6102 case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
6103 return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
6104 case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
6105 return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
6106 case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE:
6107 return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, fib_entry,
6109 case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
6110 return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
6112 case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
6113 return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
6118 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
6119 struct mlxsw_sp_fib_entry *fib_entry,
6120 enum mlxsw_reg_ralue_op op)
6122 int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
6127 mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op);
6132 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
6133 struct mlxsw_sp_fib_entry *fib_entry)
6135 return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
6136 MLXSW_REG_RALUE_OP_WRITE_WRITE);
6139 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
6140 struct mlxsw_sp_fib_entry *fib_entry)
6142 return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
6143 MLXSW_REG_RALUE_OP_WRITE_DELETE);
6147 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
6148 const struct fib_entry_notifier_info *fen_info,
6149 struct mlxsw_sp_fib_entry *fib_entry)
6151 struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi;
6152 union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
6153 struct mlxsw_sp_router *router = mlxsw_sp->router;
6154 u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
6155 int ifindex = nhgi->nexthops[0].ifindex;
6156 struct mlxsw_sp_ipip_entry *ipip_entry;
6158 switch (fen_info->type) {
6160 ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex,
6161 MLXSW_SP_L3_PROTO_IPV4, dip);
6162 if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
6163 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
6164 return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
6168 if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
6169 MLXSW_SP_L3_PROTO_IPV4,
6173 tunnel_index = router->nve_decap_config.tunnel_index;
6174 fib_entry->decap.tunnel_index = tunnel_index;
6175 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
6180 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
6183 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
6185 case RTN_UNREACHABLE:
6187 /* Packets hitting these routes need to be trapped, but
6188 * can do so with a lower priority than packets directed
6189 * at the host, so use action type local instead of trap.
6191 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
6195 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
6197 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
6205 mlxsw_sp_fib_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
6206 struct mlxsw_sp_fib_entry *fib_entry)
6208 switch (fib_entry->type) {
6209 case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
6210 mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
6218 mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
6219 struct mlxsw_sp_fib4_entry *fib4_entry)
6221 mlxsw_sp_fib_entry_type_unset(mlxsw_sp, &fib4_entry->common);
6224 static struct mlxsw_sp_fib4_entry *
6225 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
6226 struct mlxsw_sp_fib_node *fib_node,
6227 const struct fib_entry_notifier_info *fen_info)
6229 struct mlxsw_sp_fib4_entry *fib4_entry;
6230 struct mlxsw_sp_fib_entry *fib_entry;
6233 fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
6235 return ERR_PTR(-ENOMEM);
6236 fib_entry = &fib4_entry->common;
6238 err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
6240 goto err_nexthop4_group_get;
6242 err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
6245 goto err_nexthop_group_vr_link;
6247 err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
6249 goto err_fib4_entry_type_set;
6251 fib4_entry->fi = fen_info->fi;
6252 fib_info_hold(fib4_entry->fi);
6253 fib4_entry->tb_id = fen_info->tb_id;
6254 fib4_entry->type = fen_info->type;
6255 fib4_entry->dscp = fen_info->dscp;
6257 fib_entry->fib_node = fib_node;
6261 err_fib4_entry_type_set:
6262 mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
6263 err_nexthop_group_vr_link:
6264 mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
6265 err_nexthop4_group_get:
6267 return ERR_PTR(err);
6270 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
6271 struct mlxsw_sp_fib4_entry *fib4_entry)
6273 struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
6275 fib_info_put(fib4_entry->fi);
6276 mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, fib4_entry);
6277 mlxsw_sp_nexthop_group_vr_unlink(fib4_entry->common.nh_group,
6279 mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
6283 static struct mlxsw_sp_fib4_entry *
6284 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
6285 const struct fib_entry_notifier_info *fen_info)
6287 struct mlxsw_sp_fib4_entry *fib4_entry;
6288 struct mlxsw_sp_fib_node *fib_node;
6289 struct mlxsw_sp_fib *fib;
6290 struct mlxsw_sp_vr *vr;
6292 vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
6295 fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
6297 fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
6298 sizeof(fen_info->dst),
6303 fib4_entry = container_of(fib_node->fib_entry,
6304 struct mlxsw_sp_fib4_entry, common);
6305 if (fib4_entry->tb_id == fen_info->tb_id &&
6306 fib4_entry->dscp == fen_info->dscp &&
6307 fib4_entry->type == fen_info->type &&
6308 fib4_entry->fi == fen_info->fi)
6314 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
6315 .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
6316 .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
6317 .key_len = sizeof(struct mlxsw_sp_fib_key),
6318 .automatic_shrinking = true,
6321 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
6322 struct mlxsw_sp_fib_node *fib_node)
6324 return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
6325 mlxsw_sp_fib_ht_params);
6328 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
6329 struct mlxsw_sp_fib_node *fib_node)
6331 rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
6332 mlxsw_sp_fib_ht_params);
6335 static struct mlxsw_sp_fib_node *
6336 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
6337 size_t addr_len, unsigned char prefix_len)
6339 struct mlxsw_sp_fib_key key;
6341 memset(&key, 0, sizeof(key));
6342 memcpy(key.addr, addr, addr_len);
6343 key.prefix_len = prefix_len;
6344 return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
6347 static struct mlxsw_sp_fib_node *
6348 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
6349 size_t addr_len, unsigned char prefix_len)
6351 struct mlxsw_sp_fib_node *fib_node;
6353 fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
6357 list_add(&fib_node->list, &fib->node_list);
6358 memcpy(fib_node->key.addr, addr, addr_len);
6359 fib_node->key.prefix_len = prefix_len;
6364 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
6366 list_del(&fib_node->list);
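/* Each FIB tracks the prefix lengths it uses in an LPM tree. Adding a
 * node with a new prefix length fetches (or creates) a tree covering the
 * new prefix usage and rebinds the virtual routers using this FIB to it;
 * removing the last node of a prefix length tries to shrink back to a
 * smaller tree and keeps the current one if that fails.
 */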
6370 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
6371 struct mlxsw_sp_fib_node *fib_node)
6373 struct mlxsw_sp_prefix_usage req_prefix_usage;
6374 struct mlxsw_sp_fib *fib = fib_node->fib;
6375 struct mlxsw_sp_lpm_tree *lpm_tree;
6378 lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
6379 if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
6382 mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
6383 mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
6384 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
6386 if (IS_ERR(lpm_tree))
6387 return PTR_ERR(lpm_tree);
6389 err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
6391 goto err_lpm_tree_replace;
6394 lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
6397 err_lpm_tree_replace:
6398 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
6402 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
6403 struct mlxsw_sp_fib_node *fib_node)
6405 struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
6406 struct mlxsw_sp_prefix_usage req_prefix_usage;
6407 struct mlxsw_sp_fib *fib = fib_node->fib;
6410 if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
6412 /* Try to construct a new LPM tree from the current prefix usage
6413 * minus the unused one. If we fail, continue using the old one.
6415 mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
6416 mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
6417 fib_node->key.prefix_len);
6418 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
6420 if (IS_ERR(lpm_tree))
6423 err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
6425 goto err_lpm_tree_replace;
6429 err_lpm_tree_replace:
6430 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
6433 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
6434 struct mlxsw_sp_fib_node *fib_node,
6435 struct mlxsw_sp_fib *fib)
6439 err = mlxsw_sp_fib_node_insert(fib, fib_node);
6442 fib_node->fib = fib;
6444 err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
6446 goto err_fib_lpm_tree_link;
6450 err_fib_lpm_tree_link:
6451 fib_node->fib = NULL;
6452 mlxsw_sp_fib_node_remove(fib, fib_node);
6456 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
6457 struct mlxsw_sp_fib_node *fib_node)
6459 struct mlxsw_sp_fib *fib = fib_node->fib;
6461 mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
6462 fib_node->fib = NULL;
6463 mlxsw_sp_fib_node_remove(fib, fib_node);
6466 static struct mlxsw_sp_fib_node *
6467 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
6468 size_t addr_len, unsigned char prefix_len,
6469 enum mlxsw_sp_l3proto proto)
6471 struct mlxsw_sp_fib_node *fib_node;
6472 struct mlxsw_sp_fib *fib;
6473 struct mlxsw_sp_vr *vr;
6476 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
6478 return ERR_CAST(vr);
6479 fib = mlxsw_sp_vr_fib(vr, proto);
6481 fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
6485 fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
6488 goto err_fib_node_create;
6491 err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
6493 goto err_fib_node_init;
6498 mlxsw_sp_fib_node_destroy(fib_node);
6499 err_fib_node_create:
6500 mlxsw_sp_vr_put(mlxsw_sp, vr);
6501 return ERR_PTR(err);
6504 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
6505 struct mlxsw_sp_fib_node *fib_node)
6507 struct mlxsw_sp_vr *vr = fib_node->fib->vr;
6509 if (fib_node->fib_entry)
6511 mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
6512 mlxsw_sp_fib_node_destroy(fib_node);
6513 mlxsw_sp_vr_put(mlxsw_sp, vr);
6516 static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp,
6517 struct mlxsw_sp_fib_entry *fib_entry)
6519 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
6522 fib_node->fib_entry = fib_entry;
6524 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
6526 goto err_fib_entry_update;
6530 err_fib_entry_update:
6531 fib_node->fib_entry = NULL;
6536 mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
6537 struct mlxsw_sp_fib_entry *fib_entry)
6539 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
6541 mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
6542 fib_node->fib_entry = NULL;
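/* IPv4 route replace: the new entry is created and linked to the FIB
 * node in place of the currently programmed one before the replaced
 * entry is destroyed, so the prefix never stops being programmed during
 * the replacement. A main table route is not allowed to replace a local
 * table route for the same prefix.
 */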
6545 static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry)
6547 struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
6548 struct mlxsw_sp_fib4_entry *fib4_replaced;
6550 if (!fib_node->fib_entry)
6553 fib4_replaced = container_of(fib_node->fib_entry,
6554 struct mlxsw_sp_fib4_entry, common);
6555 if (fib4_entry->tb_id == RT_TABLE_MAIN &&
6556 fib4_replaced->tb_id == RT_TABLE_LOCAL)
6563 mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
6564 const struct fib_entry_notifier_info *fen_info)
6566 struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced;
6567 struct mlxsw_sp_fib_entry *replaced;
6568 struct mlxsw_sp_fib_node *fib_node;
6571 if (fen_info->fi->nh &&
6572 !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, fen_info->fi->nh->id))
6575 fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
6576 &fen_info->dst, sizeof(fen_info->dst),
6578 MLXSW_SP_L3_PROTO_IPV4);
6579 if (IS_ERR(fib_node)) {
6580 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
6581 return PTR_ERR(fib_node);
6584 fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
6585 if (IS_ERR(fib4_entry)) {
6586 dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
6587 err = PTR_ERR(fib4_entry);
6588 goto err_fib4_entry_create;
6591 if (!mlxsw_sp_fib4_allow_replace(fib4_entry)) {
6592 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6593 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6597 replaced = fib_node->fib_entry;
6598 err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common);
6600 dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
6601 goto err_fib_node_entry_link;
6604 /* Nothing to replace */
6608 mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
6609 fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry,
6611 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced);
6615 err_fib_node_entry_link:
6616 fib_node->fib_entry = replaced;
6617 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6618 err_fib4_entry_create:
6619 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6623 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
6624 struct fib_entry_notifier_info *fen_info)
6626 struct mlxsw_sp_fib4_entry *fib4_entry;
6627 struct mlxsw_sp_fib_node *fib_node;
6629 fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
6632 fib_node = fib4_entry->common.fib_node;
6634 mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common);
6635 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6636 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6639 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
6641 /* Multicast routes aren't supported, so ignore them. Neighbour
6642 * Discovery packets are specifically trapped.
6644 if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
6647 /* Cloned routes are irrelevant in the forwarding path. */
6648 if (rt->fib6_flags & RTF_CACHE)
6654 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
6656 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6658 mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
6660 return ERR_PTR(-ENOMEM);
6662 /* In case of route replace, replaced route is deleted with
6663 * no notification. Take reference to prevent accessing freed
6664 * memory.
6665 */
6666 mlxsw_sp_rt6->rt = rt;
6669 return mlxsw_sp_rt6;
6672 #if IS_ENABLED(CONFIG_IPV6)
6673 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
6675 fib6_info_release(rt);
6678 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
6683 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
6685 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
6687 if (!mlxsw_sp_rt6->rt->nh)
6688 fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
6689 mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
6690 kfree(mlxsw_sp_rt6);
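/* An IPv6 multipath route is notified as several sibling fib6_info
 * structs. Each one is wrapped in a struct mlxsw_sp_rt6 and kept on the
 * entry's rt6_list; the first entry on the list represents the route as
 * a whole and the nexthop group is built from all of them.
 */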
6693 static struct fib6_info *
6694 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
6696 return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
6700 static struct mlxsw_sp_rt6 *
6701 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
6702 const struct fib6_info *rt)
6704 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6706 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
6707 if (mlxsw_sp_rt6->rt == rt)
6708 return mlxsw_sp_rt6;
6714 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
6715 const struct fib6_info *rt,
6716 enum mlxsw_sp_ipip_type *ret)
6718 return rt->fib6_nh->fib_nh_dev &&
6719 mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
6722 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
6723 struct mlxsw_sp_nexthop_group *nh_grp,
6724 struct mlxsw_sp_nexthop *nh,
6725 const struct fib6_info *rt)
6727 struct net_device *dev = rt->fib6_nh->fib_nh_dev;
6730 nh->nhgi = nh_grp->nhgi;
6731 nh->nh_weight = rt->fib6_nh->fib_nh_weight;
6732 memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
6733 #if IS_ENABLED(CONFIG_IPV6)
6734 nh->neigh_tbl = &nd_tbl;
6736 mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
6738 list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
6742 nh->ifindex = dev->ifindex;
6744 err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
6746 goto err_nexthop_type_init;
6750 err_nexthop_type_init:
6751 list_del(&nh->router_list_node);
6752 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
6756 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
6757 struct mlxsw_sp_nexthop *nh)
6759 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
6760 list_del(&nh->router_list_node);
6761 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
6764 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
6765 const struct fib6_info *rt)
6767 return rt->fib6_nh->fib_nh_gw_family ||
6768 mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
6772 mlxsw_sp_nexthop6_group_info_init(struct mlxsw_sp *mlxsw_sp,
6773 struct mlxsw_sp_nexthop_group *nh_grp,
6774 struct mlxsw_sp_fib6_entry *fib6_entry)
6776 struct mlxsw_sp_nexthop_group_info *nhgi;
6777 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6778 struct mlxsw_sp_nexthop *nh;
6781 nhgi = kzalloc(struct_size(nhgi, nexthops, fib6_entry->nrt6),
6785 nh_grp->nhgi = nhgi;
6786 nhgi->nh_grp = nh_grp;
6787 mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
6788 struct mlxsw_sp_rt6, list);
6789 nhgi->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
6790 nhgi->count = fib6_entry->nrt6;
6791 for (i = 0; i < nhgi->count; i++) {
6792 struct fib6_info *rt = mlxsw_sp_rt6->rt;
6794 nh = &nhgi->nexthops[i];
6795 err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
6797 goto err_nexthop6_init;
6798 mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
6800 nh_grp->nhgi = nhgi;
6801 err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
6804 err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
6806 goto err_group_refresh;
6811 mlxsw_sp_nexthop_group_dec(mlxsw_sp);
6815 for (i--; i >= 0; i--) {
6816 nh = &nhgi->nexthops[i];
6817 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
6824 mlxsw_sp_nexthop6_group_info_fini(struct mlxsw_sp *mlxsw_sp,
6825 struct mlxsw_sp_nexthop_group *nh_grp)
6827 struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
6830 mlxsw_sp_nexthop_group_dec(mlxsw_sp);
6831 for (i = nhgi->count - 1; i >= 0; i--) {
6832 struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
6834 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
6836 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
6837 WARN_ON_ONCE(nhgi->adj_index_valid);
6841 static struct mlxsw_sp_nexthop_group *
6842 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
6843 struct mlxsw_sp_fib6_entry *fib6_entry)
6845 struct mlxsw_sp_nexthop_group *nh_grp;
6848 nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
6850 return ERR_PTR(-ENOMEM);
6851 INIT_LIST_HEAD(&nh_grp->vr_list);
6852 err = rhashtable_init(&nh_grp->vr_ht,
6853 &mlxsw_sp_nexthop_group_vr_ht_params);
6855 goto err_nexthop_group_vr_ht_init;
6856 INIT_LIST_HEAD(&nh_grp->fib_list);
6857 nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
6859 err = mlxsw_sp_nexthop6_group_info_init(mlxsw_sp, nh_grp, fib6_entry);
6861 goto err_nexthop_group_info_init;
6863 err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
6865 goto err_nexthop_group_insert;
6867 nh_grp->can_destroy = true;
6871 err_nexthop_group_insert:
6872 mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
6873 err_nexthop_group_info_init:
6874 rhashtable_destroy(&nh_grp->vr_ht);
6875 err_nexthop_group_vr_ht_init:
6877 return ERR_PTR(err);
6881 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
6882 struct mlxsw_sp_nexthop_group *nh_grp)
6884 if (!nh_grp->can_destroy)
6886 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
6887 mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
6888 WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
6889 rhashtable_destroy(&nh_grp->vr_ht);
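/* Bind a FIB6 entry to a nexthop group. Routes that use a nexthop object
 * (rt->nh) are attached to the corresponding object-based group; otherwise
 * an existing IPv6 group with the same nexthops is reused or a new one is
 * created, and the entry is added to the group's fib_list.
 */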
6893 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
6894 struct mlxsw_sp_fib6_entry *fib6_entry)
6896 struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
6897 struct mlxsw_sp_nexthop_group *nh_grp;
6900 nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
6902 if (WARN_ON_ONCE(!nh_grp))
6907 nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
6909 nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
6911 return PTR_ERR(nh_grp);
6914 /* The route and the nexthop are described by the same struct, so we
6915 * need to update the nexthop offload indication for the new route. */
6917 __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
6920 list_add_tail(&fib6_entry->common.nexthop_group_node,
6922 fib6_entry->common.nh_group = nh_grp;
6927 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
6928 struct mlxsw_sp_fib_entry *fib_entry)
6930 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
6932 list_del(&fib_entry->nexthop_group_node);
6933 if (!list_empty(&nh_grp->fib_list))
6936 if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
6937 mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
6941 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
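/* Move a FIB6 entry to a nexthop group matching its current set of routes:
 * unlink the old group, get (or create) the new one, relink the virtual
 * router and update the entry in hardware. On failure the old group is
 * restored; on success it is destroyed if it became unused.
 */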
6945 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
6946 struct mlxsw_sp_fib6_entry *fib6_entry)
6948 struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
6949 struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
6952 mlxsw_sp_nexthop_group_vr_unlink(old_nh_grp, fib_node->fib);
6953 fib6_entry->common.nh_group = NULL;
6954 list_del(&fib6_entry->common.nexthop_group_node);
6956 err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
6958 goto err_nexthop6_group_get;
6960 err = mlxsw_sp_nexthop_group_vr_link(fib6_entry->common.nh_group,
6963 goto err_nexthop_group_vr_link;
6965 /* In case this entry is offloaded, then the adjacency index
6966 * currently associated with it in the device's table is that
6967 * of the old group. Start using the new one instead. */
6969 err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common);
6971 goto err_fib_entry_update;
6973 if (list_empty(&old_nh_grp->fib_list))
6974 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
6978 err_fib_entry_update:
6979 mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
6981 err_nexthop_group_vr_link:
6982 mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
6983 err_nexthop6_group_get:
6984 list_add_tail(&fib6_entry->common.nexthop_group_node,
6985 &old_nh_grp->fib_list);
6986 fib6_entry->common.nh_group = old_nh_grp;
6987 mlxsw_sp_nexthop_group_vr_link(old_nh_grp, fib_node->fib);
6992 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
6993 struct mlxsw_sp_fib6_entry *fib6_entry,
6994 struct fib6_info **rt_arr, unsigned int nrt6)
6996 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6999 for (i = 0; i < nrt6; i++) {
7000 mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
7001 if (IS_ERR(mlxsw_sp_rt6)) {
7002 err = PTR_ERR(mlxsw_sp_rt6);
7003 goto err_rt6_unwind;
7006 list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
7010 err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
7012 goto err_rt6_unwind;
7017 for (; i > 0; i--) {
7019 mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
7020 struct mlxsw_sp_rt6, list);
7021 list_del(&mlxsw_sp_rt6->list);
7022 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
7028 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
7029 struct mlxsw_sp_fib6_entry *fib6_entry,
7030 struct fib6_info **rt_arr, unsigned int nrt6)
7032 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
7035 for (i = 0; i < nrt6; i++) {
7036 mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
7038 if (WARN_ON_ONCE(!mlxsw_sp_rt6))
7042 list_del(&mlxsw_sp_rt6->list);
7043 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
7046 mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
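/* Local IPv6 routes trap packets to the CPU by default, but are converted
 * to IP-in-IP or NVE decapsulation entries when the destination address
 * matches an active tunnel decap address.
 */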
7050 mlxsw_sp_fib6_entry_type_set_local(struct mlxsw_sp *mlxsw_sp,
7051 struct mlxsw_sp_fib_entry *fib_entry,
7052 const struct fib6_info *rt)
7054 struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi;
7055 union mlxsw_sp_l3addr dip = { .addr6 = rt->fib6_dst.addr };
7056 u32 tb_id = mlxsw_sp_fix_tb_id(rt->fib6_table->tb6_id);
7057 struct mlxsw_sp_router *router = mlxsw_sp->router;
7058 int ifindex = nhgi->nexthops[0].ifindex;
7059 struct mlxsw_sp_ipip_entry *ipip_entry;
7061 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
7062 ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex,
7063 MLXSW_SP_L3_PROTO_IPV6,
7066 if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
7067 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
7068 return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, fib_entry,
7071 if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
7072 MLXSW_SP_L3_PROTO_IPV6, &dip)) {
7075 tunnel_index = router->nve_decap_config.tunnel_index;
7076 fib_entry->decap.tunnel_index = tunnel_index;
7077 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
7083 static int mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
7084 struct mlxsw_sp_fib_entry *fib_entry,
7085 const struct fib6_info *rt)
7087 if (rt->fib6_flags & RTF_LOCAL)
7088 return mlxsw_sp_fib6_entry_type_set_local(mlxsw_sp, fib_entry,
7090 if (rt->fib6_flags & RTF_ANYCAST)
7091 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
7092 else if (rt->fib6_type == RTN_BLACKHOLE)
7093 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
7094 else if (rt->fib6_flags & RTF_REJECT)
7095 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
7096 else if (fib_entry->nh_group->nhgi->gateway)
7097 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
7099 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
7105 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
7107 struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
7109 list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
7112 list_del(&mlxsw_sp_rt6->list);
7113 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
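/* Create a FIB6 entry for a (possibly multipath) route: wrap each
 * fib6_info in an mlxsw_sp_rt6, bind the entry to a nexthop group, link
 * the group to the virtual router and derive the entry type from the
 * first route.
 */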
7117 static struct mlxsw_sp_fib6_entry *
7118 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
7119 struct mlxsw_sp_fib_node *fib_node,
7120 struct fib6_info **rt_arr, unsigned int nrt6)
7122 struct mlxsw_sp_fib6_entry *fib6_entry;
7123 struct mlxsw_sp_fib_entry *fib_entry;
7124 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
7127 fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
7129 return ERR_PTR(-ENOMEM);
7130 fib_entry = &fib6_entry->common;
7132 INIT_LIST_HEAD(&fib6_entry->rt6_list);
7134 for (i = 0; i < nrt6; i++) {
7135 mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
7136 if (IS_ERR(mlxsw_sp_rt6)) {
7137 err = PTR_ERR(mlxsw_sp_rt6);
7138 goto err_rt6_unwind;
7140 list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
7144 err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
7146 goto err_rt6_unwind;
7148 err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
7151 goto err_nexthop_group_vr_link;
7153 err = mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);
7155 goto err_fib6_entry_type_set;
7157 fib_entry->fib_node = fib_node;
7161 err_fib6_entry_type_set:
7162 mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
7163 err_nexthop_group_vr_link:
7164 mlxsw_sp_nexthop6_group_put(mlxsw_sp, fib_entry);
7166 for (; i > 0; i--) {
7168 mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
7169 struct mlxsw_sp_rt6, list);
7170 list_del(&mlxsw_sp_rt6->list);
7171 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
7174 return ERR_PTR(err);
7178 mlxsw_sp_fib6_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
7179 struct mlxsw_sp_fib6_entry *fib6_entry)
7181 mlxsw_sp_fib_entry_type_unset(mlxsw_sp, &fib6_entry->common);
7184 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
7185 struct mlxsw_sp_fib6_entry *fib6_entry)
7187 struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
7189 mlxsw_sp_fib6_entry_type_unset(mlxsw_sp, fib6_entry);
7190 mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
7192 mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
7193 mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
7194 WARN_ON(fib6_entry->nrt6);
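/* Look up the FIB6 entry corresponding to a given fib6_info: resolve the
 * virtual router from the route's table, find the FIB node by destination
 * prefix and verify that the node's entry actually references this route.
 */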
7198 static struct mlxsw_sp_fib6_entry *
7199 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
7200 const struct fib6_info *rt)
7202 struct mlxsw_sp_fib6_entry *fib6_entry;
7203 struct mlxsw_sp_fib_node *fib_node;
7204 struct mlxsw_sp_fib *fib;
7205 struct fib6_info *cmp_rt;
7206 struct mlxsw_sp_vr *vr;
7208 vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
7211 fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
7213 fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
7214 sizeof(rt->fib6_dst.addr),
7219 fib6_entry = container_of(fib_node->fib_entry,
7220 struct mlxsw_sp_fib6_entry, common);
7221 cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
7222 if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id &&
7223 rt->fib6_metric == cmp_rt->fib6_metric &&
7224 mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
7230 static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry)
7232 struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
7233 struct mlxsw_sp_fib6_entry *fib6_replaced;
7234 struct fib6_info *rt, *rt_replaced;
7236 if (!fib_node->fib_entry)
7239 fib6_replaced = container_of(fib_node->fib_entry,
7240 struct mlxsw_sp_fib6_entry,
7242 rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
7243 rt_replaced = mlxsw_sp_fib6_entry_rt(fib6_replaced);
7244 if (rt->fib6_table->tb6_id == RT_TABLE_MAIN &&
7245 rt_replaced->fib6_table->tb6_id == RT_TABLE_LOCAL)
7251 static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
7252 struct fib6_info **rt_arr,
7255 struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced;
7256 struct mlxsw_sp_fib_entry *replaced;
7257 struct mlxsw_sp_fib_node *fib_node;
7258 struct fib6_info *rt = rt_arr[0];
7261 if (rt->fib6_src.plen)
7264 if (mlxsw_sp_fib6_rt_should_ignore(rt))
7267 if (rt->nh && !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, rt->nh->id))
7270 fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
7272 sizeof(rt->fib6_dst.addr),
7274 MLXSW_SP_L3_PROTO_IPV6);
7275 if (IS_ERR(fib_node))
7276 return PTR_ERR(fib_node);
7278 fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
7280 if (IS_ERR(fib6_entry)) {
7281 err = PTR_ERR(fib6_entry);
7282 goto err_fib6_entry_create;
7285 if (!mlxsw_sp_fib6_allow_replace(fib6_entry)) {
7286 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7287 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7291 replaced = fib_node->fib_entry;
7292 err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common);
7294 goto err_fib_node_entry_link;
7296 /* Nothing to replace */
7300 mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
7301 fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry,
7303 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced);
7307 err_fib_node_entry_link:
7308 fib_node->fib_entry = replaced;
7309 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7310 err_fib6_entry_create:
7311 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7315 static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp,
7316 struct fib6_info **rt_arr,
7319 struct mlxsw_sp_fib6_entry *fib6_entry;
7320 struct mlxsw_sp_fib_node *fib_node;
7321 struct fib6_info *rt = rt_arr[0];
7324 if (rt->fib6_src.plen)
7327 if (mlxsw_sp_fib6_rt_should_ignore(rt))
7330 fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
7332 sizeof(rt->fib6_dst.addr),
7334 MLXSW_SP_L3_PROTO_IPV6);
7335 if (IS_ERR(fib_node))
7336 return PTR_ERR(fib_node);
7338 if (WARN_ON_ONCE(!fib_node->fib_entry)) {
7339 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7343 fib6_entry = container_of(fib_node->fib_entry,
7344 struct mlxsw_sp_fib6_entry, common);
7345 err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr,
7348 goto err_fib6_entry_nexthop_add;
7352 err_fib6_entry_nexthop_add:
7353 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7357 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
7358 struct fib6_info **rt_arr,
7361 struct mlxsw_sp_fib6_entry *fib6_entry;
7362 struct mlxsw_sp_fib_node *fib_node;
7363 struct fib6_info *rt = rt_arr[0];
7365 if (mlxsw_sp_fib6_rt_should_ignore(rt))
7368 /* Multipath routes are first added to the FIB trie and only then
7369 * notified. If we vetoed the addition, we will get a delete
7370 * notification for a route we do not have. Therefore, do not warn if
7371 * route was not found. */
7373 fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
7377 /* If not all the nexthops are deleted, then only reduce the nexthop group. */
7380 if (nrt6 != fib6_entry->nrt6) {
7381 mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr,
7386 fib_node = fib6_entry->common.fib_node;
7388 mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common);
7389 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7390 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
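/* Multicast route (MR) notifications are dispatched per address family:
 * IPMR events use the virtual router's IPv4 MR table and IP6MR events use
 * the IPv6 one.
 */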
7393 static struct mlxsw_sp_mr_table *
7394 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
7396 if (family == RTNL_FAMILY_IPMR)
7397 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
7399 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
7402 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
7403 struct mfc_entry_notifier_info *men_info,
7406 struct mlxsw_sp_mr_table *mrt;
7407 struct mlxsw_sp_vr *vr;
7409 vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
7413 mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
7414 return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
7417 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
7418 struct mfc_entry_notifier_info *men_info)
7420 struct mlxsw_sp_mr_table *mrt;
7421 struct mlxsw_sp_vr *vr;
7423 vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
7427 mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
7428 mlxsw_sp_mr_route_del(mrt, men_info->mfc);
7429 mlxsw_sp_vr_put(mlxsw_sp, vr);
7433 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
7434 struct vif_entry_notifier_info *ven_info)
7436 struct mlxsw_sp_mr_table *mrt;
7437 struct mlxsw_sp_rif *rif;
7438 struct mlxsw_sp_vr *vr;
7440 vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
7444 mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
7445 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
7446 return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
7447 ven_info->vif_index,
7448 ven_info->vif_flags, rif);
7452 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
7453 struct vif_entry_notifier_info *ven_info)
7455 struct mlxsw_sp_mr_table *mrt;
7456 struct mlxsw_sp_vr *vr;
7458 vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
7462 mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
7463 mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
7464 mlxsw_sp_vr_put(mlxsw_sp, vr);
7467 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
7468 struct mlxsw_sp_fib_node *fib_node)
7470 struct mlxsw_sp_fib4_entry *fib4_entry;
7472 fib4_entry = container_of(fib_node->fib_entry,
7473 struct mlxsw_sp_fib4_entry, common);
7474 mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
7475 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
7476 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7479 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
7480 struct mlxsw_sp_fib_node *fib_node)
7482 struct mlxsw_sp_fib6_entry *fib6_entry;
7484 fib6_entry = container_of(fib_node->fib_entry,
7485 struct mlxsw_sp_fib6_entry, common);
7486 mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
7487 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7488 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
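/* Flush a FIB node during teardown: unlink and destroy its entry according
 * to the node's protocol (IPv4 or IPv6) and drop the node reference.
 */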
7491 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
7492 struct mlxsw_sp_fib_node *fib_node)
7494 switch (fib_node->fib->proto) {
7495 case MLXSW_SP_L3_PROTO_IPV4:
7496 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
7498 case MLXSW_SP_L3_PROTO_IPV6:
7499 mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
7504 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
7505 struct mlxsw_sp_vr *vr,
7506 enum mlxsw_sp_l3proto proto)
7508 struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
7509 struct mlxsw_sp_fib_node *fib_node, *tmp;
7511 list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
7512 bool do_break = &tmp->list == &fib->node_list;
7514 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
7520 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
7522 int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
7525 for (i = 0; i < max_vrs; i++) {
7526 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
7528 if (!mlxsw_sp_vr_is_used(vr))
7531 for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
7532 mlxsw_sp_mr_table_flush(vr->mr_table[j]);
7533 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
7535 /* If virtual router was only used for IPv4, then it's no longer used. */
7538 if (!mlxsw_sp_vr_is_used(vr))
7540 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
7544 struct mlxsw_sp_fib6_event_work {
7545 struct fib6_info **rt_arr;
7549 struct mlxsw_sp_fib_event_work {
7550 struct work_struct work;
7551 netdevice_tracker dev_tracker;
7553 struct mlxsw_sp_fib6_event_work fib6_work;
7554 struct fib_entry_notifier_info fen_info;
7555 struct fib_rule_notifier_info fr_info;
7556 struct fib_nh_notifier_info fnh_info;
7557 struct mfc_entry_notifier_info men_info;
7558 struct vif_entry_notifier_info ven_info;
7560 struct mlxsw_sp *mlxsw_sp;
7561 unsigned long event;
7565 mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work,
7566 struct fib6_entry_notifier_info *fen6_info)
7568 struct fib6_info *rt = fen6_info->rt;
7569 struct fib6_info **rt_arr;
7570 struct fib6_info *iter;
7574 nrt6 = fen6_info->nsiblings + 1;
7576 rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
7580 fib6_work->rt_arr = rt_arr;
7581 fib6_work->nrt6 = nrt6;
7586 if (!fen6_info->nsiblings)
7589 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
7590 if (i == fen6_info->nsiblings)
7593 rt_arr[i + 1] = iter;
7594 fib6_info_hold(iter);
7597 WARN_ON_ONCE(i != fen6_info->nsiblings);
7603 mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work)
7607 for (i = 0; i < fib6_work->nrt6; i++)
7608 mlxsw_sp_rt6_release(fib6_work->rt_arr[i]);
7609 kfree(fib6_work->rt_arr);
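/* FIB notifications are handled asynchronously: the notifier only copies
 * the event information and takes references, while these work handlers
 * program the device under the router lock.
 */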
7612 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
7614 struct mlxsw_sp_fib_event_work *fib_work =
7615 container_of(work, struct mlxsw_sp_fib_event_work, work);
7616 struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
7619 mutex_lock(&mlxsw_sp->router->lock);
7620 mlxsw_sp_span_respin(mlxsw_sp);
7622 switch (fib_work->event) {
7623 case FIB_EVENT_ENTRY_REPLACE:
7624 err = mlxsw_sp_router_fib4_replace(mlxsw_sp,
7625 &fib_work->fen_info);
7627 dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
7628 mlxsw_sp_fib4_offload_failed_flag_set(mlxsw_sp,
7629 &fib_work->fen_info);
7631 fib_info_put(fib_work->fen_info.fi);
7633 case FIB_EVENT_ENTRY_DEL:
7634 mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
7635 fib_info_put(fib_work->fen_info.fi);
7637 case FIB_EVENT_NH_ADD:
7638 case FIB_EVENT_NH_DEL:
7639 mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
7640 fib_work->fnh_info.fib_nh);
7641 fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
7644 mutex_unlock(&mlxsw_sp->router->lock);
7648 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
7650 struct mlxsw_sp_fib_event_work *fib_work =
7651 container_of(work, struct mlxsw_sp_fib_event_work, work);
7652 struct mlxsw_sp_fib6_event_work *fib6_work = &fib_work->fib6_work;
7653 struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
7656 mutex_lock(&mlxsw_sp->router->lock);
7657 mlxsw_sp_span_respin(mlxsw_sp);
7659 switch (fib_work->event) {
7660 case FIB_EVENT_ENTRY_REPLACE:
7661 err = mlxsw_sp_router_fib6_replace(mlxsw_sp,
7665 dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
7666 mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
7670 mlxsw_sp_router_fib6_work_fini(fib6_work);
7672 case FIB_EVENT_ENTRY_APPEND:
7673 err = mlxsw_sp_router_fib6_append(mlxsw_sp,
7677 dev_warn(mlxsw_sp->bus_info->dev, "FIB append failed.\n");
7678 mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
7682 mlxsw_sp_router_fib6_work_fini(fib6_work);
7684 case FIB_EVENT_ENTRY_DEL:
7685 mlxsw_sp_router_fib6_del(mlxsw_sp,
7688 mlxsw_sp_router_fib6_work_fini(fib6_work);
7691 mutex_unlock(&mlxsw_sp->router->lock);
7695 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
7697 struct mlxsw_sp_fib_event_work *fib_work =
7698 container_of(work, struct mlxsw_sp_fib_event_work, work);
7699 struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
7704 mutex_lock(&mlxsw_sp->router->lock);
7705 switch (fib_work->event) {
7706 case FIB_EVENT_ENTRY_REPLACE:
7707 case FIB_EVENT_ENTRY_ADD:
7708 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
7710 err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
7713 dev_warn(mlxsw_sp->bus_info->dev, "MR entry add failed.\n");
7714 mr_cache_put(fib_work->men_info.mfc);
7716 case FIB_EVENT_ENTRY_DEL:
7717 mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
7718 mr_cache_put(fib_work->men_info.mfc);
7720 case FIB_EVENT_VIF_ADD:
7721 err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
7722 &fib_work->ven_info);
7724 dev_warn(mlxsw_sp->bus_info->dev, "MR VIF add failed.\n");
7725 netdev_put(fib_work->ven_info.dev, &fib_work->dev_tracker);
7727 case FIB_EVENT_VIF_DEL:
7728 mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
7729 &fib_work->ven_info);
7730 netdev_put(fib_work->ven_info.dev, &fib_work->dev_tracker);
7733 mutex_unlock(&mlxsw_sp->router->lock);
7738 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
7739 struct fib_notifier_info *info)
7741 struct fib_entry_notifier_info *fen_info;
7742 struct fib_nh_notifier_info *fnh_info;
7744 switch (fib_work->event) {
7745 case FIB_EVENT_ENTRY_REPLACE:
7746 case FIB_EVENT_ENTRY_DEL:
7747 fen_info = container_of(info, struct fib_entry_notifier_info,
7749 fib_work->fen_info = *fen_info;
7750 /* Take reference on fib_info to prevent it from being
7751 * freed while work is queued. Release it afterwards. */
7753 fib_info_hold(fib_work->fen_info.fi);
7755 case FIB_EVENT_NH_ADD:
7756 case FIB_EVENT_NH_DEL:
7757 fnh_info = container_of(info, struct fib_nh_notifier_info,
7759 fib_work->fnh_info = *fnh_info;
7760 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
7765 static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
7766 struct fib_notifier_info *info)
7768 struct fib6_entry_notifier_info *fen6_info;
7771 switch (fib_work->event) {
7772 case FIB_EVENT_ENTRY_REPLACE:
7773 case FIB_EVENT_ENTRY_APPEND:
7774 case FIB_EVENT_ENTRY_DEL:
7775 fen6_info = container_of(info, struct fib6_entry_notifier_info,
7777 err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work,
7788 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
7789 struct fib_notifier_info *info)
7791 switch (fib_work->event) {
7792 case FIB_EVENT_ENTRY_REPLACE:
7793 case FIB_EVENT_ENTRY_ADD:
7794 case FIB_EVENT_ENTRY_DEL:
7795 memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
7796 mr_cache_hold(fib_work->men_info.mfc);
7798 case FIB_EVENT_VIF_ADD:
7799 case FIB_EVENT_VIF_DEL:
7800 memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
7801 netdev_hold(fib_work->ven_info.dev, &fib_work->dev_tracker,
7807 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
7808 struct fib_notifier_info *info,
7809 struct mlxsw_sp *mlxsw_sp)
7811 struct netlink_ext_ack *extack = info->extack;
7812 struct fib_rule_notifier_info *fr_info;
7813 struct fib_rule *rule;
7816 /* nothing to do at the moment */
7817 if (event == FIB_EVENT_RULE_DEL)
7820 fr_info = container_of(info, struct fib_rule_notifier_info, info);
7821 rule = fr_info->rule;
7823 /* Rule only affects locally generated traffic */
7824 if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex)
7827 switch (info->family) {
7829 if (!fib4_rule_default(rule) && !rule->l3mdev)
7833 if (!fib6_rule_default(rule) && !rule->l3mdev)
7836 case RTNL_FAMILY_IPMR:
7837 if (!ipmr_rule_default(rule) && !rule->l3mdev)
7840 case RTNL_FAMILY_IP6MR:
7841 if (!ip6mr_rule_default(rule) && !rule->l3mdev)
7847 NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
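/* Main FIB notifier callback: validate the event, copy the notifier info
 * into a work item and defer the actual processing to process context,
 * since this callback runs in atomic context.
 */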
7852 /* Called with rcu_read_lock() */
7853 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
7854 unsigned long event, void *ptr)
7856 struct mlxsw_sp_fib_event_work *fib_work;
7857 struct fib_notifier_info *info = ptr;
7858 struct mlxsw_sp_router *router;
7861 if ((info->family != AF_INET && info->family != AF_INET6 &&
7862 info->family != RTNL_FAMILY_IPMR &&
7863 info->family != RTNL_FAMILY_IP6MR))
7866 router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7869 case FIB_EVENT_RULE_ADD:
7870 case FIB_EVENT_RULE_DEL:
7871 err = mlxsw_sp_router_fib_rule_event(event, info,
7873 return notifier_from_errno(err);
7874 case FIB_EVENT_ENTRY_ADD:
7875 case FIB_EVENT_ENTRY_REPLACE:
7876 case FIB_EVENT_ENTRY_APPEND:
7877 if (info->family == AF_INET) {
7878 struct fib_entry_notifier_info *fen_info = ptr;
7880 if (fen_info->fi->fib_nh_is_v6) {
7881 NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
7882 return notifier_from_errno(-EINVAL);
7888 fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
7892 fib_work->mlxsw_sp = router->mlxsw_sp;
7893 fib_work->event = event;
7895 switch (info->family) {
7897 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
7898 mlxsw_sp_router_fib4_event(fib_work, info);
7901 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
7902 err = mlxsw_sp_router_fib6_event(fib_work, info);
7906 case RTNL_FAMILY_IP6MR:
7907 case RTNL_FAMILY_IPMR:
7908 INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
7909 mlxsw_sp_router_fibmr_event(fib_work, info);
7913 mlxsw_core_schedule_work(&fib_work->work);
7922 static struct mlxsw_sp_rif *
7923 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
7924 const struct net_device *dev)
7926 int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7929 for (i = 0; i < max_rifs; i++)
7930 if (mlxsw_sp->router->rifs[i] &&
7931 mlxsw_sp_rif_dev_is(mlxsw_sp->router->rifs[i], dev))
7932 return mlxsw_sp->router->rifs[i];
7937 bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
7938 const struct net_device *dev)
7940 struct mlxsw_sp_rif *rif;
7942 mutex_lock(&mlxsw_sp->router->lock);
7943 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7944 mutex_unlock(&mlxsw_sp->router->lock);
7949 u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
7951 struct mlxsw_sp_rif *rif;
7954 mutex_lock(&mlxsw_sp->router->lock);
7955 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7959 /* We only return the VID for VLAN RIFs. Otherwise we return an
7960 * invalid value (0). */
7962 if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN)
7965 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7968 mutex_unlock(&mlxsw_sp->router->lock);
7972 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
7974 char ritr_pl[MLXSW_REG_RITR_LEN];
7977 mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
7978 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7982 mlxsw_reg_ritr_enable_set(ritr_pl, false);
7983 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7986 static int mlxsw_sp_router_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
7987 struct mlxsw_sp_rif *rif)
7991 err = mlxsw_sp_neigh_rif_made_sync(mlxsw_sp, rif);
7995 err = mlxsw_sp_nexthop_rif_made_sync(mlxsw_sp, rif);
8002 mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
8006 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
8007 struct mlxsw_sp_rif *rif)
8009 /* Signal to nexthop cleanup that the RIF is going away. */
8010 rif->crif->rif = NULL;
8012 mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
8013 mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
8014 mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
8017 static bool __mlxsw_sp_dev_addr_list_empty(const struct net_device *dev)
8019 struct inet6_dev *inet6_dev;
8020 struct in_device *idev;
8022 idev = __in_dev_get_rcu(dev);
8023 if (idev && idev->ifa_list)
8026 inet6_dev = __in6_dev_get(dev);
8027 if (inet6_dev && !list_empty(&inet6_dev->addr_list))
8033 static bool mlxsw_sp_dev_addr_list_empty(const struct net_device *dev)
8035 bool addr_list_empty;
8038 addr_list_empty = __mlxsw_sp_dev_addr_list_empty(dev);
8041 return addr_list_empty;
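/* Decide whether an address event should (re)configure the RIF of a
 * netdevice. On address removal, the RIF is only torn down once the device
 * has neither IPv4 nor IPv6 addresses left and is not an L3 slave.
 */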
8045 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
8046 unsigned long event)
8048 bool addr_list_empty;
8054 addr_list_empty = mlxsw_sp_dev_addr_list_empty(dev);
8056 /* macvlans do not have a RIF, but rather piggy back on the
8057 * RIF of their lower device. */
8059 if (netif_is_macvlan(dev) && addr_list_empty)
8062 if (rif && addr_list_empty &&
8063 !netif_is_l3_slave(mlxsw_sp_rif_dev(rif)))
8065 /* It is possible we already removed the RIF ourselves
8066 * if it was assigned to a netdev that is now a bridge or a LAG slave. */
8075 static enum mlxsw_sp_rif_type
8076 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
8077 const struct net_device *dev)
8079 enum mlxsw_sp_fid_type type;
8081 if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
8082 return MLXSW_SP_RIF_TYPE_IPIP_LB;
8084 /* Otherwise RIF type is derived from the type of the underlying FID. */
8085 if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
8086 type = MLXSW_SP_FID_TYPE_8021Q;
8087 else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
8088 type = MLXSW_SP_FID_TYPE_8021Q;
8089 else if (netif_is_bridge_master(dev))
8090 type = MLXSW_SP_FID_TYPE_8021D;
8092 type = MLXSW_SP_FID_TYPE_RFID;
8094 return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
8097 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index,
8100 *p_rif_index = gen_pool_alloc(mlxsw_sp->router->rifs_table,
8102 if (*p_rif_index == 0)
8104 *p_rif_index -= MLXSW_SP_ROUTER_GENALLOC_OFFSET;
8106 /* RIF indexes must be aligned to the allocation size. */
8107 WARN_ON_ONCE(*p_rif_index % rif_entries);
8112 static void mlxsw_sp_rif_index_free(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
8115 gen_pool_free(mlxsw_sp->router->rifs_table,
8116 MLXSW_SP_ROUTER_GENALLOC_OFFSET + rif_index, rif_entries);
8119 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
8121 struct mlxsw_sp_crif *crif)
8123 struct net_device *l3_dev = crif ? crif->key.dev : NULL;
8124 struct mlxsw_sp_rif *rif;
8126 rif = kzalloc(rif_size, GFP_KERNEL);
8130 INIT_LIST_HEAD(&rif->neigh_list);
8132 ether_addr_copy(rif->addr, l3_dev->dev_addr);
8133 rif->mtu = l3_dev->mtu;
8136 rif->rif_index = rif_index;
8145 static void mlxsw_sp_rif_free(struct mlxsw_sp_rif *rif)
8147 WARN_ON(!list_empty(&rif->neigh_list));
8150 rif->crif->rif = NULL;
8154 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
8157 return mlxsw_sp->router->rifs[rif_index];
8160 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
8162 return rif->rif_index;
8165 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
8167 return lb_rif->common.rif_index;
8170 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
8172 struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common);
8173 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev);
8174 struct mlxsw_sp_vr *ul_vr;
8176 ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
8177 if (WARN_ON(IS_ERR(ul_vr)))
8183 u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
8185 return lb_rif->ul_rif_id;
8189 mlxsw_sp_router_port_l3_stats_enabled(struct mlxsw_sp_rif *rif)
8191 return mlxsw_sp_rif_counter_valid_get(rif,
8192 MLXSW_SP_RIF_COUNTER_EGRESS) &&
8193 mlxsw_sp_rif_counter_valid_get(rif,
8194 MLXSW_SP_RIF_COUNTER_INGRESS);
8198 mlxsw_sp_router_port_l3_stats_enable(struct mlxsw_sp_rif *rif)
8202 err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
8206 /* Clear stale data. */
8207 err = mlxsw_sp_rif_counter_fetch_clear(rif,
8208 MLXSW_SP_RIF_COUNTER_INGRESS,
8211 goto err_clear_ingress;
8213 err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
8215 goto err_alloc_egress;
8217 /* Clear stale data. */
8218 err = mlxsw_sp_rif_counter_fetch_clear(rif,
8219 MLXSW_SP_RIF_COUNTER_EGRESS,
8222 goto err_clear_egress;
8227 mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
8230 mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
8235 mlxsw_sp_router_port_l3_stats_disable(struct mlxsw_sp_rif *rif)
8237 mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
8238 mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
8242 mlxsw_sp_router_port_l3_stats_report_used(struct mlxsw_sp_rif *rif,
8243 struct netdev_notifier_offload_xstats_info *info)
8245 if (!mlxsw_sp_router_port_l3_stats_enabled(rif))
8247 netdev_offload_xstats_report_used(info->report_used);
8251 mlxsw_sp_router_port_l3_stats_fetch(struct mlxsw_sp_rif *rif,
8252 struct rtnl_hw_stats64 *p_stats)
8254 struct mlxsw_sp_rif_counter_set_basic ingress;
8255 struct mlxsw_sp_rif_counter_set_basic egress;
8258 err = mlxsw_sp_rif_counter_fetch_clear(rif,
8259 MLXSW_SP_RIF_COUNTER_INGRESS,
8264 err = mlxsw_sp_rif_counter_fetch_clear(rif,
8265 MLXSW_SP_RIF_COUNTER_EGRESS,
8270 #define MLXSW_SP_ROUTER_ALL_GOOD(SET, SFX) \
8271 ((SET.good_unicast_ ## SFX) + \
8272 (SET.good_multicast_ ## SFX) + \
8273 (SET.good_broadcast_ ## SFX))
8275 p_stats->rx_packets = MLXSW_SP_ROUTER_ALL_GOOD(ingress, packets);
8276 p_stats->tx_packets = MLXSW_SP_ROUTER_ALL_GOOD(egress, packets);
8277 p_stats->rx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(ingress, bytes);
8278 p_stats->tx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(egress, bytes);
8279 p_stats->rx_errors = ingress.error_packets;
8280 p_stats->tx_errors = egress.error_packets;
8281 p_stats->rx_dropped = ingress.discard_packets;
8282 p_stats->tx_dropped = egress.discard_packets;
8283 p_stats->multicast = ingress.good_multicast_packets +
8284 ingress.good_broadcast_packets;
8286 #undef MLXSW_SP_ROUTER_ALL_GOOD
8292 mlxsw_sp_router_port_l3_stats_report_delta(struct mlxsw_sp_rif *rif,
8293 struct netdev_notifier_offload_xstats_info *info)
8295 struct rtnl_hw_stats64 stats = {};
8298 if (!mlxsw_sp_router_port_l3_stats_enabled(rif))
8301 err = mlxsw_sp_router_port_l3_stats_fetch(rif, &stats);
8305 netdev_offload_xstats_report_delta(info->report_delta, &stats);
8309 struct mlxsw_sp_router_hwstats_notify_work {
8310 struct work_struct work;
8311 struct net_device *dev;
8312 netdevice_tracker dev_tracker;
8315 static void mlxsw_sp_router_hwstats_notify_work(struct work_struct *work)
8317 struct mlxsw_sp_router_hwstats_notify_work *hws_work =
8318 container_of(work, struct mlxsw_sp_router_hwstats_notify_work,
8322 rtnl_offload_xstats_notify(hws_work->dev);
8324 netdev_put(hws_work->dev, &hws_work->dev_tracker);
8329 mlxsw_sp_router_hwstats_notify_schedule(struct net_device *dev)
8331 struct mlxsw_sp_router_hwstats_notify_work *hws_work;
8333 /* To collect notification payload, the core ends up sending another
8334 * notifier block message, which would deadlock on the attempt to
8335 * acquire the router lock again. Just postpone the notification until later. */
8339 hws_work = kzalloc(sizeof(*hws_work), GFP_KERNEL);
8343 INIT_WORK(&hws_work->work, mlxsw_sp_router_hwstats_notify_work);
8344 netdev_hold(dev, &hws_work->dev_tracker, GFP_KERNEL);
8345 hws_work->dev = dev;
8346 mlxsw_core_schedule_work(&hws_work->work);
8349 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
8351 return mlxsw_sp_rif_dev(rif)->ifindex;
8354 bool mlxsw_sp_rif_has_dev(const struct mlxsw_sp_rif *rif)
8356 return !!mlxsw_sp_rif_dev(rif);
8359 bool mlxsw_sp_rif_dev_is(const struct mlxsw_sp_rif *rif,
8360 const struct net_device *dev)
8362 return mlxsw_sp_rif_dev(rif) == dev;
8365 static void mlxsw_sp_rif_push_l3_stats(struct mlxsw_sp_rif *rif)
8367 struct rtnl_hw_stats64 stats = {};
8369 if (!mlxsw_sp_router_port_l3_stats_fetch(rif, &stats))
8370 netdev_offload_xstats_push_delta(mlxsw_sp_rif_dev(rif),
8371 NETDEV_OFFLOAD_XSTATS_TYPE_L3,
8375 static struct mlxsw_sp_rif *
8376 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
8377 const struct mlxsw_sp_rif_params *params,
8378 struct netlink_ext_ack *extack)
8380 u8 rif_entries = params->double_entry ? 2 : 1;
8381 u32 tb_id = l3mdev_fib_table(params->dev);
8382 const struct mlxsw_sp_rif_ops *ops;
8383 struct mlxsw_sp_fid *fid = NULL;
8384 enum mlxsw_sp_rif_type type;
8385 struct mlxsw_sp_crif *crif;
8386 struct mlxsw_sp_rif *rif;
8387 struct mlxsw_sp_vr *vr;
8391 type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
8392 ops = mlxsw_sp->router->rif_ops_arr[type];
8394 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
8396 return ERR_CAST(vr);
8399 err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries);
8401 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
8402 goto err_rif_index_alloc;
8405 crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, params->dev);
8406 if (WARN_ON(!crif)) {
8408 goto err_crif_lookup;
8411 rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, crif);
8416 netdev_hold(params->dev, &rif->dev_tracker, GFP_KERNEL);
8417 mlxsw_sp->router->rifs[rif_index] = rif;
8418 rif->mlxsw_sp = mlxsw_sp;
8420 rif->rif_entries = rif_entries;
8423 fid = ops->fid_get(rif, params, extack);
8432 ops->setup(rif, params);
8434 err = ops->configure(rif, extack);
8438 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
8439 err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
8441 goto err_mr_rif_add;
8444 err = mlxsw_sp_router_rif_made_sync(mlxsw_sp, rif);
8446 goto err_rif_made_sync;
8448 if (netdev_offload_xstats_enabled(params->dev,
8449 NETDEV_OFFLOAD_XSTATS_TYPE_L3)) {
8450 err = mlxsw_sp_router_port_l3_stats_enable(rif);
8452 goto err_stats_enable;
8453 mlxsw_sp_router_hwstats_notify_schedule(params->dev);
8455 mlxsw_sp_rif_counters_alloc(rif);
8458 atomic_add(rif_entries, &mlxsw_sp->router->rifs_count);
8462 mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
8465 for (i--; i >= 0; i--)
8466 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
8467 ops->deconfigure(rif);
8470 mlxsw_sp_fid_put(fid);
8472 mlxsw_sp->router->rifs[rif_index] = NULL;
8473 netdev_put(params->dev, &rif->dev_tracker);
8474 mlxsw_sp_rif_free(rif);
8477 mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
8478 err_rif_index_alloc:
8480 mlxsw_sp_vr_put(mlxsw_sp, vr);
8481 return ERR_PTR(err);
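/* Tear down a RIF in reverse order of its creation: sync neighbours and
 * nexthops, drop L3 HW stats, remove it from the MR tables, deconfigure
 * the hardware interface and release the FID, RIF index and virtual
 * router.
 */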
8484 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
8486 struct net_device *dev = mlxsw_sp_rif_dev(rif);
8487 const struct mlxsw_sp_rif_ops *ops = rif->ops;
8488 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8489 struct mlxsw_sp_crif *crif = rif->crif;
8490 struct mlxsw_sp_fid *fid = rif->fid;
8491 u8 rif_entries = rif->rif_entries;
8492 u16 rif_index = rif->rif_index;
8493 struct mlxsw_sp_vr *vr;
8496 atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count);
8497 mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
8498 vr = &mlxsw_sp->router->vrs[rif->vr_id];
8500 if (netdev_offload_xstats_enabled(dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3)) {
8501 mlxsw_sp_rif_push_l3_stats(rif);
8502 mlxsw_sp_router_port_l3_stats_disable(rif);
8503 mlxsw_sp_router_hwstats_notify_schedule(dev);
8505 mlxsw_sp_rif_counters_free(rif);
8508 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
8509 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
8510 ops->deconfigure(rif);
8512 /* Loopback RIFs are not associated with a FID. */
8513 mlxsw_sp_fid_put(fid);
8514 mlxsw_sp->router->rifs[rif->rif_index] = NULL;
8515 netdev_put(dev, &rif->dev_tracker);
8516 mlxsw_sp_rif_free(rif);
8517 mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
8519 mlxsw_sp_vr_put(mlxsw_sp, vr);
8521 if (crif->can_destroy)
8522 mlxsw_sp_crif_free(crif);
8525 void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
8526 struct net_device *dev)
8528 struct mlxsw_sp_rif *rif;
8530 mutex_lock(&mlxsw_sp->router->lock);
8531 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8534 mlxsw_sp_rif_destroy(rif);
8536 mutex_unlock(&mlxsw_sp->router->lock);
8539 static void mlxsw_sp_rif_destroy_vlan_upper(struct mlxsw_sp *mlxsw_sp,
8540 struct net_device *br_dev,
8543 struct net_device *upper_dev;
8544 struct mlxsw_sp_crif *crif;
8547 upper_dev = __vlan_find_dev_deep_rcu(br_dev, htons(ETH_P_8021Q), vid);
8553 crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, upper_dev);
8554 if (!crif || !crif->rif)
8557 mlxsw_sp_rif_destroy(crif->rif);
8560 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
8561 struct net_device *l3_dev,
8563 unsigned long event,
8564 struct netlink_ext_ack *extack);
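/* Handle a PVID change on a VLAN-aware bridge that currently has a VLAN
 * RIF: a VLAN upper clashing with the new PVID gives up its RIF, and the
 * bridge RIF is migrated to (or recreated for) the new PVID.
 */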
8566 int mlxsw_sp_router_bridge_vlan_add(struct mlxsw_sp *mlxsw_sp,
8567 struct net_device *br_dev,
8568 u16 new_vid, bool is_pvid,
8569 struct netlink_ext_ack *extack)
8571 struct mlxsw_sp_rif *old_rif;
8572 struct mlxsw_sp_rif *new_rif;
8573 struct net_device *upper_dev;
8578 mutex_lock(&mlxsw_sp->router->lock);
8579 old_rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, br_dev);
8581 /* If the RIF on the bridge is not a VLAN RIF, we shouldn't have
8582 * gotten a PVID notification. */
8584 if (WARN_ON(old_rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN))
8587 old_pvid = mlxsw_sp_fid_8021q_vid(old_rif->fid);
8592 else if (old_pvid == new_vid)
8597 if (old_pvid == new_pvid)
8601 struct mlxsw_sp_rif_params params = {
8606 /* If there is a VLAN upper with the same VID as the new PVID,
8607 * kill its RIF, if there is one. */
8609 mlxsw_sp_rif_destroy_vlan_upper(mlxsw_sp, br_dev, new_pvid);
8611 if (mlxsw_sp_dev_addr_list_empty(br_dev))
8613 new_rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
8614 if (IS_ERR(new_rif)) {
8615 err = PTR_ERR(new_rif);
8620 mlxsw_sp_rif_migrate_destroy(mlxsw_sp, old_rif, new_rif,
8623 mlxsw_sp_rif_destroy(old_rif);
8628 upper_dev = __vlan_find_dev_deep_rcu(br_dev, htons(ETH_P_8021Q),
8632 err = mlxsw_sp_inetaddr_bridge_event(mlxsw_sp,
8639 mutex_unlock(&mlxsw_sp->router->lock);
8644 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
8645 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8647 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8649 params->vid = mlxsw_sp_port_vlan->vid;
8650 params->lag = mlxsw_sp_port->lagged;
8652 params->lag_id = mlxsw_sp_port->lag_id;
8654 params->system_port = mlxsw_sp_port->local_port;
8657 static struct mlxsw_sp_rif_subport *
8658 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
8660 return container_of(rif, struct mlxsw_sp_rif_subport, common);
8663 static struct mlxsw_sp_rif *
8664 mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
8665 const struct mlxsw_sp_rif_params *params,
8666 struct netlink_ext_ack *extack)
8668 struct mlxsw_sp_rif_subport *rif_subport;
8669 struct mlxsw_sp_rif *rif;
8671 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
8673 return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
8675 rif_subport = mlxsw_sp_rif_subport_rif(rif);
8676 refcount_inc(&rif_subport->ref_count);
8680 static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
8682 struct mlxsw_sp_rif_subport *rif_subport;
8684 rif_subport = mlxsw_sp_rif_subport_rif(rif);
8685 if (!refcount_dec_and_test(&rif_subport->ref_count))
8688 mlxsw_sp_rif_destroy(rif);
8691 static int mlxsw_sp_rif_mac_profile_index_alloc(struct mlxsw_sp *mlxsw_sp,
8692 struct mlxsw_sp_rif_mac_profile *profile,
8693 struct netlink_ext_ack *extack)
8695 u8 max_rif_mac_profiles = mlxsw_sp->router->max_rif_mac_profile;
8696 struct mlxsw_sp_router *router = mlxsw_sp->router;
8699 id = idr_alloc(&router->rif_mac_profiles_idr, profile, 0,
8700 max_rif_mac_profiles, GFP_KERNEL);
8708 NL_SET_ERR_MSG_MOD(extack,
8709 "Exceeded number of supported router interface MAC profiles");
8714 static struct mlxsw_sp_rif_mac_profile *
8715 mlxsw_sp_rif_mac_profile_index_free(struct mlxsw_sp *mlxsw_sp, u8 mac_profile)
8717 struct mlxsw_sp_rif_mac_profile *profile;
8719 profile = idr_remove(&mlxsw_sp->router->rif_mac_profiles_idr,
8725 static struct mlxsw_sp_rif_mac_profile *
8726 mlxsw_sp_rif_mac_profile_alloc(const char *mac)
8728 struct mlxsw_sp_rif_mac_profile *profile;
8730 profile = kzalloc(sizeof(*profile), GFP_KERNEL);
8734 ether_addr_copy(profile->mac_prefix, mac);
8735 refcount_set(&profile->ref_count, 1);
8739 static struct mlxsw_sp_rif_mac_profile *
8740 mlxsw_sp_rif_mac_profile_find(const struct mlxsw_sp *mlxsw_sp, const char *mac)
8742 struct mlxsw_sp_router *router = mlxsw_sp->router;
8743 struct mlxsw_sp_rif_mac_profile *profile;
8746 idr_for_each_entry(&router->rif_mac_profiles_idr, profile, id) {
8747 if (ether_addr_equal_masked(profile->mac_prefix, mac,
8748 mlxsw_sp->mac_mask))
8755 static u64 mlxsw_sp_rif_mac_profiles_occ_get(void *priv)
8757 const struct mlxsw_sp *mlxsw_sp = priv;
8759 return atomic_read(&mlxsw_sp->router->rif_mac_profiles_count);
8762 static u64 mlxsw_sp_rifs_occ_get(void *priv)
8764 const struct mlxsw_sp *mlxsw_sp = priv;
8766 return atomic_read(&mlxsw_sp->router->rifs_count);
8769 static struct mlxsw_sp_rif_mac_profile *
8770 mlxsw_sp_rif_mac_profile_create(struct mlxsw_sp *mlxsw_sp, const char *mac,
8771 struct netlink_ext_ack *extack)
8773 struct mlxsw_sp_rif_mac_profile *profile;
8776 profile = mlxsw_sp_rif_mac_profile_alloc(mac);
8778 return ERR_PTR(-ENOMEM);
8780 err = mlxsw_sp_rif_mac_profile_index_alloc(mlxsw_sp, profile, extack);
8782 goto profile_index_alloc_err;
8784 atomic_inc(&mlxsw_sp->router->rif_mac_profiles_count);
8787 profile_index_alloc_err:
8789 return ERR_PTR(err);
8792 static void mlxsw_sp_rif_mac_profile_destroy(struct mlxsw_sp *mlxsw_sp,
8795 struct mlxsw_sp_rif_mac_profile *profile;
8797 atomic_dec(&mlxsw_sp->router->rif_mac_profiles_count);
8798 profile = mlxsw_sp_rif_mac_profile_index_free(mlxsw_sp, mac_profile);
8802 static int mlxsw_sp_rif_mac_profile_get(struct mlxsw_sp *mlxsw_sp,
8803 const char *mac, u8 *p_mac_profile,
8804 struct netlink_ext_ack *extack)
8806 struct mlxsw_sp_rif_mac_profile *profile;
8808 profile = mlxsw_sp_rif_mac_profile_find(mlxsw_sp, mac);
8810 refcount_inc(&profile->ref_count);
8814 profile = mlxsw_sp_rif_mac_profile_create(mlxsw_sp, mac, extack);
8815 if (IS_ERR(profile))
8816 return PTR_ERR(profile);
8819 *p_mac_profile = profile->id;
8823 static void mlxsw_sp_rif_mac_profile_put(struct mlxsw_sp *mlxsw_sp,
8826 struct mlxsw_sp_rif_mac_profile *profile;
8828 profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
8830 if (WARN_ON(!profile))
8833 if (!refcount_dec_and_test(&profile->ref_count))
8836 mlxsw_sp_rif_mac_profile_destroy(mlxsw_sp, mac_profile);
8839 static bool mlxsw_sp_rif_mac_profile_is_shared(const struct mlxsw_sp_rif *rif)
8841 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8842 struct mlxsw_sp_rif_mac_profile *profile;
8844 profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
8845 rif->mac_profile_id);
8846 if (WARN_ON(!profile))
8849 return refcount_read(&profile->ref_count) > 1;
8852 static int mlxsw_sp_rif_mac_profile_edit(struct mlxsw_sp_rif *rif,
8853 const char *new_mac)
8855 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8856 struct mlxsw_sp_rif_mac_profile *profile;
8858 profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
8859 rif->mac_profile_id);
8860 if (WARN_ON(!profile))
8863 ether_addr_copy(profile->mac_prefix, new_mac);
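/* Switch a RIF to a MAC profile matching its new address: if the current
 * profile is not shared and no profile exists for the new MAC, edit it in
 * place; otherwise get (or create) a matching profile and release the old
 * one.
 */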
8868 mlxsw_sp_rif_mac_profile_replace(struct mlxsw_sp *mlxsw_sp,
8869 struct mlxsw_sp_rif *rif,
8870 const char *new_mac,
8871 struct netlink_ext_ack *extack)
8876 if (!mlxsw_sp_rif_mac_profile_is_shared(rif) &&
8877 !mlxsw_sp_rif_mac_profile_find(mlxsw_sp, new_mac))
8878 return mlxsw_sp_rif_mac_profile_edit(rif, new_mac);
8880 err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, new_mac,
8881 &mac_profile, extack);
8885 mlxsw_sp_rif_mac_profile_put(mlxsw_sp, rif->mac_profile_id);
8886 rif->mac_profile_id = mac_profile;
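/* Join a {port, VID} to the router: create or reuse a sub-port RIF for the
 * L3 device, take a reference on its FID, map the {port, VID} to that FID,
 * disable learning for the VID and put it in the forwarding STP state.
 */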
8891 __mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
8892 struct net_device *l3_dev,
8893 struct netlink_ext_ack *extack)
8895 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8896 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
8897 struct mlxsw_sp_rif_params params;
8898 u16 vid = mlxsw_sp_port_vlan->vid;
8899 struct mlxsw_sp_rif *rif;
8900 struct mlxsw_sp_fid *fid;
8903 params = (struct mlxsw_sp_rif_params) {
8908 mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
8909 rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
8911 return PTR_ERR(rif);
8913 /* FID was already created, just take a reference */
8914 fid = rif->ops->fid_get(rif, &params, extack);
8915 err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
8917 goto err_fid_port_vid_map;
8919 err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
8921 goto err_port_vid_learning_set;
8923 err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
8924 BR_STATE_FORWARDING);
8926 goto err_port_vid_stp_set;
8928 mlxsw_sp_port_vlan->fid = fid;
8932 err_port_vid_stp_set:
8933 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
8934 err_port_vid_learning_set:
8935 mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
8936 err_fid_port_vid_map:
8937 mlxsw_sp_fid_put(fid);
8938 mlxsw_sp_rif_subport_put(rif);
8943 __mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8945 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8946 struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
8947 struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
8948 u16 vid = mlxsw_sp_port_vlan->vid;
8950 if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
8953 mlxsw_sp_port_vlan->fid = NULL;
8954 mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
8955 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
8956 mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
8957 mlxsw_sp_fid_put(fid);
8958 mlxsw_sp_rif_subport_put(rif);
8962 mlxsw_sp_port_vlan_router_join_existing(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
8963 struct net_device *l3_dev,
8964 struct netlink_ext_ack *extack)
8966 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
8968 lockdep_assert_held(&mlxsw_sp->router->lock);
8970 if (!mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev))
8973 return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, l3_dev,
8978 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8980 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
8982 mutex_lock(&mlxsw_sp->router->lock);
8983 __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
8984 mutex_unlock(&mlxsw_sp->router->lock);
8987 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
8988 struct net_device *port_dev,
8989 unsigned long event, u16 vid,
8990 struct netlink_ext_ack *extack)
8992 struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
8993 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
8995 mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
8996 if (WARN_ON(!mlxsw_sp_port_vlan))
9001 return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
9004 __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
9011 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
9012 unsigned long event, bool nomaster,
9013 struct netlink_ext_ack *extack)
9015 if (!nomaster && (netif_is_any_bridge_port(port_dev) ||
9016 netif_is_lag_port(port_dev)))
9019 return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
9020 MLXSW_SP_DEFAULT_VID, extack);
9023 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
9024 struct net_device *lag_dev,
9025 unsigned long event, u16 vid,
9026 struct netlink_ext_ack *extack)
9028 struct net_device *port_dev;
9029 struct list_head *iter;
9032 netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
9033 if (mlxsw_sp_port_dev_check(port_dev)) {
9034 err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
9046 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
9047 unsigned long event, bool nomaster,
9048 struct netlink_ext_ack *extack)
9050 if (!nomaster && netif_is_bridge_port(lag_dev))
9053 return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
9054 MLXSW_SP_DEFAULT_VID, extack);
9057 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
9058 struct net_device *l3_dev,
9060 unsigned long event,
9061 struct netlink_ext_ack *extack)
9063 struct mlxsw_sp_rif_params params = {
9066 struct mlxsw_sp_rif *rif;
9071 if (netif_is_bridge_master(l3_dev) && br_vlan_enabled(l3_dev)) {
9074 br_vlan_get_proto(l3_dev, &proto);
9075 if (proto == ETH_P_8021AD) {
9076 NL_SET_ERR_MSG_MOD(extack, "Adding an IP address to 802.1ad bridge is not supported");
9079 err = br_vlan_get_pvid(l3_dev, &params.vid);
9084 } else if (is_vlan_dev(l3_dev)) {
9085 params.vid = vlan_dev_vlan_id(l3_dev);
9087 /* If the VID matches PVID of the bridge below, the
9088 * bridge owns the RIF for this VLAN. Don't do anything. */
9090 if ((int)params.vid == lower_pvid)
9094 rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
9096 return PTR_ERR(rif);
9099 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
9100 mlxsw_sp_rif_destroy(rif);
9107 static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
9108 struct net_device *vlan_dev,
9109 unsigned long event, bool nomaster,
9110 struct netlink_ext_ack *extack)
9112 struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
9113 u16 vid = vlan_dev_vlan_id(vlan_dev);
9117 if (!nomaster && netif_is_bridge_port(vlan_dev))
9120 if (mlxsw_sp_port_dev_check(real_dev)) {
9121 return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
9122 event, vid, extack);
9123 } else if (netif_is_lag_master(real_dev)) {
9124 return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
9126 } else if (netif_is_bridge_master(real_dev) &&
9127 br_vlan_enabled(real_dev)) {
9128 err = br_vlan_get_pvid(real_dev, &lower_pvid);
9131 return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev,
9139 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
9141 u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
9142 u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
9144 return ether_addr_equal_masked(mac, vrrp4, mask);
9147 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
9149 u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
9150 u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
9152 return ether_addr_equal_masked(mac, vrrp6, mask);
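/* Program the VRRP virtual router ID on a RIF: when a macvlan with a VRRP
 * MAC (00:00:5e:00:01:xx or 00:00:5e:00:02:xx) is added on top of the
 * RIF's device, the last MAC byte is written to the RITR register as the
 * IPv4 or IPv6 VRRP ID, and cleared again on removal.
 */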
9155 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
9156 const u8 *mac, bool adding)
9158 char ritr_pl[MLXSW_REG_RITR_LEN];
9159 u8 vrrp_id = adding ? mac[5] : 0;
9162 if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
9163 !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
9166 mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
9167 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9171 if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
9172 mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
9174 mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
9176 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9179 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
9180 const struct net_device *macvlan_dev,
9181 struct netlink_ext_ack *extack)
9183 struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
9184 struct mlxsw_sp_rif *rif;
9187 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
9191 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
9192 mlxsw_sp_fid_index(rif->fid), true);
9196 err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
9197 macvlan_dev->dev_addr, true);
9199 goto err_rif_vrrp_add;
9201 /* Make sure the bridge driver does not have this MAC pointing at some other port. */
9204 if (rif->ops->fdb_del)
9205 rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
9210 mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
9211 mlxsw_sp_fid_index(rif->fid), false);
9215 static void __mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
9216 const struct net_device *macvlan_dev)
9218 struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
9219 struct mlxsw_sp_rif *rif;
9221 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
9222 /* If we do not have a RIF, then we already took care of
9223 * removing the macvlan's MAC during RIF deletion.
9227 mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
9229 mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
9230 mlxsw_sp_fid_index(rif->fid), false);
9233 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
9234 const struct net_device *macvlan_dev)
9236 mutex_lock(&mlxsw_sp->router->lock);
9237 __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
9238 mutex_unlock(&mlxsw_sp->router->lock);
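/* The exported wrapper above takes router->lock and delegates to the
 * double-underscore helper, which expects the lock to already be held; the
 * same pattern appears below for mlxsw_sp_router_port_join_lag() and
 * mlxsw_sp_router_port_leave_lag().
 */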
9241 static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
9242 struct net_device *macvlan_dev,
9243 unsigned long event,
9244 struct netlink_ext_ack *extack)
9248 return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
9250 __mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
9257 static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
9258 struct net_device *dev,
9259 unsigned long event, bool nomaster,
9260 struct netlink_ext_ack *extack)
9262 if (mlxsw_sp_port_dev_check(dev))
9263 return mlxsw_sp_inetaddr_port_event(dev, event, nomaster,
9265 else if (netif_is_lag_master(dev))
9266 return mlxsw_sp_inetaddr_lag_event(dev, event, nomaster,
9268 else if (netif_is_bridge_master(dev))
9269 return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, -1, event,
9271 else if (is_vlan_dev(dev))
9272 return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
9274 else if (netif_is_macvlan(dev))
9275 return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
9281 static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
9282 unsigned long event, void *ptr)
9284 struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
9285 struct net_device *dev = ifa->ifa_dev->dev;
9286 struct mlxsw_sp_router *router;
9287 struct mlxsw_sp_rif *rif;
9290 /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
9291 if (event == NETDEV_UP)
9294 router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
9295 mutex_lock(&router->lock);
9296 rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
9297 if (!mlxsw_sp_rif_should_config(rif, dev, event))
9300 err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, false,
9303 mutex_unlock(&router->lock);
9304 return notifier_from_errno(err);
9307 static int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
9308 unsigned long event, void *ptr)
9310 struct in_validator_info *ivi = (struct in_validator_info *) ptr;
9311 struct net_device *dev = ivi->ivi_dev->dev;
9312 struct mlxsw_sp *mlxsw_sp;
9313 struct mlxsw_sp_rif *rif;
9316 mlxsw_sp = mlxsw_sp_lower_get(dev);
9320 mutex_lock(&mlxsw_sp->router->lock);
9321 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9322 if (!mlxsw_sp_rif_should_config(rif, dev, event))
9325 err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, false,
9328 mutex_unlock(&mlxsw_sp->router->lock);
9329 return notifier_from_errno(err);
9332 struct mlxsw_sp_inet6addr_event_work {
9333 struct work_struct work;
9334 struct mlxsw_sp *mlxsw_sp;
9335 struct net_device *dev;
9336 netdevice_tracker dev_tracker;
9337 unsigned long event;
9340 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
9342 struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
9343 container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
9344 struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
9345 struct net_device *dev = inet6addr_work->dev;
9346 unsigned long event = inet6addr_work->event;
9347 struct mlxsw_sp_rif *rif;
9350 mutex_lock(&mlxsw_sp->router->lock);
9352 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9353 if (!mlxsw_sp_rif_should_config(rif, dev, event))
9356 __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, false, NULL);
9358 mutex_unlock(&mlxsw_sp->router->lock);
9360 netdev_put(dev, &inet6addr_work->dev_tracker);
9361 kfree(inet6addr_work);
9364 /* Called with rcu_read_lock() */
9365 static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
9366 unsigned long event, void *ptr)
9368 struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
9369 struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
9370 struct net_device *dev = if6->idev->dev;
9371 struct mlxsw_sp_router *router;
9373 /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
9374 if (event == NETDEV_UP)
9377 inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
9378 if (!inet6addr_work)
9381 router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
9382 INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
9383 inet6addr_work->mlxsw_sp = router->mlxsw_sp;
9384 inet6addr_work->dev = dev;
9385 inet6addr_work->event = event;
9386 netdev_hold(dev, &inet6addr_work->dev_tracker, GFP_ATOMIC);
9387 mlxsw_core_schedule_work(&inet6addr_work->work);
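/* The inet6addr notifier runs in atomic context (see the rcu_read_lock()
 * comment above), so it cannot take router->lock directly. It therefore
 * allocates a work item with GFP_ATOMIC, holds a tracked reference on the
 * netdevice and defers the processing to mlxsw_sp_inet6addr_event_work(),
 * which runs in process context, takes the lock and drops the reference.
 */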
9392 static int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
9393 unsigned long event, void *ptr)
9395 struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
9396 struct net_device *dev = i6vi->i6vi_dev->dev;
9397 struct mlxsw_sp *mlxsw_sp;
9398 struct mlxsw_sp_rif *rif;
9401 mlxsw_sp = mlxsw_sp_lower_get(dev);
9405 mutex_lock(&mlxsw_sp->router->lock);
9406 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9407 if (!mlxsw_sp_rif_should_config(rif, dev, event))
9410 err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, false,
9413 mutex_unlock(&mlxsw_sp->router->lock);
9414 return notifier_from_errno(err);
9417 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
9418 const char *mac, int mtu, u8 mac_profile)
9420 char ritr_pl[MLXSW_REG_RITR_LEN];
9423 mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
9424 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9428 mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
9429 mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
9430 mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, mac_profile);
9431 mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
9432 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9436 mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
9437 struct mlxsw_sp_rif *rif,
9438 struct netlink_ext_ack *extack)
9440 struct net_device *dev = mlxsw_sp_rif_dev(rif);
9445 fid_index = mlxsw_sp_fid_index(rif->fid);
9447 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
9451 old_mac_profile = rif->mac_profile_id;
9452 err = mlxsw_sp_rif_mac_profile_replace(mlxsw_sp, rif, dev->dev_addr,
9455 goto err_rif_mac_profile_replace;
9457 err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
9458 dev->mtu, rif->mac_profile_id);
9462 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
9464 goto err_rif_fdb_op;
9466 if (rif->mtu != dev->mtu) {
9467 struct mlxsw_sp_vr *vr;
9470 /* The RIF is relevant only to its mr_table instance, as unlike
9471 * unicast routing, in multicast routing a RIF cannot be shared
9472 * between several multicast routing tables.
9474 vr = &mlxsw_sp->router->vrs[rif->vr_id];
9475 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
9476 mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
9480 ether_addr_copy(rif->addr, dev->dev_addr);
9481 rif->mtu = dev->mtu;
9483 netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
9488 mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu,
9491 mlxsw_sp_rif_mac_profile_replace(mlxsw_sp, rif, rif->addr, extack);
9492 err_rif_mac_profile_replace:
9493 mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
9497 static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
9498 struct netdev_notifier_pre_changeaddr_info *info)
9500 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9501 struct mlxsw_sp_rif_mac_profile *profile;
9502 struct netlink_ext_ack *extack;
9503 u8 max_rif_mac_profiles;
9506 extack = netdev_notifier_info_to_extack(&info->info);
9508 profile = mlxsw_sp_rif_mac_profile_find(mlxsw_sp, info->dev_addr);
9512 max_rif_mac_profiles = mlxsw_sp->router->max_rif_mac_profile;
9513 occ = mlxsw_sp_rif_mac_profiles_occ_get(mlxsw_sp);
9514 if (occ < max_rif_mac_profiles)
9517 if (!mlxsw_sp_rif_mac_profile_is_shared(rif))
9520 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interface MAC profiles");
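/* The PRE_CHANGEADDR check above only rejects a MAC change when it would
 * require allocating a new RIF MAC profile while all profiles are already
 * in use and the RIF's current profile is shared with other RIFs, so it
 * cannot simply be edited in place.
 */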
9524 static bool mlxsw_sp_router_netdevice_interesting(struct mlxsw_sp *mlxsw_sp,
9525 struct net_device *dev)
9527 struct vlan_dev_priv *vlan;
9529 if (netif_is_lag_master(dev) ||
9530 netif_is_bridge_master(dev) ||
9531 mlxsw_sp_port_dev_check(dev) ||
9532 mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev) ||
9533 netif_is_l3_master(dev))
9536 if (!is_vlan_dev(dev))
9539 vlan = vlan_dev_priv(dev);
9540 return netif_is_lag_master(vlan->real_dev) ||
9541 netif_is_bridge_master(vlan->real_dev) ||
9542 mlxsw_sp_port_dev_check(vlan->real_dev);
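/* A CRIF is only tracked for netdevices that may eventually back a RIF:
 * front-panel ports, LAGs, bridges, IP-in-IP overlay devices, VRF masters,
 * and VLAN uppers of ports, LAGs or bridges. All other netdevices are
 * ignored by the CRIF bookkeeping below.
 */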
9545 static struct mlxsw_sp_crif *
9546 mlxsw_sp_crif_register(struct mlxsw_sp_router *router, struct net_device *dev)
9548 struct mlxsw_sp_crif *crif;
9551 if (WARN_ON(mlxsw_sp_crif_lookup(router, dev)))
9554 crif = mlxsw_sp_crif_alloc(dev);
9556 return ERR_PTR(-ENOMEM);
9558 err = mlxsw_sp_crif_insert(router, crif);
9560 goto err_netdev_insert;
9565 mlxsw_sp_crif_free(crif);
9566 return ERR_PTR(err);
9569 static void mlxsw_sp_crif_unregister(struct mlxsw_sp_router *router,
9570 struct mlxsw_sp_crif *crif)
9572 struct mlxsw_sp_nexthop *nh, *tmp;
9574 mlxsw_sp_crif_remove(router, crif);
9576 list_for_each_entry_safe(nh, tmp, &crif->nexthop_list, crif_list_node)
9577 mlxsw_sp_nexthop_type_fini(router->mlxsw_sp, nh);
9580 crif->can_destroy = true;
9582 mlxsw_sp_crif_free(crif);
9585 static int mlxsw_sp_netdevice_register(struct mlxsw_sp_router *router,
9586 struct net_device *dev)
9588 struct mlxsw_sp_crif *crif;
9590 if (!mlxsw_sp_router_netdevice_interesting(router->mlxsw_sp, dev))
9593 crif = mlxsw_sp_crif_register(router, dev);
9594 return PTR_ERR_OR_ZERO(crif);
9597 static void mlxsw_sp_netdevice_unregister(struct mlxsw_sp_router *router,
9598 struct net_device *dev)
9600 struct mlxsw_sp_crif *crif;
9602 if (!mlxsw_sp_router_netdevice_interesting(router->mlxsw_sp, dev))
9605 /* netdev_run_todo(), by way of netdev_wait_allrefs_any(), rebroadcasts
9606 * the NETDEV_UNREGISTER message, so we can get here twice. If that's
9607 * what happened, the netdevice state is NETREG_UNREGISTERED. In that
9608 * case, we expect to have collected the CRIF already, and warn if it
9609 * still exists. Otherwise we expect the CRIF to exist.
9611 crif = mlxsw_sp_crif_lookup(router, dev);
9612 if (dev->reg_state == NETREG_UNREGISTERED) {
9619 mlxsw_sp_crif_unregister(router, crif);
9622 static bool mlxsw_sp_is_offload_xstats_event(unsigned long event)
9625 case NETDEV_OFFLOAD_XSTATS_ENABLE:
9626 case NETDEV_OFFLOAD_XSTATS_DISABLE:
9627 case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
9628 case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
9636 mlxsw_sp_router_port_offload_xstats_cmd(struct mlxsw_sp_rif *rif,
9637 unsigned long event,
9638 struct netdev_notifier_offload_xstats_info *info)
9640 switch (info->type) {
9641 case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
9648 case NETDEV_OFFLOAD_XSTATS_ENABLE:
9649 return mlxsw_sp_router_port_l3_stats_enable(rif);
9650 case NETDEV_OFFLOAD_XSTATS_DISABLE:
9651 mlxsw_sp_router_port_l3_stats_disable(rif);
9653 case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
9654 mlxsw_sp_router_port_l3_stats_report_used(rif, info);
9656 case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
9657 return mlxsw_sp_router_port_l3_stats_report_delta(rif, info);
9665 mlxsw_sp_netdevice_offload_xstats_cmd(struct mlxsw_sp *mlxsw_sp,
9666 struct net_device *dev,
9667 unsigned long event,
9668 struct netdev_notifier_offload_xstats_info *info)
9670 struct mlxsw_sp_rif *rif;
9672 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9676 return mlxsw_sp_router_port_offload_xstats_cmd(rif, event, info);
9679 static bool mlxsw_sp_is_router_event(unsigned long event)
9682 case NETDEV_PRE_CHANGEADDR:
9683 case NETDEV_CHANGEADDR:
9684 case NETDEV_CHANGEMTU:
9691 static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
9692 unsigned long event, void *ptr)
9694 struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
9695 struct mlxsw_sp *mlxsw_sp;
9696 struct mlxsw_sp_rif *rif;
9698 mlxsw_sp = mlxsw_sp_lower_get(dev);
9702 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9707 case NETDEV_CHANGEMTU:
9708 case NETDEV_CHANGEADDR:
9709 return mlxsw_sp_router_port_change_event(mlxsw_sp, rif, extack);
9710 case NETDEV_PRE_CHANGEADDR:
9711 return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
9720 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
9721 struct net_device *l3_dev,
9722 struct netlink_ext_ack *extack)
9724 struct mlxsw_sp_rif *rif;
9726 /* If netdev is already associated with a RIF, then we need to
9727 * destroy it and create a new one with the new virtual router ID.
9729 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
9731 __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, false,
9734 return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, false,
9738 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
9739 struct net_device *l3_dev)
9741 struct mlxsw_sp_rif *rif;
9743 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
9746 __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, false, NULL);
9749 static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr)
9751 struct netdev_notifier_changeupper_info *info = ptr;
9753 if (event != NETDEV_PRECHANGEUPPER && event != NETDEV_CHANGEUPPER)
9755 return netif_is_l3_master(info->upper_dev);
9759 mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
9760 struct netdev_notifier_changeupper_info *info)
9762 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
9765 /* We do not create a RIF for a macvlan, but only use it to
9766 * direct more MAC addresses to the router.
9768 if (!mlxsw_sp || netif_is_macvlan(l3_dev))
9772 case NETDEV_PRECHANGEUPPER:
9774 case NETDEV_CHANGEUPPER:
9775 if (info->linking) {
9776 struct netlink_ext_ack *extack;
9778 extack = netdev_notifier_info_to_extack(&info->info);
9779 err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
9781 mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
9789 struct mlxsw_sp_router_replay_inetaddr_up {
9790 struct mlxsw_sp *mlxsw_sp;
9791 struct netlink_ext_ack *extack;
9796 static int mlxsw_sp_router_replay_inetaddr_up(struct net_device *dev,
9797 struct netdev_nested_priv *priv)
9799 struct mlxsw_sp_router_replay_inetaddr_up *ctx = priv->data;
9800 bool nomaster = ctx->deslavement;
9801 struct mlxsw_sp_crif *crif;
9804 if (mlxsw_sp_dev_addr_list_empty(dev))
9807 crif = mlxsw_sp_crif_lookup(ctx->mlxsw_sp->router, dev);
9808 if (!crif || crif->rif)
9811 if (!mlxsw_sp_rif_should_config(crif->rif, dev, NETDEV_UP))
9814 err = __mlxsw_sp_inetaddr_event(ctx->mlxsw_sp, dev, NETDEV_UP,
9815 nomaster, ctx->extack);
9823 static int mlxsw_sp_router_unreplay_inetaddr_up(struct net_device *dev,
9824 struct netdev_nested_priv *priv)
9826 struct mlxsw_sp_router_replay_inetaddr_up *ctx = priv->data;
9827 bool nomaster = ctx->deslavement;
9828 struct mlxsw_sp_crif *crif;
9833 if (mlxsw_sp_dev_addr_list_empty(dev))
9836 crif = mlxsw_sp_crif_lookup(ctx->mlxsw_sp->router, dev);
9837 if (!crif || !crif->rif)
9840 /* We are rolling back NETDEV_UP, so ask for that. */
9841 if (!mlxsw_sp_rif_should_config(crif->rif, dev, NETDEV_UP))
9844 __mlxsw_sp_inetaddr_event(ctx->mlxsw_sp, dev, NETDEV_DOWN, nomaster,
9851 int mlxsw_sp_netdevice_enslavement_replay(struct mlxsw_sp *mlxsw_sp,
9852 struct net_device *upper_dev,
9853 struct netlink_ext_ack *extack)
9855 struct mlxsw_sp_router_replay_inetaddr_up ctx = {
9856 .mlxsw_sp = mlxsw_sp,
9858 .deslavement = false,
9860 struct netdev_nested_priv priv = {
9865 err = mlxsw_sp_router_replay_inetaddr_up(upper_dev, &priv);
9869 err = netdev_walk_all_upper_dev_rcu(upper_dev,
9870 mlxsw_sp_router_replay_inetaddr_up,
9878 netdev_walk_all_upper_dev_rcu(upper_dev,
9879 mlxsw_sp_router_unreplay_inetaddr_up,
9881 mlxsw_sp_router_unreplay_inetaddr_up(upper_dev, &priv);
9885 void mlxsw_sp_netdevice_deslavement_replay(struct mlxsw_sp *mlxsw_sp,
9886 struct net_device *dev)
9888 struct mlxsw_sp_router_replay_inetaddr_up ctx = {
9889 .mlxsw_sp = mlxsw_sp,
9890 .deslavement = true,
9892 struct netdev_nested_priv priv = {
9896 mlxsw_sp_router_replay_inetaddr_up(dev, &priv);
9900 mlxsw_sp_port_vid_router_join_existing(struct mlxsw_sp_port *mlxsw_sp_port,
9901 u16 vid, struct net_device *dev,
9902 struct netlink_ext_ack *extack)
9904 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
9906 mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port,
9908 if (WARN_ON(!mlxsw_sp_port_vlan))
9911 return mlxsw_sp_port_vlan_router_join_existing(mlxsw_sp_port_vlan,
9916 mlxsw_sp_port_vid_router_leave(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
9917 struct net_device *dev)
9919 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
9921 mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port,
9923 if (WARN_ON(!mlxsw_sp_port_vlan))
9926 __mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
9929 static int __mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port,
9930 struct net_device *lag_dev,
9931 struct netlink_ext_ack *extack)
9933 u16 default_vid = MLXSW_SP_DEFAULT_VID;
9934 struct net_device *upper_dev;
9935 struct list_head *iter;
9940 err = mlxsw_sp_port_vid_router_join_existing(mlxsw_sp_port, default_vid,
9945 netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
9946 if (!is_vlan_dev(upper_dev))
9949 vid = vlan_dev_vlan_id(upper_dev);
9950 err = mlxsw_sp_port_vid_router_join_existing(mlxsw_sp_port, vid,
9953 goto err_router_join_dev;
9960 err_router_join_dev:
9961 netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
9962 if (!is_vlan_dev(upper_dev))
9967 vid = vlan_dev_vlan_id(upper_dev);
9968 mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, vid, upper_dev);
9971 mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, default_vid, lag_dev);
9976 __mlxsw_sp_router_port_leave_lag(struct mlxsw_sp_port *mlxsw_sp_port,
9977 struct net_device *lag_dev)
9979 u16 default_vid = MLXSW_SP_DEFAULT_VID;
9980 struct net_device *upper_dev;
9981 struct list_head *iter;
9984 netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
9985 if (!is_vlan_dev(upper_dev))
9988 vid = vlan_dev_vlan_id(upper_dev);
9989 mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, vid, upper_dev);
9992 mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, default_vid, lag_dev);
9995 int mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port,
9996 struct net_device *lag_dev,
9997 struct netlink_ext_ack *extack)
10001 mutex_lock(&mlxsw_sp_port->mlxsw_sp->router->lock);
10002 err = __mlxsw_sp_router_port_join_lag(mlxsw_sp_port, lag_dev, extack);
10003 mutex_unlock(&mlxsw_sp_port->mlxsw_sp->router->lock);
10008 void mlxsw_sp_router_port_leave_lag(struct mlxsw_sp_port *mlxsw_sp_port,
10009 struct net_device *lag_dev)
10011 mutex_lock(&mlxsw_sp_port->mlxsw_sp->router->lock);
10012 __mlxsw_sp_router_port_leave_lag(mlxsw_sp_port, lag_dev);
10013 mutex_unlock(&mlxsw_sp_port->mlxsw_sp->router->lock);
10016 static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb,
10017 unsigned long event, void *ptr)
10019 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
10020 struct mlxsw_sp_router *router;
10021 struct mlxsw_sp *mlxsw_sp;
10024 router = container_of(nb, struct mlxsw_sp_router, netdevice_nb);
10025 mlxsw_sp = router->mlxsw_sp;
10027 mutex_lock(&mlxsw_sp->router->lock);
10029 if (event == NETDEV_REGISTER) {
10030 err = mlxsw_sp_netdevice_register(router, dev);
10032 /* No need to roll this back, UNREGISTER will collect it
 * anyhow.
 */
10038 if (mlxsw_sp_is_offload_xstats_event(event))
10039 err = mlxsw_sp_netdevice_offload_xstats_cmd(mlxsw_sp, dev,
10041 else if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
10042 err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
10044 else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev))
10045 err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev,
10047 else if (mlxsw_sp_is_router_event(event))
10048 err = mlxsw_sp_netdevice_router_port_event(dev, event, ptr);
10049 else if (mlxsw_sp_is_vrf_event(event, ptr))
10050 err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr);
10052 if (event == NETDEV_UNREGISTER)
10053 mlxsw_sp_netdevice_unregister(router, dev);
10056 mutex_unlock(&mlxsw_sp->router->lock);
10058 return notifier_from_errno(err);
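/* NETDEV_REGISTER is handled first so that a CRIF exists before any other
 * handler might need it; the event is then dispatched to the xstats,
 * IP-in-IP, router-port and VRF handlers, and only afterwards is
 * NETDEV_UNREGISTER allowed to collect the CRIF. All of this runs under
 * router->lock.
 */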
10061 struct mlxsw_sp_macvlan_replay {
10062 struct mlxsw_sp *mlxsw_sp;
10063 struct netlink_ext_ack *extack;
10066 static int mlxsw_sp_macvlan_replay_upper(struct net_device *dev,
10067 struct netdev_nested_priv *priv)
10069 const struct mlxsw_sp_macvlan_replay *rms = priv->data;
10070 struct netlink_ext_ack *extack = rms->extack;
10071 struct mlxsw_sp *mlxsw_sp = rms->mlxsw_sp;
10073 if (!netif_is_macvlan(dev))
10076 return mlxsw_sp_rif_macvlan_add(mlxsw_sp, dev, extack);
10079 static int mlxsw_sp_macvlan_replay(struct mlxsw_sp_rif *rif,
10080 struct netlink_ext_ack *extack)
10082 struct mlxsw_sp_macvlan_replay rms = {
10083 .mlxsw_sp = rif->mlxsw_sp,
10086 struct netdev_nested_priv priv = {
10090 return netdev_walk_all_upper_dev_rcu(mlxsw_sp_rif_dev(rif),
10091 mlxsw_sp_macvlan_replay_upper,
10095 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev,
10096 struct netdev_nested_priv *priv)
10098 struct mlxsw_sp_rif *rif = (struct mlxsw_sp_rif *)priv->data;
10100 if (!netif_is_macvlan(dev))
10103 return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10104 mlxsw_sp_fid_index(rif->fid), false);
10107 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
10109 struct net_device *dev = mlxsw_sp_rif_dev(rif);
10110 struct netdev_nested_priv priv = {
10111 .data = (void *)rif,
10114 if (!netif_is_macvlan_port(dev))
10117 return netdev_walk_all_upper_dev_rcu(dev,
10118 __mlxsw_sp_rif_macvlan_flush, &priv);
10121 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
10122 const struct mlxsw_sp_rif_params *params)
10124 struct mlxsw_sp_rif_subport *rif_subport;
10126 rif_subport = mlxsw_sp_rif_subport_rif(rif);
10127 refcount_set(&rif_subport->ref_count, 1);
10128 rif_subport->vid = params->vid;
10129 rif_subport->lag = params->lag;
10131 rif_subport->lag_id = params->lag_id;
10133 rif_subport->system_port = params->system_port;
10136 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
10138 struct net_device *dev = mlxsw_sp_rif_dev(rif);
10139 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10140 struct mlxsw_sp_rif_subport *rif_subport;
10141 char ritr_pl[MLXSW_REG_RITR_LEN];
10144 rif_subport = mlxsw_sp_rif_subport_rif(rif);
10145 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
10146 rif->rif_index, rif->vr_id, dev->mtu);
10147 mlxsw_reg_ritr_mac_pack(ritr_pl, dev->dev_addr);
10148 mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id);
10149 efid = mlxsw_sp_fid_index(rif->fid);
10150 mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
10151 rif_subport->lag ? rif_subport->lag_id :
10152 rif_subport->system_port,
10154 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
10157 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif,
10158 struct netlink_ext_ack *extack)
10160 struct net_device *dev = mlxsw_sp_rif_dev(rif);
10164 err = mlxsw_sp_rif_mac_profile_get(rif->mlxsw_sp, rif->addr,
10165 &mac_profile, extack);
10168 rif->mac_profile_id = mac_profile;
10170 err = mlxsw_sp_rif_subport_op(rif, true);
10172 goto err_rif_subport_op;
10174 err = mlxsw_sp_macvlan_replay(rif, extack);
10176 goto err_macvlan_replay;
10178 err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10179 mlxsw_sp_fid_index(rif->fid), true);
10181 goto err_rif_fdb_op;
10183 err = mlxsw_sp_fid_rif_set(rif->fid, rif);
10185 goto err_fid_rif_set;
10190 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10191 mlxsw_sp_fid_index(rif->fid), false);
10193 mlxsw_sp_rif_macvlan_flush(rif);
10194 err_macvlan_replay:
10195 mlxsw_sp_rif_subport_op(rif, false);
10196 err_rif_subport_op:
10197 mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, mac_profile);
10201 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
10203 struct net_device *dev = mlxsw_sp_rif_dev(rif);
10204 struct mlxsw_sp_fid *fid = rif->fid;
10206 mlxsw_sp_fid_rif_unset(fid);
10207 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10208 mlxsw_sp_fid_index(fid), false);
10209 mlxsw_sp_rif_macvlan_flush(rif);
10210 mlxsw_sp_rif_subport_op(rif, false);
10211 mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
10214 static struct mlxsw_sp_fid *
10215 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
10216 const struct mlxsw_sp_rif_params *params,
10217 struct netlink_ext_ack *extack)
10219 return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
10222 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
10223 .type = MLXSW_SP_RIF_TYPE_SUBPORT,
10224 .rif_size = sizeof(struct mlxsw_sp_rif_subport),
10225 .setup = mlxsw_sp_rif_subport_setup,
10226 .configure = mlxsw_sp_rif_subport_configure,
10227 .deconfigure = mlxsw_sp_rif_subport_deconfigure,
10228 .fid_get = mlxsw_sp_rif_subport_fid_get,
10231 static int mlxsw_sp_rif_fid_op(struct mlxsw_sp_rif *rif, u16 fid, bool enable)
10233 enum mlxsw_reg_ritr_if_type type = MLXSW_REG_RITR_FID_IF;
10234 struct net_device *dev = mlxsw_sp_rif_dev(rif);
10235 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10236 char ritr_pl[MLXSW_REG_RITR_LEN];
10238 mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
10240 mlxsw_reg_ritr_mac_pack(ritr_pl, dev->dev_addr);
10241 mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id);
10242 mlxsw_reg_ritr_fid_if_fid_set(ritr_pl, fid);
10244 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
10247 u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
10249 return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
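/* The "router port" returned above is a virtual port one past the highest
 * front-panel port (mlxsw_core_max_ports() + 1). It is used below as a
 * flood-table target, so FID broadcast and multicast traffic is delivered
 * to the router block rather than to a physical port.
 */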
10252 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif,
10253 struct netlink_ext_ack *extack)
10255 struct net_device *dev = mlxsw_sp_rif_dev(rif);
10256 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10257 u16 fid_index = mlxsw_sp_fid_index(rif->fid);
10261 err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, rif->addr,
10262 &mac_profile, extack);
10265 rif->mac_profile_id = mac_profile;
10267 err = mlxsw_sp_rif_fid_op(rif, fid_index, true);
10269 goto err_rif_fid_op;
10271 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
10272 mlxsw_sp_router_port(mlxsw_sp), true);
10274 goto err_fid_mc_flood_set;
10276 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
10277 mlxsw_sp_router_port(mlxsw_sp), true);
10279 goto err_fid_bc_flood_set;
10281 err = mlxsw_sp_macvlan_replay(rif, extack);
10283 goto err_macvlan_replay;
10285 err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10286 mlxsw_sp_fid_index(rif->fid), true);
10288 goto err_rif_fdb_op;
10290 err = mlxsw_sp_fid_rif_set(rif->fid, rif);
10292 goto err_fid_rif_set;
10297 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10298 mlxsw_sp_fid_index(rif->fid), false);
10300 mlxsw_sp_rif_macvlan_flush(rif);
10301 err_macvlan_replay:
10302 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
10303 mlxsw_sp_router_port(mlxsw_sp), false);
10304 err_fid_bc_flood_set:
10305 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
10306 mlxsw_sp_router_port(mlxsw_sp), false);
10307 err_fid_mc_flood_set:
10308 mlxsw_sp_rif_fid_op(rif, fid_index, false);
10310 mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile);
10314 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
10316 struct net_device *dev = mlxsw_sp_rif_dev(rif);
10317 u16 fid_index = mlxsw_sp_fid_index(rif->fid);
10318 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10319 struct mlxsw_sp_fid *fid = rif->fid;
10321 mlxsw_sp_fid_rif_unset(fid);
10322 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10323 mlxsw_sp_fid_index(fid), false);
10324 mlxsw_sp_rif_macvlan_flush(rif);
10325 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
10326 mlxsw_sp_router_port(mlxsw_sp), false);
10327 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
10328 mlxsw_sp_router_port(mlxsw_sp), false);
10329 mlxsw_sp_rif_fid_op(rif, fid_index, false);
10330 mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
10333 static struct mlxsw_sp_fid *
10334 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
10335 const struct mlxsw_sp_rif_params *params,
10336 struct netlink_ext_ack *extack)
10338 int rif_ifindex = mlxsw_sp_rif_dev_ifindex(rif);
10340 return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif_ifindex);
10343 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
10345 struct switchdev_notifier_fdb_info info = {};
10346 struct net_device *dev;
10348 dev = br_fdb_find_port(mlxsw_sp_rif_dev(rif), mac, 0);
10354 call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
10358 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
10359 .type = MLXSW_SP_RIF_TYPE_FID,
10360 .rif_size = sizeof(struct mlxsw_sp_rif),
10361 .configure = mlxsw_sp_rif_fid_configure,
10362 .deconfigure = mlxsw_sp_rif_fid_deconfigure,
10363 .fid_get = mlxsw_sp_rif_fid_fid_get,
10364 .fdb_del = mlxsw_sp_rif_fid_fdb_del,
10367 static struct mlxsw_sp_fid *
10368 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
10369 const struct mlxsw_sp_rif_params *params,
10370 struct netlink_ext_ack *extack)
10372 struct net_device *dev = mlxsw_sp_rif_dev(rif);
10373 struct net_device *br_dev;
10375 if (WARN_ON(!params->vid))
10376 return ERR_PTR(-EINVAL);
10378 if (is_vlan_dev(dev)) {
10379 br_dev = vlan_dev_real_dev(dev);
10380 if (WARN_ON(!netif_is_bridge_master(br_dev)))
10381 return ERR_PTR(-EINVAL);
10384 return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, params->vid);
10387 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
10389 struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
10390 struct switchdev_notifier_fdb_info info = {};
10391 u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
10392 struct net_device *br_dev;
10393 struct net_device *dev;
10395 br_dev = is_vlan_dev(rif_dev) ? vlan_dev_real_dev(rif_dev) : rif_dev;
10396 dev = br_fdb_find_port(br_dev, mac, vid);
10402 call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
10406 static int mlxsw_sp_rif_vlan_op(struct mlxsw_sp_rif *rif, u16 vid, u16 efid,
10409 struct net_device *dev = mlxsw_sp_rif_dev(rif);
10410 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10411 char ritr_pl[MLXSW_REG_RITR_LEN];
10413 mlxsw_reg_ritr_vlan_if_pack(ritr_pl, enable, rif->rif_index, rif->vr_id,
10414 dev->mtu, dev->dev_addr,
10415 rif->mac_profile_id, vid, efid);
10417 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
10420 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif, u16 efid,
10421 struct netlink_ext_ack *extack)
10423 struct net_device *dev = mlxsw_sp_rif_dev(rif);
10424 u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
10425 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10429 err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, rif->addr,
10430 &mac_profile, extack);
10433 rif->mac_profile_id = mac_profile;
10435 err = mlxsw_sp_rif_vlan_op(rif, vid, efid, true);
10437 goto err_rif_vlan_fid_op;
10439 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
10440 mlxsw_sp_router_port(mlxsw_sp), true);
10442 goto err_fid_mc_flood_set;
10444 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
10445 mlxsw_sp_router_port(mlxsw_sp), true);
10447 goto err_fid_bc_flood_set;
10449 err = mlxsw_sp_macvlan_replay(rif, extack);
10451 goto err_macvlan_replay;
10453 err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10454 mlxsw_sp_fid_index(rif->fid), true);
10456 goto err_rif_fdb_op;
10458 err = mlxsw_sp_fid_rif_set(rif->fid, rif);
10460 goto err_fid_rif_set;
10465 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10466 mlxsw_sp_fid_index(rif->fid), false);
10468 mlxsw_sp_rif_macvlan_flush(rif);
10469 err_macvlan_replay:
10470 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
10471 mlxsw_sp_router_port(mlxsw_sp), false);
10472 err_fid_bc_flood_set:
10473 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
10474 mlxsw_sp_router_port(mlxsw_sp), false);
10475 err_fid_mc_flood_set:
10476 mlxsw_sp_rif_vlan_op(rif, vid, 0, false);
10477 err_rif_vlan_fid_op:
10478 mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile);
10482 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
10484 struct net_device *dev = mlxsw_sp_rif_dev(rif);
10485 u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
10486 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10488 mlxsw_sp_fid_rif_unset(rif->fid);
10489 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10490 mlxsw_sp_fid_index(rif->fid), false);
10491 mlxsw_sp_rif_macvlan_flush(rif);
10492 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
10493 mlxsw_sp_router_port(mlxsw_sp), false);
10494 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
10495 mlxsw_sp_router_port(mlxsw_sp), false);
10496 mlxsw_sp_rif_vlan_op(rif, vid, 0, false);
10497 mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
10500 static int mlxsw_sp1_rif_vlan_configure(struct mlxsw_sp_rif *rif,
10501 struct netlink_ext_ack *extack)
10503 return mlxsw_sp_rif_vlan_configure(rif, 0, extack);
10506 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_vlan_ops = {
10507 .type = MLXSW_SP_RIF_TYPE_VLAN,
10508 .rif_size = sizeof(struct mlxsw_sp_rif),
10509 .configure = mlxsw_sp1_rif_vlan_configure,
10510 .deconfigure = mlxsw_sp_rif_vlan_deconfigure,
10511 .fid_get = mlxsw_sp_rif_vlan_fid_get,
10512 .fdb_del = mlxsw_sp_rif_vlan_fdb_del,
10515 static int mlxsw_sp2_rif_vlan_configure(struct mlxsw_sp_rif *rif,
10516 struct netlink_ext_ack *extack)
10518 u16 efid = mlxsw_sp_fid_index(rif->fid);
10520 return mlxsw_sp_rif_vlan_configure(rif, efid, extack);
10523 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_vlan_ops = {
10524 .type = MLXSW_SP_RIF_TYPE_VLAN,
10525 .rif_size = sizeof(struct mlxsw_sp_rif),
10526 .configure = mlxsw_sp2_rif_vlan_configure,
10527 .deconfigure = mlxsw_sp_rif_vlan_deconfigure,
10528 .fid_get = mlxsw_sp_rif_vlan_fid_get,
10529 .fdb_del = mlxsw_sp_rif_vlan_fdb_del,
10532 static struct mlxsw_sp_rif_ipip_lb *
10533 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
10535 return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
10539 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
10540 const struct mlxsw_sp_rif_params *params)
10542 struct mlxsw_sp_rif_params_ipip_lb *params_lb;
10543 struct mlxsw_sp_rif_ipip_lb *rif_lb;
10545 params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
10547 rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
10548 rif_lb->lb_config = params_lb->lb_config;
10552 mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif,
10553 struct netlink_ext_ack *extack)
10555 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
10556 struct net_device *dev = mlxsw_sp_rif_dev(rif);
10557 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev);
10558 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10559 struct mlxsw_sp_vr *ul_vr;
10562 ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, extack);
10564 return PTR_ERR(ul_vr);
10566 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
10568 goto err_loopback_op;
10570 lb_rif->ul_vr_id = ul_vr->id;
10571 lb_rif->ul_rif_id = 0;
10572 ++ul_vr->rif_count;
10576 mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
10580 static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
10582 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
10583 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10584 struct mlxsw_sp_vr *ul_vr;
10586 ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
10587 mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
10589 --ul_vr->rif_count;
10590 mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
10593 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
10594 .type = MLXSW_SP_RIF_TYPE_IPIP_LB,
10595 .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb),
10596 .setup = mlxsw_sp_rif_ipip_lb_setup,
10597 .configure = mlxsw_sp1_rif_ipip_lb_configure,
10598 .deconfigure = mlxsw_sp1_rif_ipip_lb_deconfigure,
10601 static const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
10602 [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops,
10603 [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp1_rif_vlan_ops,
10604 [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops,
10605 [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp1_rif_ipip_lb_ops,
10609 mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
10611 struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
10612 char ritr_pl[MLXSW_REG_RITR_LEN];
10614 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
10615 ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
10616 mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
10617 MLXSW_REG_RITR_LOOPBACK_GENERIC);
10619 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
10622 static struct mlxsw_sp_rif *
10623 mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
10624 struct mlxsw_sp_crif *ul_crif,
10625 struct netlink_ext_ack *extack)
10627 struct mlxsw_sp_rif *ul_rif;
10628 u8 rif_entries = 1;
10632 err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries);
10634 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
10635 return ERR_PTR(err);
10638 ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id,
10642 goto err_rif_alloc;
10645 mlxsw_sp->router->rifs[rif_index] = ul_rif;
10646 ul_rif->mlxsw_sp = mlxsw_sp;
10647 ul_rif->rif_entries = rif_entries;
10648 err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
10650 goto ul_rif_op_err;
10652 atomic_add(rif_entries, &mlxsw_sp->router->rifs_count);
10656 mlxsw_sp->router->rifs[rif_index] = NULL;
10657 mlxsw_sp_rif_free(ul_rif);
10659 mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
10660 return ERR_PTR(err);
10663 static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
10665 struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
10666 u8 rif_entries = ul_rif->rif_entries;
10667 u16 rif_index = ul_rif->rif_index;
10669 atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count);
10670 mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
10671 mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
10672 mlxsw_sp_rif_free(ul_rif);
10673 mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
10676 static struct mlxsw_sp_rif *
10677 mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
10678 struct mlxsw_sp_crif *ul_crif,
10679 struct netlink_ext_ack *extack)
10681 struct mlxsw_sp_vr *vr;
10684 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
10686 return ERR_CAST(vr);
10688 if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
10691 vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, ul_crif, extack);
10692 if (IS_ERR(vr->ul_rif)) {
10693 err = PTR_ERR(vr->ul_rif);
10694 goto err_ul_rif_create;
10698 refcount_set(&vr->ul_rif_refcnt, 1);
10703 mlxsw_sp_vr_put(mlxsw_sp, vr);
10704 return ERR_PTR(err);
10707 static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
10709 struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
10710 struct mlxsw_sp_vr *vr;
10712 vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
10714 if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
10718 mlxsw_sp_ul_rif_destroy(ul_rif);
10719 mlxsw_sp_vr_put(mlxsw_sp, vr);
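/* The underlay RIF is shared per virtual router: mlxsw_sp_ul_rif_get()
 * either takes another reference on an existing ul_rif or creates it on
 * first use, and mlxsw_sp_ul_rif_put() destroys it and releases the VR only
 * once the last reference is dropped.
 */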
10722 int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
10725 struct mlxsw_sp_rif *ul_rif;
10728 mutex_lock(&mlxsw_sp->router->lock);
10729 ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL, NULL);
10730 if (IS_ERR(ul_rif)) {
10731 err = PTR_ERR(ul_rif);
10734 *ul_rif_index = ul_rif->rif_index;
10736 mutex_unlock(&mlxsw_sp->router->lock);
10740 void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
10742 struct mlxsw_sp_rif *ul_rif;
10744 mutex_lock(&mlxsw_sp->router->lock);
10745 ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
10746 if (WARN_ON(!ul_rif))
10749 mlxsw_sp_ul_rif_put(ul_rif);
10751 mutex_unlock(&mlxsw_sp->router->lock);
10755 mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif,
10756 struct netlink_ext_ack *extack)
10758 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
10759 struct net_device *dev = mlxsw_sp_rif_dev(rif);
10760 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev);
10761 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10762 struct mlxsw_sp_rif *ul_rif;
10765 ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL, extack);
10766 if (IS_ERR(ul_rif))
10767 return PTR_ERR(ul_rif);
10769 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
10771 goto err_loopback_op;
10773 lb_rif->ul_vr_id = 0;
10774 lb_rif->ul_rif_id = ul_rif->rif_index;
10779 mlxsw_sp_ul_rif_put(ul_rif);
10783 static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
10785 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
10786 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10787 struct mlxsw_sp_rif *ul_rif;
10789 ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
10790 mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
10791 mlxsw_sp_ul_rif_put(ul_rif);
10794 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
10795 .type = MLXSW_SP_RIF_TYPE_IPIP_LB,
10796 .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb),
10797 .setup = mlxsw_sp_rif_ipip_lb_setup,
10798 .configure = mlxsw_sp2_rif_ipip_lb_configure,
10799 .deconfigure = mlxsw_sp2_rif_ipip_lb_deconfigure,
10802 static const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
10803 [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops,
10804 [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp2_rif_vlan_ops,
10805 [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops,
10806 [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp2_rif_ipip_lb_ops,
10809 static int mlxsw_sp_rifs_table_init(struct mlxsw_sp *mlxsw_sp)
10811 struct gen_pool *rifs_table;
10814 rifs_table = gen_pool_create(0, -1);
10818 gen_pool_set_algo(rifs_table, gen_pool_first_fit_order_align,
10821 err = gen_pool_add(rifs_table, MLXSW_SP_ROUTER_GENALLOC_OFFSET,
10822 MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS), -1);
10824 goto err_gen_pool_add;
10826 mlxsw_sp->router->rifs_table = rifs_table;
10831 gen_pool_destroy(rifs_table);
10835 static void mlxsw_sp_rifs_table_fini(struct mlxsw_sp *mlxsw_sp)
10837 gen_pool_destroy(mlxsw_sp->router->rifs_table);
10840 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
10842 u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
10843 struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
10844 struct mlxsw_core *core = mlxsw_sp->core;
10847 if (!MLXSW_CORE_RES_VALID(core, MAX_RIF_MAC_PROFILES))
10849 mlxsw_sp->router->max_rif_mac_profile =
10850 MLXSW_CORE_RES_GET(core, MAX_RIF_MAC_PROFILES);
10852 mlxsw_sp->router->rifs = kcalloc(max_rifs,
10853 sizeof(struct mlxsw_sp_rif *),
10855 if (!mlxsw_sp->router->rifs)
10858 err = mlxsw_sp_rifs_table_init(mlxsw_sp);
10860 goto err_rifs_table_init;
10862 idr_init(&mlxsw_sp->router->rif_mac_profiles_idr);
10863 atomic_set(&mlxsw_sp->router->rif_mac_profiles_count, 0);
10864 atomic_set(&mlxsw_sp->router->rifs_count, 0);
10865 devl_resource_occ_get_register(devlink,
10866 MLXSW_SP_RESOURCE_RIF_MAC_PROFILES,
10867 mlxsw_sp_rif_mac_profiles_occ_get,
10869 devl_resource_occ_get_register(devlink,
10870 MLXSW_SP_RESOURCE_RIFS,
10871 mlxsw_sp_rifs_occ_get,
10876 err_rifs_table_init:
10877 kfree(mlxsw_sp->router->rifs);
10881 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
10883 int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
10884 struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
10887 WARN_ON_ONCE(atomic_read(&mlxsw_sp->router->rifs_count));
10888 for (i = 0; i < max_rifs; i++)
10889 WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
10891 devl_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_RIFS);
10892 devl_resource_occ_get_unregister(devlink,
10893 MLXSW_SP_RESOURCE_RIF_MAC_PROFILES);
10894 WARN_ON(!idr_is_empty(&mlxsw_sp->router->rif_mac_profiles_idr));
10895 idr_destroy(&mlxsw_sp->router->rif_mac_profiles_idr);
10896 mlxsw_sp_rifs_table_fini(mlxsw_sp);
10897 kfree(mlxsw_sp->router->rifs);
10901 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
10903 char tigcr_pl[MLXSW_REG_TIGCR_LEN];
10905 mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
10906 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
10909 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
10913 INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
10915 err = mlxsw_sp_ipip_ecn_encap_init(mlxsw_sp);
10918 err = mlxsw_sp_ipip_ecn_decap_init(mlxsw_sp);
10922 return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
10925 static int mlxsw_sp1_ipips_init(struct mlxsw_sp *mlxsw_sp)
10927 mlxsw_sp->router->ipip_ops_arr = mlxsw_sp1_ipip_ops_arr;
10928 return mlxsw_sp_ipips_init(mlxsw_sp);
10931 static int mlxsw_sp2_ipips_init(struct mlxsw_sp *mlxsw_sp)
10933 mlxsw_sp->router->ipip_ops_arr = mlxsw_sp2_ipip_ops_arr;
10934 return mlxsw_sp_ipips_init(mlxsw_sp);
10937 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
10939 WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
10942 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
10944 struct mlxsw_sp_router *router;
10946 /* Flush pending FIB notifications and then flush the device's
10947 * table before requesting another dump. The FIB notification
10948 * block is unregistered, so no need to take RTNL.
10950 mlxsw_core_flush_owq();
10951 router = container_of(nb, struct mlxsw_sp_router, fib_nb);
10952 mlxsw_sp_router_fib_flush(router->mlxsw_sp);
10955 #ifdef CONFIG_IP_ROUTE_MULTIPATH
10956 struct mlxsw_sp_mp_hash_config {
10957 DECLARE_BITMAP(headers, __MLXSW_REG_RECR2_HEADER_CNT);
10958 DECLARE_BITMAP(fields, __MLXSW_REG_RECR2_FIELD_CNT);
10959 DECLARE_BITMAP(inner_headers, __MLXSW_REG_RECR2_HEADER_CNT);
10960 DECLARE_BITMAP(inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT);
10961 bool inc_parsing_depth;
10964 #define MLXSW_SP_MP_HASH_HEADER_SET(_headers, _header) \
10965 bitmap_set(_headers, MLXSW_REG_RECR2_##_header, 1)
10967 #define MLXSW_SP_MP_HASH_FIELD_SET(_fields, _field) \
10968 bitmap_set(_fields, MLXSW_REG_RECR2_##_field, 1)
10970 #define MLXSW_SP_MP_HASH_FIELD_RANGE_SET(_fields, _field, _nr) \
10971 bitmap_set(_fields, MLXSW_REG_RECR2_##_field, _nr)
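/* The helpers above only set bits in the RECR2 header/field bitmaps. For
 * example:
 *
 *	MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
 *		expands to bitmap_set(fields, MLXSW_REG_RECR2_IPV4_PROTOCOL, 1);
 *
 *	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
 *		expands to bitmap_set(fields, MLXSW_REG_RECR2_IPV4_SIP0, 4);
 */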
10973 static void mlxsw_sp_mp_hash_inner_l3(struct mlxsw_sp_mp_hash_config *config)
10975 unsigned long *inner_headers = config->inner_headers;
10976 unsigned long *inner_fields = config->inner_fields;
10979 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP);
10980 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP);
10981 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4);
10982 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4);
10984 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP);
10985 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP);
10986 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7);
10987 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8);
10988 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7);
10989 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8);
10990 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER);
10991 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL);
10994 static void mlxsw_sp_mp4_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
10996 unsigned long *headers = config->headers;
10997 unsigned long *fields = config->fields;
10999 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
11000 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
11001 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
11002 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
11006 mlxsw_sp_mp_hash_inner_custom(struct mlxsw_sp_mp_hash_config *config,
11009 unsigned long *inner_headers = config->inner_headers;
11010 unsigned long *inner_fields = config->inner_fields;
11013 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP);
11014 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP);
11015 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
11016 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4);
11017 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
11018 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4);
11019 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
11020 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV4_PROTOCOL);
11022 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP);
11023 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP);
11024 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) {
11025 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7);
11026 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8);
11028 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) {
11029 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7);
11030 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8);
11032 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
11033 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER);
11034 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
11035 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL);
11037 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV4);
11038 MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV6);
11039 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
11040 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_SPORT);
11041 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
11042 MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_DPORT);
11045 static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
11046 struct mlxsw_sp_mp_hash_config *config)
11048 struct net *net = mlxsw_sp_net(mlxsw_sp);
11049 unsigned long *headers = config->headers;
11050 unsigned long *fields = config->fields;
11053 switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
11055 mlxsw_sp_mp4_hash_outer_addr(config);
11058 mlxsw_sp_mp4_hash_outer_addr(config);
11059 MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
11060 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
11061 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
11062 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
11066 mlxsw_sp_mp4_hash_outer_addr(config);
11068 mlxsw_sp_mp_hash_inner_l3(config);
11071 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
11073 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
11074 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
11075 MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
11076 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
11077 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
11078 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
11079 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
11080 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
11081 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
11082 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
11083 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
11084 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
11085 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
11087 mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
11092 static void mlxsw_sp_mp6_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
11094 unsigned long *headers = config->headers;
11095 unsigned long *fields = config->fields;
11097 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
11098 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
11099 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
11100 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
11101 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
11102 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
11105 static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp,
11106 struct mlxsw_sp_mp_hash_config *config)
11108 u32 hash_fields = ip6_multipath_hash_fields(mlxsw_sp_net(mlxsw_sp));
11109 unsigned long *headers = config->headers;
11110 unsigned long *fields = config->fields;
11112 switch (ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp))) {
11114 mlxsw_sp_mp6_hash_outer_addr(config);
11115 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
11116 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
11119 mlxsw_sp_mp6_hash_outer_addr(config);
11120 MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
11121 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
11122 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
11123 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
11127 mlxsw_sp_mp6_hash_outer_addr(config);
11128 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
11129 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
11131 mlxsw_sp_mp_hash_inner_l3(config);
11132 config->inc_parsing_depth = true;
11136 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
11137 MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
11138 MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
11139 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) {
11140 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
11141 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
11143 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) {
11144 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
11145 MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
11147 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
11148 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
11149 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
11150 MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
11151 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
11152 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
11153 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
11154 MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
11156 mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
11157 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)
11158 config->inc_parsing_depth = true;
11163 static int mlxsw_sp_mp_hash_parsing_depth_adjust(struct mlxsw_sp *mlxsw_sp,
11164 bool old_inc_parsing_depth,
11165 bool new_inc_parsing_depth)
11169 if (!old_inc_parsing_depth && new_inc_parsing_depth) {
11170 err = mlxsw_sp_parsing_depth_inc(mlxsw_sp);
11173 mlxsw_sp->router->inc_parsing_depth = true;
11174 } else if (old_inc_parsing_depth && !new_inc_parsing_depth) {
11175 mlxsw_sp_parsing_depth_dec(mlxsw_sp);
11176 mlxsw_sp->router->inc_parsing_depth = false;
11182 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
11184 bool old_inc_parsing_depth, new_inc_parsing_depth;
11185 struct mlxsw_sp_mp_hash_config config = {};
11186 char recr2_pl[MLXSW_REG_RECR2_LEN];
11191 seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
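/* Seeding the ECMP hash with a jhash of the base MAC keeps the seed stable
 * for a given device while differing between devices, which should help
 * avoid hash polarization when several such switches are cascaded.
 */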
11192 mlxsw_reg_recr2_pack(recr2_pl, seed);
11193 mlxsw_sp_mp4_hash_init(mlxsw_sp, &config);
11194 mlxsw_sp_mp6_hash_init(mlxsw_sp, &config);
11196 old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth;
11197 new_inc_parsing_depth = config.inc_parsing_depth;
11198 err = mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp,
11199 old_inc_parsing_depth,
11200 new_inc_parsing_depth);
11204 for_each_set_bit(bit, config.headers, __MLXSW_REG_RECR2_HEADER_CNT)
11205 mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, bit, 1);
11206 for_each_set_bit(bit, config.fields, __MLXSW_REG_RECR2_FIELD_CNT)
11207 mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, bit, 1);
11208 for_each_set_bit(bit, config.inner_headers, __MLXSW_REG_RECR2_HEADER_CNT)
11209 mlxsw_reg_recr2_inner_header_enables_set(recr2_pl, bit, 1);
11210 for_each_set_bit(bit, config.inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT)
11211 mlxsw_reg_recr2_inner_header_fields_enable_set(recr2_pl, bit, 1);
11213 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
11215 goto err_reg_write;
11220 mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, new_inc_parsing_depth,
11221 old_inc_parsing_depth);
11225 static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp)
11227 bool old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth;
11229 mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, old_inc_parsing_depth,
11233 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
11238 static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp)
static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
{
	char rdpm_pl[MLXSW_REG_RDPM_LEN];
	unsigned int i;

	MLXSW_REG_ZERO(rdpm, rdpm_pl);

	/* HW is determining switch priority based on DSCP-bits, but the
	 * kernel is still doing that based on the ToS. Since there's a
	 * mismatch in bits we need to make sure to translate the right
	 * value ToS would observe, skipping the 2 least-significant ECN bits.
	 */
	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
}
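
/* Enable the router via the RGCR register: turn on IPv4 and IPv6 routing,
 * cap the number of router interfaces at the MAX_RIFS device resource and
 * propagate the ip_fwd_update_priority sysctl setting.
 */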
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	struct net *net = mlxsw_sp_net(mlxsw_sp);
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	u64 max_rifs;
	bool usp;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
		return -EIO;
	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
	usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority);

	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
	mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}
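
/* Counterpart of __mlxsw_sp_router_init(): clear the IPv4/IPv6 enable bits
 * in RGCR to disable the router.
 */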
static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];

	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}
static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp,
				struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_router *router = mlxsw_sp->router;
	struct mlxsw_sp_rif *lb_rif;
	int err;

	router->lb_crif = mlxsw_sp_crif_alloc(NULL);
	if (!router->lb_crif)
		return -ENOMEM;

	/* Create a generic loopback RIF associated with the main table
	 * (default VRF). Any table can be used, but the main table exists
	 * anyway, so we do not waste resources. Loopback RIFs are usually
	 * created with a NULL CRIF, but this RIF is used as a fallback RIF
	 * for blackhole nexthops, and nexthops expect to have a valid CRIF.
	 */
	lb_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, router->lb_crif,
				     extack);
	if (IS_ERR(lb_rif)) {
		err = PTR_ERR(lb_rif);
		goto err_ul_rif_get;
	}

	return 0;

err_ul_rif_get:
	mlxsw_sp_crif_free(router->lb_crif);
	return err;
}
static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp)
{
	mlxsw_sp_ul_rif_put(mlxsw_sp->router->lb_crif->rif);
	mlxsw_sp_crif_free(mlxsw_sp->router->lb_crif);
}
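
/* Per-ASIC router initialization: Spectrum-1 and Spectrum-2+ use different
 * RIF ops and different adjacency group size ranges, so each generation
 * installs its own tables here.
 */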
static int mlxsw_sp1_router_init(struct mlxsw_sp *mlxsw_sp)
{
	size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp1_adj_grp_size_ranges);

	mlxsw_sp->router->rif_ops_arr = mlxsw_sp1_rif_ops_arr;
	mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp1_adj_grp_size_ranges;
	mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;

	return 0;
}

const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops = {
	.init = mlxsw_sp1_router_init,
	.ipips_init = mlxsw_sp1_ipips_init,
};
static int mlxsw_sp2_router_init(struct mlxsw_sp *mlxsw_sp)
{
	size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp2_adj_grp_size_ranges);

	mlxsw_sp->router->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
	mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp2_adj_grp_size_ranges;
	mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;

	return 0;
}

const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops = {
	.init = mlxsw_sp2_router_init,
	.ipips_init = mlxsw_sp2_ipips_init,
};
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
			 struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_router *router;
	struct notifier_block *nb;
	int err;

	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
	if (!router)
		return -ENOMEM;
	mutex_init(&router->lock);
	mlxsw_sp->router = router;
	router->mlxsw_sp = mlxsw_sp;

	err = mlxsw_sp->router_ops->init(mlxsw_sp);
	if (err)
		goto err_router_ops_init;

	INIT_LIST_HEAD(&mlxsw_sp->router->nh_res_grp_list);
	INIT_DELAYED_WORK(&mlxsw_sp->router->nh_grp_activity_dw,
			  mlxsw_sp_nh_grp_activity_work);
	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
	err = __mlxsw_sp_router_init(mlxsw_sp);
	if (err)
		goto err_router_init;

	err = mlxsw_sp->router_ops->ipips_init(mlxsw_sp);
	if (err)
		goto err_ipips_init;

	err = rhashtable_init(&mlxsw_sp->router->crif_ht,
			      &mlxsw_sp_crif_ht_params);
	if (err)
		goto err_crif_ht_init;

	err = mlxsw_sp_rifs_init(mlxsw_sp);
	if (err)
		goto err_rifs_init;

	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
			      &mlxsw_sp_nexthop_ht_params);
	if (err)
		goto err_nexthop_ht_init;

	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
			      &mlxsw_sp_nexthop_group_ht_params);
	if (err)
		goto err_nexthop_group_ht_init;

	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
	err = mlxsw_sp_lpm_init(mlxsw_sp);
	if (err)
		goto err_lpm_init;

	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
	if (err)
		goto err_mr_init;

	err = mlxsw_sp_vrs_init(mlxsw_sp);
	if (err)
		goto err_vrs_init;

	err = mlxsw_sp_lb_rif_init(mlxsw_sp, extack);
	if (err)
		goto err_lb_rif_init;

	err = mlxsw_sp_neigh_init(mlxsw_sp);
	if (err)
		goto err_neigh_init;

	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
	if (err)
		goto err_mp_hash_init;

	err = mlxsw_sp_dscp_init(mlxsw_sp);
	if (err)
		goto err_dscp_init;
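
	/* Register the various notifiers only after the router data
	 * structures above are in place, so that address, nexthop, FIB and
	 * netdev events can be handled as soon as registration succeeds.
	 */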
	router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
	err = register_inetaddr_notifier(&router->inetaddr_nb);
	if (err)
		goto err_register_inetaddr_notifier;

	router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
	err = register_inet6addr_notifier(&router->inet6addr_nb);
	if (err)
		goto err_register_inet6addr_notifier;

	router->inetaddr_valid_nb.notifier_call = mlxsw_sp_inetaddr_valid_event;
	err = register_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
	if (err)
		goto err_register_inetaddr_valid_notifier;

	nb = &router->inet6addr_valid_nb;
	nb->notifier_call = mlxsw_sp_inet6addr_valid_event;
	err = register_inet6addr_validator_notifier(nb);
	if (err)
		goto err_register_inet6addr_valid_notifier;

	mlxsw_sp->router->netevent_nb.notifier_call =
		mlxsw_sp_router_netevent_event;
	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
	if (err)
		goto err_register_netevent_notifier;

	mlxsw_sp->router->nexthop_nb.notifier_call =
		mlxsw_sp_nexthop_obj_event;
	err = register_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
					&mlxsw_sp->router->nexthop_nb,
					extack);
	if (err)
		goto err_register_nexthop_notifier;

	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
	err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp),
				    &mlxsw_sp->router->fib_nb,
				    mlxsw_sp_router_fib_dump_flush, extack);
	if (err)
		goto err_register_fib_notifier;

	mlxsw_sp->router->netdevice_nb.notifier_call =
		mlxsw_sp_router_netdevice_event;
	err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
					      &mlxsw_sp->router->netdevice_nb);
	if (err)
		goto err_register_netdev_notifier;

	return 0;
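
	/* Error unwind: undo the registrations and initializations above in
	 * reverse order.
	 */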
err_register_netdev_notifier:
	unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp),
				&mlxsw_sp->router->fib_nb);
err_register_fib_notifier:
	unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
				    &mlxsw_sp->router->nexthop_nb);
err_register_nexthop_notifier:
	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
err_register_netevent_notifier:
	unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb);
err_register_inet6addr_valid_notifier:
	unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
err_register_inetaddr_valid_notifier:
	unregister_inet6addr_notifier(&router->inet6addr_nb);
err_register_inet6addr_notifier:
	unregister_inetaddr_notifier(&router->inetaddr_nb);
err_register_inetaddr_notifier:
	mlxsw_core_flush_owq();
err_dscp_init:
	mlxsw_sp_mp_hash_fini(mlxsw_sp);
err_mp_hash_init:
	mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
	mlxsw_sp_lb_rif_fini(mlxsw_sp);
err_lb_rif_init:
	mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
	mlxsw_sp_mr_fini(mlxsw_sp);
err_mr_init:
	mlxsw_sp_lpm_fini(mlxsw_sp);
err_lpm_init:
	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
err_nexthop_group_ht_init:
	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
err_nexthop_ht_init:
	mlxsw_sp_rifs_fini(mlxsw_sp);
err_rifs_init:
	rhashtable_destroy(&mlxsw_sp->router->crif_ht);
err_crif_ht_init:
	mlxsw_sp_ipips_fini(mlxsw_sp);
err_ipips_init:
	__mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
	cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw);
err_router_ops_init:
	mutex_destroy(&mlxsw_sp->router->lock);
	kfree(mlxsw_sp->router);
	return err;
}
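
/* Teardown mirrors mlxsw_sp_router_init(): the notifiers are unregistered
 * first and the router data structures are then released in reverse order
 * of creation.
 */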
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_router *router = mlxsw_sp->router;

	unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
					  &router->netdevice_nb);
	unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), &router->fib_nb);
	unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
				    &router->nexthop_nb);
	unregister_netevent_notifier(&router->netevent_nb);
	unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb);
	unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
	unregister_inet6addr_notifier(&router->inet6addr_nb);
	unregister_inetaddr_notifier(&router->inetaddr_nb);
	mlxsw_core_flush_owq();
	mlxsw_sp_mp_hash_fini(mlxsw_sp);
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_lb_rif_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	mlxsw_sp_mr_fini(mlxsw_sp);
	mlxsw_sp_lpm_fini(mlxsw_sp);
	rhashtable_destroy(&router->nexthop_group_ht);
	rhashtable_destroy(&router->nexthop_ht);
	mlxsw_sp_rifs_fini(mlxsw_sp);
	rhashtable_destroy(&mlxsw_sp->router->crif_ht);
	mlxsw_sp_ipips_fini(mlxsw_sp);
	__mlxsw_sp_router_fini(mlxsw_sp);
	cancel_delayed_work_sync(&router->nh_grp_activity_dw);
	mutex_destroy(&router->lock);
	kfree(router);
}