56c54d0fbacc8275773df0cf2eaa64373351b5c0
[platform/kernel/linux-rpi.git] / net / ipv4 / nexthop.c
1 // SPDX-License-Identifier: GPL-2.0
2 /* Generic nexthop implementation
3  *
4  * Copyright (c) 2017-19 Cumulus Networks
5  * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
6  */
7
8 #include <linux/nexthop.h>
9 #include <linux/rtnetlink.h>
10 #include <linux/slab.h>
11 #include <net/arp.h>
12 #include <net/ipv6_stubs.h>
13 #include <net/lwtunnel.h>
14 #include <net/ndisc.h>
15 #include <net/nexthop.h>
16 #include <net/route.h>
17 #include <net/sock.h>
18
/* Forward declaration; remove_nh_grp_entry() calls this before it is
 * defined.
 */
static void remove_nexthop(struct net *net, struct nexthop *nh,
			   struct nl_info *nlinfo);

/* nh_dev_hashfn() folds its input down to NH_DEV_HASHBITS bits, giving an
 * index in the range [0, NH_DEV_HASHSIZE).
 */
#define NH_DEV_HASHBITS		8
#define NH_DEV_HASHSIZE		(1U << NH_DEV_HASHBITS)
24
25 static const struct nla_policy rtm_nh_policy_new[] = {
26         [NHA_ID]                = { .type = NLA_U32 },
27         [NHA_GROUP]             = { .type = NLA_BINARY },
28         [NHA_GROUP_TYPE]        = { .type = NLA_U16 },
29         [NHA_BLACKHOLE]         = { .type = NLA_FLAG },
30         [NHA_OIF]               = { .type = NLA_U32 },
31         [NHA_GATEWAY]           = { .type = NLA_BINARY },
32         [NHA_ENCAP_TYPE]        = { .type = NLA_U16 },
33         [NHA_ENCAP]             = { .type = NLA_NESTED },
34         [NHA_FDB]               = { .type = NLA_FLAG },
35 };
36
37 static const struct nla_policy rtm_nh_policy_get[] = {
38         [NHA_ID]                = { .type = NLA_U32 },
39 };
40
41 static const struct nla_policy rtm_nh_policy_dump[] = {
42         [NHA_OIF]               = { .type = NLA_U32 },
43         [NHA_GROUPS]            = { .type = NLA_FLAG },
44         [NHA_MASTER]            = { .type = NLA_U32 },
45         [NHA_FDB]               = { .type = NLA_FLAG },
46 };
47
48 static bool nexthop_notifiers_is_empty(struct net *net)
49 {
50         return !net->nexthop.notifier_chain.head;
51 }
52
53 static void
54 __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
55                                const struct nh_info *nhi)
56 {
57         nh_info->dev = nhi->fib_nhc.nhc_dev;
58         nh_info->gw_family = nhi->fib_nhc.nhc_gw_family;
59         if (nh_info->gw_family == AF_INET)
60                 nh_info->ipv4 = nhi->fib_nhc.nhc_gw.ipv4;
61         else if (nh_info->gw_family == AF_INET6)
62                 nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6;
63
64         nh_info->is_reject = nhi->reject_nh;
65         nh_info->is_fdb = nhi->fdb_nh;
66         nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate;
67 }
68
69 static int nh_notifier_single_info_init(struct nh_notifier_info *info,
70                                         const struct nexthop *nh)
71 {
72         struct nh_info *nhi = rtnl_dereference(nh->nh_info);
73
74         info->type = NH_NOTIFIER_INFO_TYPE_SINGLE;
75         info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL);
76         if (!info->nh)
77                 return -ENOMEM;
78
79         __nh_notifier_single_info_init(info->nh, nhi);
80
81         return 0;
82 }
83
84 static void nh_notifier_single_info_fini(struct nh_notifier_info *info)
85 {
86         kfree(info->nh);
87 }
88
89 static int nh_notifier_mp_info_init(struct nh_notifier_info *info,
90                                     struct nh_group *nhg)
91 {
92         u16 num_nh = nhg->num_nh;
93         int i;
94
95         info->type = NH_NOTIFIER_INFO_TYPE_GRP;
96         info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh),
97                                GFP_KERNEL);
98         if (!info->nh_grp)
99                 return -ENOMEM;
100
101         info->nh_grp->num_nh = num_nh;
102         info->nh_grp->is_fdb = nhg->fdb_nh;
103
104         for (i = 0; i < num_nh; i++) {
105                 struct nh_grp_entry *nhge = &nhg->nh_entries[i];
106                 struct nh_info *nhi;
107
108                 nhi = rtnl_dereference(nhge->nh->nh_info);
109                 info->nh_grp->nh_entries[i].id = nhge->nh->id;
110                 info->nh_grp->nh_entries[i].weight = nhge->weight;
111                 __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh,
112                                                nhi);
113         }
114
115         return 0;
116 }
117
118 static int nh_notifier_grp_info_init(struct nh_notifier_info *info,
119                                      const struct nexthop *nh)
120 {
121         struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
122
123         if (nhg->mpath)
124                 return nh_notifier_mp_info_init(info, nhg);
125         return -EINVAL;
126 }
127
128 static void nh_notifier_grp_info_fini(struct nh_notifier_info *info,
129                                       const struct nexthop *nh)
130 {
131         struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
132
133         if (nhg->mpath)
134                 kfree(info->nh_grp);
135 }
136
137 static int nh_notifier_info_init(struct nh_notifier_info *info,
138                                  const struct nexthop *nh)
139 {
140         info->id = nh->id;
141
142         if (nh->is_group)
143                 return nh_notifier_grp_info_init(info, nh);
144         else
145                 return nh_notifier_single_info_init(info, nh);
146 }
147
148 static void nh_notifier_info_fini(struct nh_notifier_info *info,
149                                   const struct nexthop *nh)
150 {
151         if (nh->is_group)
152                 nh_notifier_grp_info_fini(info, nh);
153         else
154                 nh_notifier_single_info_fini(info);
155 }
156
157 static int call_nexthop_notifiers(struct net *net,
158                                   enum nexthop_event_type event_type,
159                                   struct nexthop *nh,
160                                   struct netlink_ext_ack *extack)
161 {
162         struct nh_notifier_info info = {
163                 .net = net,
164                 .extack = extack,
165         };
166         int err;
167
168         ASSERT_RTNL();
169
170         if (nexthop_notifiers_is_empty(net))
171                 return 0;
172
173         err = nh_notifier_info_init(&info, nh);
174         if (err) {
175                 NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info");
176                 return err;
177         }
178
179         err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
180                                            event_type, &info);
181         nh_notifier_info_fini(&info, nh);
182
183         return notifier_to_errno(err);
184 }
185
186 static int call_nexthop_notifier(struct notifier_block *nb, struct net *net,
187                                  enum nexthop_event_type event_type,
188                                  struct nexthop *nh,
189                                  struct netlink_ext_ack *extack)
190 {
191         struct nh_notifier_info info = {
192                 .net = net,
193                 .extack = extack,
194         };
195         int err;
196
197         err = nh_notifier_info_init(&info, nh);
198         if (err)
199                 return err;
200
201         err = nb->notifier_call(nb, event_type, &info);
202         nh_notifier_info_fini(&info, nh);
203
204         return notifier_to_errno(err);
205 }
206
207 static unsigned int nh_dev_hashfn(unsigned int val)
208 {
209         unsigned int mask = NH_DEV_HASHSIZE - 1;
210
211         return (val ^
212                 (val >> NH_DEV_HASHBITS) ^
213                 (val >> (NH_DEV_HASHBITS * 2))) & mask;
214 }
215
216 static void nexthop_devhash_add(struct net *net, struct nh_info *nhi)
217 {
218         struct net_device *dev = nhi->fib_nhc.nhc_dev;
219         struct hlist_head *head;
220         unsigned int hash;
221
222         WARN_ON(!dev);
223
224         hash = nh_dev_hashfn(dev->ifindex);
225         head = &net->nexthop.devhash[hash];
226         hlist_add_head(&nhi->dev_hash, head);
227 }
228
229 static void nexthop_free_group(struct nexthop *nh)
230 {
231         struct nh_group *nhg;
232         int i;
233
234         nhg = rcu_dereference_raw(nh->nh_grp);
235         for (i = 0; i < nhg->num_nh; ++i) {
236                 struct nh_grp_entry *nhge = &nhg->nh_entries[i];
237
238                 WARN_ON(!list_empty(&nhge->nh_list));
239                 nexthop_put(nhge->nh);
240         }
241
242         WARN_ON(nhg->spare == nhg);
243
244         kfree(nhg->spare);
245         kfree(nhg);
246 }
247
248 static void nexthop_free_single(struct nexthop *nh)
249 {
250         struct nh_info *nhi;
251
252         nhi = rcu_dereference_raw(nh->nh_info);
253         switch (nhi->family) {
254         case AF_INET:
255                 fib_nh_release(nh->net, &nhi->fib_nh);
256                 break;
257         case AF_INET6:
258                 ipv6_stub->fib6_nh_release(&nhi->fib6_nh);
259                 break;
260         }
261         kfree(nhi);
262 }
263
264 void nexthop_free_rcu(struct rcu_head *head)
265 {
266         struct nexthop *nh = container_of(head, struct nexthop, rcu);
267
268         if (nh->is_group)
269                 nexthop_free_group(nh);
270         else
271                 nexthop_free_single(nh);
272
273         kfree(nh);
274 }
275 EXPORT_SYMBOL_GPL(nexthop_free_rcu);
276
277 static struct nexthop *nexthop_alloc(void)
278 {
279         struct nexthop *nh;
280
281         nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL);
282         if (nh) {
283                 INIT_LIST_HEAD(&nh->fi_list);
284                 INIT_LIST_HEAD(&nh->f6i_list);
285                 INIT_LIST_HEAD(&nh->grp_list);
286                 INIT_LIST_HEAD(&nh->fdb_list);
287         }
288         return nh;
289 }
290
291 static struct nh_group *nexthop_grp_alloc(u16 num_nh)
292 {
293         struct nh_group *nhg;
294
295         nhg = kzalloc(struct_size(nhg, nh_entries, num_nh), GFP_KERNEL);
296         if (nhg)
297                 nhg->num_nh = num_nh;
298
299         return nhg;
300 }
301
302 static void nh_base_seq_inc(struct net *net)
303 {
304         while (++net->nexthop.seq == 0)
305                 ;
306 }
307
308 /* no reference taken; rcu lock or rtnl must be held */
309 struct nexthop *nexthop_find_by_id(struct net *net, u32 id)
310 {
311         struct rb_node **pp, *parent = NULL, *next;
312
313         pp = &net->nexthop.rb_root.rb_node;
314         while (1) {
315                 struct nexthop *nh;
316
317                 next = rcu_dereference_raw(*pp);
318                 if (!next)
319                         break;
320                 parent = next;
321
322                 nh = rb_entry(parent, struct nexthop, rb_node);
323                 if (id < nh->id)
324                         pp = &next->rb_left;
325                 else if (id > nh->id)
326                         pp = &next->rb_right;
327                 else
328                         return nh;
329         }
330         return NULL;
331 }
332 EXPORT_SYMBOL_GPL(nexthop_find_by_id);
333
334 /* used for auto id allocation; called with rtnl held */
335 static u32 nh_find_unused_id(struct net *net)
336 {
337         u32 id_start = net->nexthop.last_id_allocated;
338
339         while (1) {
340                 net->nexthop.last_id_allocated++;
341                 if (net->nexthop.last_id_allocated == id_start)
342                         break;
343
344                 if (!nexthop_find_by_id(net, net->nexthop.last_id_allocated))
345                         return net->nexthop.last_id_allocated;
346         }
347         return 0;
348 }
349
350 static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
351 {
352         struct nexthop_grp *p;
353         size_t len = nhg->num_nh * sizeof(*p);
354         struct nlattr *nla;
355         u16 group_type = 0;
356         int i;
357
358         if (nhg->mpath)
359                 group_type = NEXTHOP_GRP_TYPE_MPATH;
360
361         if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type))
362                 goto nla_put_failure;
363
364         nla = nla_reserve(skb, NHA_GROUP, len);
365         if (!nla)
366                 goto nla_put_failure;
367
368         p = nla_data(nla);
369         for (i = 0; i < nhg->num_nh; ++i) {
370                 p->id = nhg->nh_entries[i].nh->id;
371                 p->weight = nhg->nh_entries[i].weight - 1;
372                 p += 1;
373         }
374
375         return 0;
376
377 nla_put_failure:
378         return -EMSGSIZE;
379 }
380
381 static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
382                         int event, u32 portid, u32 seq, unsigned int nlflags)
383 {
384         struct fib6_nh *fib6_nh;
385         struct fib_nh *fib_nh;
386         struct nlmsghdr *nlh;
387         struct nh_info *nhi;
388         struct nhmsg *nhm;
389
390         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags);
391         if (!nlh)
392                 return -EMSGSIZE;
393
394         nhm = nlmsg_data(nlh);
395         nhm->nh_family = AF_UNSPEC;
396         nhm->nh_flags = nh->nh_flags;
397         nhm->nh_protocol = nh->protocol;
398         nhm->nh_scope = 0;
399         nhm->resvd = 0;
400
401         if (nla_put_u32(skb, NHA_ID, nh->id))
402                 goto nla_put_failure;
403
404         if (nh->is_group) {
405                 struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
406
407                 if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB))
408                         goto nla_put_failure;
409                 if (nla_put_nh_group(skb, nhg))
410                         goto nla_put_failure;
411                 goto out;
412         }
413
414         nhi = rtnl_dereference(nh->nh_info);
415         nhm->nh_family = nhi->family;
416         if (nhi->reject_nh) {
417                 if (nla_put_flag(skb, NHA_BLACKHOLE))
418                         goto nla_put_failure;
419                 goto out;
420         } else if (nhi->fdb_nh) {
421                 if (nla_put_flag(skb, NHA_FDB))
422                         goto nla_put_failure;
423         } else {
424                 const struct net_device *dev;
425
426                 dev = nhi->fib_nhc.nhc_dev;
427                 if (dev && nla_put_u32(skb, NHA_OIF, dev->ifindex))
428                         goto nla_put_failure;
429         }
430
431         nhm->nh_scope = nhi->fib_nhc.nhc_scope;
432         switch (nhi->family) {
433         case AF_INET:
434                 fib_nh = &nhi->fib_nh;
435                 if (fib_nh->fib_nh_gw_family &&
436                     nla_put_be32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4))
437                         goto nla_put_failure;
438                 break;
439
440         case AF_INET6:
441                 fib6_nh = &nhi->fib6_nh;
442                 if (fib6_nh->fib_nh_gw_family &&
443                     nla_put_in6_addr(skb, NHA_GATEWAY, &fib6_nh->fib_nh_gw6))
444                         goto nla_put_failure;
445                 break;
446         }
447
448         if (nhi->fib_nhc.nhc_lwtstate &&
449             lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate,
450                                 NHA_ENCAP, NHA_ENCAP_TYPE) < 0)
451                 goto nla_put_failure;
452
453 out:
454         nlmsg_end(skb, nlh);
455         return 0;
456
457 nla_put_failure:
458         nlmsg_cancel(skb, nlh);
459         return -EMSGSIZE;
460 }
461
462 static size_t nh_nlmsg_size_grp(struct nexthop *nh)
463 {
464         struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
465         size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh;
466
467         return nla_total_size(sz) +
468                nla_total_size(2);  /* NHA_GROUP_TYPE */
469 }
470
471 static size_t nh_nlmsg_size_single(struct nexthop *nh)
472 {
473         struct nh_info *nhi = rtnl_dereference(nh->nh_info);
474         size_t sz;
475
476         /* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE
477          * are mutually exclusive
478          */
479         sz = nla_total_size(4);  /* NHA_OIF */
480
481         switch (nhi->family) {
482         case AF_INET:
483                 if (nhi->fib_nh.fib_nh_gw_family)
484                         sz += nla_total_size(4);  /* NHA_GATEWAY */
485                 break;
486
487         case AF_INET6:
488                 /* NHA_GATEWAY */
489                 if (nhi->fib6_nh.fib_nh_gw_family)
490                         sz += nla_total_size(sizeof(const struct in6_addr));
491                 break;
492         }
493
494         if (nhi->fib_nhc.nhc_lwtstate) {
495                 sz += lwtunnel_get_encap_size(nhi->fib_nhc.nhc_lwtstate);
496                 sz += nla_total_size(2);  /* NHA_ENCAP_TYPE */
497         }
498
499         return sz;
500 }
501
502 static size_t nh_nlmsg_size(struct nexthop *nh)
503 {
504         size_t sz = NLMSG_ALIGN(sizeof(struct nhmsg));
505
506         sz += nla_total_size(4); /* NHA_ID */
507
508         if (nh->is_group)
509                 sz += nh_nlmsg_size_grp(nh);
510         else
511                 sz += nh_nlmsg_size_single(nh);
512
513         return sz;
514 }
515
516 static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
517 {
518         unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0;
519         u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
520         struct sk_buff *skb;
521         int err = -ENOBUFS;
522
523         skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any());
524         if (!skb)
525                 goto errout;
526
527         err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags);
528         if (err < 0) {
529                 /* -EMSGSIZE implies BUG in nh_nlmsg_size() */
530                 WARN_ON(err == -EMSGSIZE);
531                 kfree_skb(skb);
532                 goto errout;
533         }
534
535         rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_NEXTHOP,
536                     info->nlh, gfp_any());
537         return;
538 errout:
539         if (err < 0)
540                 rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
541 }
542
543 static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
544                            bool *is_fdb, struct netlink_ext_ack *extack)
545 {
546         if (nh->is_group) {
547                 struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
548
549                 /* nested multipath (group within a group) is not
550                  * supported
551                  */
552                 if (nhg->mpath) {
553                         NL_SET_ERR_MSG(extack,
554                                        "Multipath group can not be a nexthop within a group");
555                         return false;
556                 }
557                 *is_fdb = nhg->fdb_nh;
558         } else {
559                 struct nh_info *nhi = rtnl_dereference(nh->nh_info);
560
561                 if (nhi->reject_nh && npaths > 1) {
562                         NL_SET_ERR_MSG(extack,
563                                        "Blackhole nexthop can not be used in a group with more than 1 path");
564                         return false;
565                 }
566                 *is_fdb = nhi->fdb_nh;
567         }
568
569         return true;
570 }
571
572 static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
573                                    struct netlink_ext_ack *extack)
574 {
575         struct nh_info *nhi;
576
577         nhi = rtnl_dereference(nh->nh_info);
578
579         if (!nhi->fdb_nh) {
580                 NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops");
581                 return -EINVAL;
582         }
583
584         if (*nh_family == AF_UNSPEC) {
585                 *nh_family = nhi->family;
586         } else if (*nh_family != nhi->family) {
587                 NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family nexthops");
588                 return -EINVAL;
589         }
590
591         return 0;
592 }
593
594 static int nh_check_attr_group(struct net *net,
595                                struct nlattr *tb[], size_t tb_size,
596                                struct netlink_ext_ack *extack)
597 {
598         unsigned int len = nla_len(tb[NHA_GROUP]);
599         u8 nh_family = AF_UNSPEC;
600         struct nexthop_grp *nhg;
601         unsigned int i, j;
602         u8 nhg_fdb = 0;
603
604         if (!len || len & (sizeof(struct nexthop_grp) - 1)) {
605                 NL_SET_ERR_MSG(extack,
606                                "Invalid length for nexthop group attribute");
607                 return -EINVAL;
608         }
609
610         /* convert len to number of nexthop ids */
611         len /= sizeof(*nhg);
612
613         nhg = nla_data(tb[NHA_GROUP]);
614         for (i = 0; i < len; ++i) {
615                 if (nhg[i].resvd1 || nhg[i].resvd2) {
616                         NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0");
617                         return -EINVAL;
618                 }
619                 if (nhg[i].weight > 254) {
620                         NL_SET_ERR_MSG(extack, "Invalid value for weight");
621                         return -EINVAL;
622                 }
623                 for (j = i + 1; j < len; ++j) {
624                         if (nhg[i].id == nhg[j].id) {
625                                 NL_SET_ERR_MSG(extack, "Nexthop id can not be used twice in a group");
626                                 return -EINVAL;
627                         }
628                 }
629         }
630
631         if (tb[NHA_FDB])
632                 nhg_fdb = 1;
633         nhg = nla_data(tb[NHA_GROUP]);
634         for (i = 0; i < len; ++i) {
635                 struct nexthop *nh;
636                 bool is_fdb_nh;
637
638                 nh = nexthop_find_by_id(net, nhg[i].id);
639                 if (!nh) {
640                         NL_SET_ERR_MSG(extack, "Invalid nexthop id");
641                         return -EINVAL;
642                 }
643                 if (!valid_group_nh(nh, len, &is_fdb_nh, extack))
644                         return -EINVAL;
645
646                 if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack))
647                         return -EINVAL;
648
649                 if (!nhg_fdb && is_fdb_nh) {
650                         NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops");
651                         return -EINVAL;
652                 }
653         }
654         for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) {
655                 if (!tb[i])
656                         continue;
657                 if (i == NHA_FDB)
658                         continue;
659                 NL_SET_ERR_MSG(extack,
660                                "No other attributes can be set in nexthop groups");
661                 return -EINVAL;
662         }
663
664         return 0;
665 }
666
667 static bool ipv6_good_nh(const struct fib6_nh *nh)
668 {
669         int state = NUD_REACHABLE;
670         struct neighbour *n;
671
672         rcu_read_lock_bh();
673
674         n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6);
675         if (n)
676                 state = n->nud_state;
677
678         rcu_read_unlock_bh();
679
680         return !!(state & NUD_VALID);
681 }
682
683 static bool ipv4_good_nh(const struct fib_nh *nh)
684 {
685         int state = NUD_REACHABLE;
686         struct neighbour *n;
687
688         rcu_read_lock_bh();
689
690         n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
691                                       (__force u32)nh->fib_nh_gw4);
692         if (n)
693                 state = n->nud_state;
694
695         rcu_read_unlock_bh();
696
697         return !!(state & NUD_VALID);
698 }
699
700 static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash)
701 {
702         struct nexthop *rc = NULL;
703         int i;
704
705         for (i = 0; i < nhg->num_nh; ++i) {
706                 struct nh_grp_entry *nhge = &nhg->nh_entries[i];
707                 struct nh_info *nhi;
708
709                 if (hash > atomic_read(&nhge->mpath.upper_bound))
710                         continue;
711
712                 nhi = rcu_dereference(nhge->nh->nh_info);
713                 if (nhi->fdb_nh)
714                         return nhge->nh;
715
716                 /* nexthops always check if it is good and does
717                  * not rely on a sysctl for this behavior
718                  */
719                 switch (nhi->family) {
720                 case AF_INET:
721                         if (ipv4_good_nh(&nhi->fib_nh))
722                                 return nhge->nh;
723                         break;
724                 case AF_INET6:
725                         if (ipv6_good_nh(&nhi->fib6_nh))
726                                 return nhge->nh;
727                         break;
728                 }
729
730                 if (!rc)
731                         rc = nhge->nh;
732         }
733
734         return rc;
735 }
736
737 struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
738 {
739         struct nh_group *nhg;
740
741         if (!nh->is_group)
742                 return nh;
743
744         nhg = rcu_dereference(nh->nh_grp);
745         if (nhg->mpath)
746                 return nexthop_select_path_mp(nhg, hash);
747
748         /* Unreachable. */
749         return NULL;
750 }
751 EXPORT_SYMBOL_GPL(nexthop_select_path);
752
753 int nexthop_for_each_fib6_nh(struct nexthop *nh,
754                              int (*cb)(struct fib6_nh *nh, void *arg),
755                              void *arg)
756 {
757         struct nh_info *nhi;
758         int err;
759
760         if (nh->is_group) {
761                 struct nh_group *nhg;
762                 int i;
763
764                 nhg = rcu_dereference_rtnl(nh->nh_grp);
765                 for (i = 0; i < nhg->num_nh; i++) {
766                         struct nh_grp_entry *nhge = &nhg->nh_entries[i];
767
768                         nhi = rcu_dereference_rtnl(nhge->nh->nh_info);
769                         err = cb(&nhi->fib6_nh, arg);
770                         if (err)
771                                 return err;
772                 }
773         } else {
774                 nhi = rcu_dereference_rtnl(nh->nh_info);
775                 err = cb(&nhi->fib6_nh, arg);
776                 if (err)
777                         return err;
778         }
779
780         return 0;
781 }
782 EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh);
783
784 static int check_src_addr(const struct in6_addr *saddr,
785                           struct netlink_ext_ack *extack)
786 {
787         if (!ipv6_addr_any(saddr)) {
788                 NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects");
789                 return -EINVAL;
790         }
791         return 0;
792 }
793
794 int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
795                        struct netlink_ext_ack *extack)
796 {
797         struct nh_info *nhi;
798         bool is_fdb_nh;
799
800         /* fib6_src is unique to a fib6_info and limits the ability to cache
801          * routes in fib6_nh within a nexthop that is potentially shared
802          * across multiple fib entries. If the config wants to use source
803          * routing it can not use nexthop objects. mlxsw also does not allow
804          * fib6_src on routes.
805          */
806         if (cfg && check_src_addr(&cfg->fc_src, extack) < 0)
807                 return -EINVAL;
808
809         if (nh->is_group) {
810                 struct nh_group *nhg;
811
812                 nhg = rtnl_dereference(nh->nh_grp);
813                 if (nhg->has_v4)
814                         goto no_v4_nh;
815                 is_fdb_nh = nhg->fdb_nh;
816         } else {
817                 nhi = rtnl_dereference(nh->nh_info);
818                 if (nhi->family == AF_INET)
819                         goto no_v4_nh;
820                 is_fdb_nh = nhi->fdb_nh;
821         }
822
823         if (is_fdb_nh) {
824                 NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
825                 return -EINVAL;
826         }
827
828         return 0;
829 no_v4_nh:
830         NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop");
831         return -EINVAL;
832 }
833 EXPORT_SYMBOL_GPL(fib6_check_nexthop);
834
835 /* if existing nexthop has ipv6 routes linked to it, need
836  * to verify this new spec works with ipv6
837  */
838 static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new,
839                               struct netlink_ext_ack *extack)
840 {
841         struct fib6_info *f6i;
842
843         if (list_empty(&old->f6i_list))
844                 return 0;
845
846         list_for_each_entry(f6i, &old->f6i_list, nh_list) {
847                 if (check_src_addr(&f6i->fib6_src.addr, extack) < 0)
848                         return -EINVAL;
849         }
850
851         return fib6_check_nexthop(new, NULL, extack);
852 }
853
854 static int nexthop_check_scope(struct nh_info *nhi, u8 scope,
855                                struct netlink_ext_ack *extack)
856 {
857         if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) {
858                 NL_SET_ERR_MSG(extack,
859                                "Route with host scope can not have a gateway");
860                 return -EINVAL;
861         }
862
863         if (nhi->fib_nhc.nhc_flags & RTNH_F_ONLINK && scope >= RT_SCOPE_LINK) {
864                 NL_SET_ERR_MSG(extack, "Scope mismatch with nexthop");
865                 return -EINVAL;
866         }
867
868         return 0;
869 }
870
871 /* Invoked by fib add code to verify nexthop by id is ok with
872  * config for prefix; parts of fib_check_nh not done when nexthop
873  * object is used.
874  */
875 int fib_check_nexthop(struct nexthop *nh, u8 scope,
876                       struct netlink_ext_ack *extack)
877 {
878         struct nh_info *nhi;
879         int err = 0;
880
881         if (nh->is_group) {
882                 struct nh_group *nhg;
883
884                 nhg = rtnl_dereference(nh->nh_grp);
885                 if (nhg->fdb_nh) {
886                         NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
887                         err = -EINVAL;
888                         goto out;
889                 }
890
891                 if (scope == RT_SCOPE_HOST) {
892                         NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops");
893                         err = -EINVAL;
894                         goto out;
895                 }
896
897                 /* all nexthops in a group have the same scope */
898                 nhi = rtnl_dereference(nhg->nh_entries[0].nh->nh_info);
899                 err = nexthop_check_scope(nhi, scope, extack);
900         } else {
901                 nhi = rtnl_dereference(nh->nh_info);
902                 if (nhi->fdb_nh) {
903                         NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
904                         err = -EINVAL;
905                         goto out;
906                 }
907                 err = nexthop_check_scope(nhi, scope, extack);
908         }
909
910 out:
911         return err;
912 }
913
914 static int fib_check_nh_list(struct nexthop *old, struct nexthop *new,
915                              struct netlink_ext_ack *extack)
916 {
917         struct fib_info *fi;
918
919         list_for_each_entry(fi, &old->fi_list, nh_list) {
920                 int err;
921
922                 err = fib_check_nexthop(new, fi->fib_scope, extack);
923                 if (err)
924                         return err;
925         }
926         return 0;
927 }
928
929 static void nh_group_rebalance(struct nh_group *nhg)
930 {
931         int total = 0;
932         int w = 0;
933         int i;
934
935         for (i = 0; i < nhg->num_nh; ++i)
936                 total += nhg->nh_entries[i].weight;
937
938         for (i = 0; i < nhg->num_nh; ++i) {
939                 struct nh_grp_entry *nhge = &nhg->nh_entries[i];
940                 int upper_bound;
941
942                 w += nhge->weight;
943                 upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
944                 atomic_set(&nhge->mpath.upper_bound, upper_bound);
945         }
946 }
947
948 static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
949                                 struct nl_info *nlinfo)
950 {
951         struct nh_grp_entry *nhges, *new_nhges;
952         struct nexthop *nhp = nhge->nh_parent;
953         struct netlink_ext_ack extack;
954         struct nexthop *nh = nhge->nh;
955         struct nh_group *nhg, *newg;
956         int i, j, err;
957
958         WARN_ON(!nh);
959
960         nhg = rtnl_dereference(nhp->nh_grp);
961         newg = nhg->spare;
962
963         /* last entry, keep it visible and remove the parent */
964         if (nhg->num_nh == 1) {
965                 remove_nexthop(net, nhp, nlinfo);
966                 return;
967         }
968
969         newg->has_v4 = false;
970         newg->is_multipath = nhg->is_multipath;
971         newg->mpath = nhg->mpath;
972         newg->fdb_nh = nhg->fdb_nh;
973         newg->num_nh = nhg->num_nh;
974
975         /* copy old entries to new except the one getting removed */
976         nhges = nhg->nh_entries;
977         new_nhges = newg->nh_entries;
978         for (i = 0, j = 0; i < nhg->num_nh; ++i) {
979                 struct nh_info *nhi;
980
981                 /* current nexthop getting removed */
982                 if (nhg->nh_entries[i].nh == nh) {
983                         newg->num_nh--;
984                         continue;
985                 }
986
987                 nhi = rtnl_dereference(nhges[i].nh->nh_info);
988                 if (nhi->family == AF_INET)
989                         newg->has_v4 = true;
990
991                 list_del(&nhges[i].nh_list);
992                 new_nhges[j].nh_parent = nhges[i].nh_parent;
993                 new_nhges[j].nh = nhges[i].nh;
994                 new_nhges[j].weight = nhges[i].weight;
995                 list_add(&new_nhges[j].nh_list, &new_nhges[j].nh->grp_list);
996                 j++;
997         }
998
999         nh_group_rebalance(newg);
1000         rcu_assign_pointer(nhp->nh_grp, newg);
1001
1002         list_del(&nhge->nh_list);
1003         nexthop_put(nhge->nh);
1004
1005         err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, &extack);
1006         if (err)
1007                 pr_err("%s\n", extack._msg);
1008
1009         if (nlinfo)
1010                 nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo);
1011 }
1012
1013 static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
1014                                        struct nl_info *nlinfo)
1015 {
1016         struct nh_grp_entry *nhge, *tmp;
1017
1018         list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list)
1019                 remove_nh_grp_entry(net, nhge, nlinfo);
1020
1021         /* make sure all see the newly published array before releasing rtnl */
1022         synchronize_net();
1023 }
1024
1025 static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
1026 {
1027         struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
1028         int i, num_nh = nhg->num_nh;
1029
1030         for (i = 0; i < num_nh; ++i) {
1031                 struct nh_grp_entry *nhge = &nhg->nh_entries[i];
1032
1033                 if (WARN_ON(!nhge->nh))
1034                         continue;
1035
1036                 list_del_init(&nhge->nh_list);
1037         }
1038 }
1039
1040 /* not called for nexthop replace */
1041 static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
1042 {
1043         struct fib6_info *f6i, *tmp;
1044         bool do_flush = false;
1045         struct fib_info *fi;
1046
1047         list_for_each_entry(fi, &nh->fi_list, nh_list) {
1048                 fi->fib_flags |= RTNH_F_DEAD;
1049                 do_flush = true;
1050         }
1051         if (do_flush)
1052                 fib_flush(net);
1053
1054         /* ip6_del_rt removes the entry from this list hence the _safe */
1055         list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
1056                 /* __ip6_del_rt does a release, so do a hold here */
1057                 fib6_info_hold(f6i);
1058                 ipv6_stub->ip6_del_rt(net, f6i,
1059                                       !net->ipv4.sysctl_nexthop_compat_mode);
1060         }
1061 }
1062
1063 static void __remove_nexthop(struct net *net, struct nexthop *nh,
1064                              struct nl_info *nlinfo)
1065 {
1066         __remove_nexthop_fib(net, nh);
1067
1068         if (nh->is_group) {
1069                 remove_nexthop_group(nh, nlinfo);
1070         } else {
1071                 struct nh_info *nhi;
1072
1073                 nhi = rtnl_dereference(nh->nh_info);
1074                 if (nhi->fib_nhc.nhc_dev)
1075                         hlist_del(&nhi->dev_hash);
1076
1077                 remove_nexthop_from_groups(net, nh, nlinfo);
1078         }
1079 }
1080
1081 static void remove_nexthop(struct net *net, struct nexthop *nh,
1082                            struct nl_info *nlinfo)
1083 {
1084         call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh, NULL);
1085
1086         /* remove from the tree */
1087         rb_erase(&nh->rb_node, &net->nexthop.rb_root);
1088
1089         if (nlinfo)
1090                 nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo);
1091
1092         __remove_nexthop(net, nh, nlinfo);
1093         nh_base_seq_inc(net);
1094
1095         nexthop_put(nh);
1096 }
1097
1098 /* if any FIB entries reference this nexthop, any dst entries
1099  * need to be regenerated
1100  */
1101 static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
1102 {
1103         struct fib6_info *f6i;
1104
1105         if (!list_empty(&nh->fi_list))
1106                 rt_cache_flush(net);
1107
1108         list_for_each_entry(f6i, &nh->f6i_list, nh_list)
1109                 ipv6_stub->fib6_update_sernum(net, f6i);
1110 }
1111
1112 static int replace_nexthop_grp(struct net *net, struct nexthop *old,
1113                                struct nexthop *new, const struct nh_config *cfg,
1114                                struct netlink_ext_ack *extack)
1115 {
1116         struct nh_group *oldg, *newg;
1117         int i, err;
1118
1119         if (!new->is_group) {
1120                 NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop.");
1121                 return -EINVAL;
1122         }
1123
1124         err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack);
1125         if (err)
1126                 return err;
1127
1128         oldg = rtnl_dereference(old->nh_grp);
1129         newg = rtnl_dereference(new->nh_grp);
1130
1131         /* update parents - used by nexthop code for cleanup */
1132         for (i = 0; i < newg->num_nh; i++)
1133                 newg->nh_entries[i].nh_parent = old;
1134
1135         rcu_assign_pointer(old->nh_grp, newg);
1136
1137         for (i = 0; i < oldg->num_nh; i++)
1138                 oldg->nh_entries[i].nh_parent = new;
1139
1140         rcu_assign_pointer(new->nh_grp, oldg);
1141
1142         return 0;
1143 }
1144
1145 static void nh_group_v4_update(struct nh_group *nhg)
1146 {
1147         struct nh_grp_entry *nhges;
1148         bool has_v4 = false;
1149         int i;
1150
1151         nhges = nhg->nh_entries;
1152         for (i = 0; i < nhg->num_nh; i++) {
1153                 struct nh_info *nhi;
1154
1155                 nhi = rtnl_dereference(nhges[i].nh->nh_info);
1156                 if (nhi->family == AF_INET)
1157                         has_v4 = true;
1158         }
1159         nhg->has_v4 = has_v4;
1160 }
1161
1162 static int replace_nexthop_single(struct net *net, struct nexthop *old,
1163                                   struct nexthop *new,
1164                                   struct netlink_ext_ack *extack)
1165 {
1166         u8 old_protocol, old_nh_flags;
1167         struct nh_info *oldi, *newi;
1168         struct nh_grp_entry *nhge;
1169         int err;
1170
1171         if (new->is_group) {
1172                 NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group.");
1173                 return -EINVAL;
1174         }
1175
1176         err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack);
1177         if (err)
1178                 return err;
1179
1180         /* Hardware flags were set on 'old' as 'new' is not in the red-black
1181          * tree. Therefore, inherit the flags from 'old' to 'new'.
1182          */
1183         new->nh_flags |= old->nh_flags & (RTNH_F_OFFLOAD | RTNH_F_TRAP);
1184
1185         oldi = rtnl_dereference(old->nh_info);
1186         newi = rtnl_dereference(new->nh_info);
1187
1188         newi->nh_parent = old;
1189         oldi->nh_parent = new;
1190
1191         old_protocol = old->protocol;
1192         old_nh_flags = old->nh_flags;
1193
1194         old->protocol = new->protocol;
1195         old->nh_flags = new->nh_flags;
1196
1197         rcu_assign_pointer(old->nh_info, newi);
1198         rcu_assign_pointer(new->nh_info, oldi);
1199
1200         /* Send a replace notification for all the groups using the nexthop. */
1201         list_for_each_entry(nhge, &old->grp_list, nh_list) {
1202                 struct nexthop *nhp = nhge->nh_parent;
1203
1204                 err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp,
1205                                              extack);
1206                 if (err)
1207                         goto err_notify;
1208         }
1209
1210         /* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially
1211          * update IPv4 indication in all the groups using the nexthop.
1212          */
1213         if (oldi->family == AF_INET && newi->family == AF_INET6) {
1214                 list_for_each_entry(nhge, &old->grp_list, nh_list) {
1215                         struct nexthop *nhp = nhge->nh_parent;
1216                         struct nh_group *nhg;
1217
1218                         nhg = rtnl_dereference(nhp->nh_grp);
1219                         nh_group_v4_update(nhg);
1220                 }
1221         }
1222
1223         return 0;
1224
1225 err_notify:
1226         rcu_assign_pointer(new->nh_info, newi);
1227         rcu_assign_pointer(old->nh_info, oldi);
1228         old->nh_flags = old_nh_flags;
1229         old->protocol = old_protocol;
1230         oldi->nh_parent = old;
1231         newi->nh_parent = new;
1232         list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) {
1233                 struct nexthop *nhp = nhge->nh_parent;
1234
1235                 call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, extack);
1236         }
1237         call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack);
1238         return err;
1239 }
1240
1241 static void __nexthop_replace_notify(struct net *net, struct nexthop *nh,
1242                                      struct nl_info *info)
1243 {
1244         struct fib6_info *f6i;
1245
1246         if (!list_empty(&nh->fi_list)) {
1247                 struct fib_info *fi;
1248
1249                 /* expectation is a few fib_info per nexthop and then
1250                  * a lot of routes per fib_info. So mark the fib_info
1251                  * and then walk the fib tables once
1252                  */
1253                 list_for_each_entry(fi, &nh->fi_list, nh_list)
1254                         fi->nh_updated = true;
1255
1256                 fib_info_notify_update(net, info);
1257
1258                 list_for_each_entry(fi, &nh->fi_list, nh_list)
1259                         fi->nh_updated = false;
1260         }
1261
1262         list_for_each_entry(f6i, &nh->f6i_list, nh_list)
1263                 ipv6_stub->fib6_rt_update(net, f6i, info);
1264 }
1265
1266 /* send RTM_NEWROUTE with REPLACE flag set for all FIB entries
1267  * linked to this nexthop and for all groups that the nexthop
1268  * is a member of
1269  */
1270 static void nexthop_replace_notify(struct net *net, struct nexthop *nh,
1271                                    struct nl_info *info)
1272 {
1273         struct nh_grp_entry *nhge;
1274
1275         __nexthop_replace_notify(net, nh, info);
1276
1277         list_for_each_entry(nhge, &nh->grp_list, nh_list)
1278                 __nexthop_replace_notify(net, nhge->nh_parent, info);
1279 }
1280
1281 static int replace_nexthop(struct net *net, struct nexthop *old,
1282                            struct nexthop *new, const struct nh_config *cfg,
1283                            struct netlink_ext_ack *extack)
1284 {
1285         bool new_is_reject = false;
1286         struct nh_grp_entry *nhge;
1287         int err;
1288
1289         /* check that existing FIB entries are ok with the
1290          * new nexthop definition
1291          */
1292         err = fib_check_nh_list(old, new, extack);
1293         if (err)
1294                 return err;
1295
1296         err = fib6_check_nh_list(old, new, extack);
1297         if (err)
1298                 return err;
1299
1300         if (!new->is_group) {
1301                 struct nh_info *nhi = rtnl_dereference(new->nh_info);
1302
1303                 new_is_reject = nhi->reject_nh;
1304         }
1305
1306         list_for_each_entry(nhge, &old->grp_list, nh_list) {
1307                 /* if new nexthop is a blackhole, any groups using this
1308                  * nexthop cannot have more than 1 path
1309                  */
1310                 if (new_is_reject &&
1311                     nexthop_num_path(nhge->nh_parent) > 1) {
1312                         NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be a member of a group with more than one path");
1313                         return -EINVAL;
1314                 }
1315
1316                 err = fib_check_nh_list(nhge->nh_parent, new, extack);
1317                 if (err)
1318                         return err;
1319
1320                 err = fib6_check_nh_list(nhge->nh_parent, new, extack);
1321                 if (err)
1322                         return err;
1323         }
1324
1325         if (old->is_group)
1326                 err = replace_nexthop_grp(net, old, new, cfg, extack);
1327         else
1328                 err = replace_nexthop_single(net, old, new, extack);
1329
1330         if (!err) {
1331                 nh_rt_cache_flush(net, old);
1332
1333                 __remove_nexthop(net, new, NULL);
1334                 nexthop_put(new);
1335         }
1336
1337         return err;
1338 }
1339
1340 /* called with rtnl_lock held */
1341 static int insert_nexthop(struct net *net, struct nexthop *new_nh,
1342                           struct nh_config *cfg, struct netlink_ext_ack *extack)
1343 {
1344         struct rb_node **pp, *parent = NULL, *next;
1345         struct rb_root *root = &net->nexthop.rb_root;
1346         bool replace = !!(cfg->nlflags & NLM_F_REPLACE);
1347         bool create = !!(cfg->nlflags & NLM_F_CREATE);
1348         u32 new_id = new_nh->id;
1349         int replace_notify = 0;
1350         int rc = -EEXIST;
1351
1352         pp = &root->rb_node;
1353         while (1) {
1354                 struct nexthop *nh;
1355
1356                 next = *pp;
1357                 if (!next)
1358                         break;
1359
1360                 parent = next;
1361
1362                 nh = rb_entry(parent, struct nexthop, rb_node);
1363                 if (new_id < nh->id) {
1364                         pp = &next->rb_left;
1365                 } else if (new_id > nh->id) {
1366                         pp = &next->rb_right;
1367                 } else if (replace) {
1368                         rc = replace_nexthop(net, nh, new_nh, cfg, extack);
1369                         if (!rc) {
1370                                 new_nh = nh; /* send notification with old nh */
1371                                 replace_notify = 1;
1372                         }
1373                         goto out;
1374                 } else {
1375                         /* id already exists and not a replace */
1376                         goto out;
1377                 }
1378         }
1379
1380         if (replace && !create) {
1381                 NL_SET_ERR_MSG(extack, "Replace specified without create and no entry exists");
1382                 rc = -ENOENT;
1383                 goto out;
1384         }
1385
1386         rb_link_node_rcu(&new_nh->rb_node, parent, pp);
1387         rb_insert_color(&new_nh->rb_node, root);
1388
1389         rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack);
1390         if (rc)
1391                 rb_erase(&new_nh->rb_node, &net->nexthop.rb_root);
1392
1393 out:
1394         if (!rc) {
1395                 nh_base_seq_inc(net);
1396                 nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
1397                 if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode)
1398                         nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
1399         }
1400
1401         return rc;
1402 }
1403
1404 /* rtnl */
1405 /* remove all nexthops tied to a device being deleted */
1406 static void nexthop_flush_dev(struct net_device *dev, unsigned long event)
1407 {
1408         unsigned int hash = nh_dev_hashfn(dev->ifindex);
1409         struct net *net = dev_net(dev);
1410         struct hlist_head *head = &net->nexthop.devhash[hash];
1411         struct hlist_node *n;
1412         struct nh_info *nhi;
1413
1414         hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
1415                 if (nhi->fib_nhc.nhc_dev != dev)
1416                         continue;
1417
1418                 if (nhi->reject_nh &&
1419                     (event == NETDEV_DOWN || event == NETDEV_CHANGE))
1420                         continue;
1421
1422                 remove_nexthop(net, nhi->nh_parent, NULL);
1423         }
1424 }
1425
1426 /* rtnl; called when net namespace is deleted */
1427 static void flush_all_nexthops(struct net *net)
1428 {
1429         struct rb_root *root = &net->nexthop.rb_root;
1430         struct rb_node *node;
1431         struct nexthop *nh;
1432
1433         while ((node = rb_first(root))) {
1434                 nh = rb_entry(node, struct nexthop, rb_node);
1435                 remove_nexthop(net, nh, NULL);
1436                 cond_resched();
1437         }
1438 }
1439
1440 static struct nexthop *nexthop_create_group(struct net *net,
1441                                             struct nh_config *cfg)
1442 {
1443         struct nlattr *grps_attr = cfg->nh_grp;
1444         struct nexthop_grp *entry = nla_data(grps_attr);
1445         u16 num_nh = nla_len(grps_attr) / sizeof(*entry);
1446         struct nh_group *nhg;
1447         struct nexthop *nh;
1448         int i;
1449
1450         if (WARN_ON(!num_nh))
1451                 return ERR_PTR(-EINVAL);
1452
1453         nh = nexthop_alloc();
1454         if (!nh)
1455                 return ERR_PTR(-ENOMEM);
1456
1457         nh->is_group = 1;
1458
1459         nhg = nexthop_grp_alloc(num_nh);
1460         if (!nhg) {
1461                 kfree(nh);
1462                 return ERR_PTR(-ENOMEM);
1463         }
1464
1465         /* spare group used for removals */
1466         nhg->spare = nexthop_grp_alloc(num_nh);
1467         if (!nhg->spare) {
1468                 kfree(nhg);
1469                 kfree(nh);
1470                 return ERR_PTR(-ENOMEM);
1471         }
1472         nhg->spare->spare = nhg;
1473
1474         for (i = 0; i < nhg->num_nh; ++i) {
1475                 struct nexthop *nhe;
1476                 struct nh_info *nhi;
1477
1478                 nhe = nexthop_find_by_id(net, entry[i].id);
1479                 if (!nexthop_get(nhe))
1480                         goto out_no_nh;
1481
1482                 nhi = rtnl_dereference(nhe->nh_info);
1483                 if (nhi->family == AF_INET)
1484                         nhg->has_v4 = true;
1485
1486                 nhg->nh_entries[i].nh = nhe;
1487                 nhg->nh_entries[i].weight = entry[i].weight + 1;
1488                 list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
1489                 nhg->nh_entries[i].nh_parent = nh;
1490         }
1491
1492         if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) {
1493                 nhg->mpath = 1;
1494                 nhg->is_multipath = true;
1495         }
1496
1497         WARN_ON_ONCE(nhg->mpath != 1);
1498
1499         if (nhg->mpath)
1500                 nh_group_rebalance(nhg);
1501
1502         if (cfg->nh_fdb)
1503                 nhg->fdb_nh = 1;
1504
1505         rcu_assign_pointer(nh->nh_grp, nhg);
1506
1507         return nh;
1508
1509 out_no_nh:
1510         for (i--; i >= 0; --i) {
1511                 list_del(&nhg->nh_entries[i].nh_list);
1512                 nexthop_put(nhg->nh_entries[i].nh);
1513         }
1514
1515         kfree(nhg->spare);
1516         kfree(nhg);
1517         kfree(nh);
1518
1519         return ERR_PTR(-ENOENT);
1520 }
1521
1522 static int nh_create_ipv4(struct net *net, struct nexthop *nh,
1523                           struct nh_info *nhi, struct nh_config *cfg,
1524                           struct netlink_ext_ack *extack)
1525 {
1526         struct fib_nh *fib_nh = &nhi->fib_nh;
1527         struct fib_config fib_cfg = {
1528                 .fc_oif   = cfg->nh_ifindex,
1529                 .fc_gw4   = cfg->gw.ipv4,
1530                 .fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0,
1531                 .fc_flags = cfg->nh_flags,
1532                 .fc_encap = cfg->nh_encap,
1533                 .fc_encap_type = cfg->nh_encap_type,
1534         };
1535         u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN);
1536         int err;
1537
1538         err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
1539         if (err) {
1540                 fib_nh_release(net, fib_nh);
1541                 goto out;
1542         }
1543
1544         if (nhi->fdb_nh)
1545                 goto out;
1546
1547         /* sets nh_dev if successful */
1548         err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
1549         if (!err) {
1550                 nh->nh_flags = fib_nh->fib_nh_flags;
1551                 fib_info_update_nhc_saddr(net, &fib_nh->nh_common,
1552                                           fib_nh->fib_nh_scope);
1553         } else {
1554                 fib_nh_release(net, fib_nh);
1555         }
1556 out:
1557         return err;
1558 }
1559
1560 static int nh_create_ipv6(struct net *net,  struct nexthop *nh,
1561                           struct nh_info *nhi, struct nh_config *cfg,
1562                           struct netlink_ext_ack *extack)
1563 {
1564         struct fib6_nh *fib6_nh = &nhi->fib6_nh;
1565         struct fib6_config fib6_cfg = {
1566                 .fc_table = l3mdev_fib_table(cfg->dev),
1567                 .fc_ifindex = cfg->nh_ifindex,
1568                 .fc_gateway = cfg->gw.ipv6,
1569                 .fc_flags = cfg->nh_flags,
1570                 .fc_encap = cfg->nh_encap,
1571                 .fc_encap_type = cfg->nh_encap_type,
1572                 .fc_is_fdb = cfg->nh_fdb,
1573         };
1574         int err;
1575
1576         if (!ipv6_addr_any(&cfg->gw.ipv6))
1577                 fib6_cfg.fc_flags |= RTF_GATEWAY;
1578
1579         /* sets nh_dev if successful */
1580         err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
1581                                       extack);
1582         if (err)
1583                 ipv6_stub->fib6_nh_release(fib6_nh);
1584         else
1585                 nh->nh_flags = fib6_nh->fib_nh_flags;
1586
1587         return err;
1588 }
1589
1590 static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
1591                                       struct netlink_ext_ack *extack)
1592 {
1593         struct nh_info *nhi;
1594         struct nexthop *nh;
1595         int err = 0;
1596
1597         nh = nexthop_alloc();
1598         if (!nh)
1599                 return ERR_PTR(-ENOMEM);
1600
1601         nhi = kzalloc(sizeof(*nhi), GFP_KERNEL);
1602         if (!nhi) {
1603                 kfree(nh);
1604                 return ERR_PTR(-ENOMEM);
1605         }
1606
1607         nh->nh_flags = cfg->nh_flags;
1608         nh->net = net;
1609
1610         nhi->nh_parent = nh;
1611         nhi->family = cfg->nh_family;
1612         nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;
1613
1614         if (cfg->nh_fdb)
1615                 nhi->fdb_nh = 1;
1616
1617         if (cfg->nh_blackhole) {
1618                 nhi->reject_nh = 1;
1619                 cfg->nh_ifindex = net->loopback_dev->ifindex;
1620         }
1621
1622         switch (cfg->nh_family) {
1623         case AF_INET:
1624                 err = nh_create_ipv4(net, nh, nhi, cfg, extack);
1625                 break;
1626         case AF_INET6:
1627                 err = nh_create_ipv6(net, nh, nhi, cfg, extack);
1628                 break;
1629         }
1630
1631         if (err) {
1632                 kfree(nhi);
1633                 kfree(nh);
1634                 return ERR_PTR(err);
1635         }
1636
1637         /* add the entry to the device based hash */
1638         if (!nhi->fdb_nh)
1639                 nexthop_devhash_add(net, nhi);
1640
1641         rcu_assign_pointer(nh->nh_info, nhi);
1642
1643         return nh;
1644 }
1645
1646 /* called with rtnl lock held */
1647 static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
1648                                    struct netlink_ext_ack *extack)
1649 {
1650         struct nexthop *nh;
1651         int err;
1652
1653         if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) {
1654                 NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
1655                 return ERR_PTR(-EINVAL);
1656         }
1657
1658         if (!cfg->nh_id) {
1659                 cfg->nh_id = nh_find_unused_id(net);
1660                 if (!cfg->nh_id) {
1661                         NL_SET_ERR_MSG(extack, "No unused id");
1662                         return ERR_PTR(-EINVAL);
1663                 }
1664         }
1665
1666         if (cfg->nh_grp)
1667                 nh = nexthop_create_group(net, cfg);
1668         else
1669                 nh = nexthop_create(net, cfg, extack);
1670
1671         if (IS_ERR(nh))
1672                 return nh;
1673
1674         refcount_set(&nh->refcnt, 1);
1675         nh->id = cfg->nh_id;
1676         nh->protocol = cfg->nh_protocol;
1677         nh->net = net;
1678
1679         err = insert_nexthop(net, nh, cfg, extack);
1680         if (err) {
1681                 __remove_nexthop(net, nh, NULL);
1682                 nexthop_put(nh);
1683                 nh = ERR_PTR(err);
1684         }
1685
1686         return nh;
1687 }
1688
1689 static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
1690                             struct nlmsghdr *nlh, struct nh_config *cfg,
1691                             struct netlink_ext_ack *extack)
1692 {
1693         struct nhmsg *nhm = nlmsg_data(nlh);
1694         struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)];
1695         int err;
1696
1697         err = nlmsg_parse(nlh, sizeof(*nhm), tb,
1698                           ARRAY_SIZE(rtm_nh_policy_new) - 1,
1699                           rtm_nh_policy_new, extack);
1700         if (err < 0)
1701                 return err;
1702
1703         err = -EINVAL;
1704         if (nhm->resvd || nhm->nh_scope) {
1705                 NL_SET_ERR_MSG(extack, "Invalid values in ancillary header");
1706                 goto out;
1707         }
1708         if (nhm->nh_flags & ~NEXTHOP_VALID_USER_FLAGS) {
1709                 NL_SET_ERR_MSG(extack, "Invalid nexthop flags in ancillary header");
1710                 goto out;
1711         }
1712
1713         switch (nhm->nh_family) {
1714         case AF_INET:
1715         case AF_INET6:
1716                 break;
1717         case AF_UNSPEC:
1718                 if (tb[NHA_GROUP])
1719                         break;
1720                 fallthrough;
1721         default:
1722                 NL_SET_ERR_MSG(extack, "Invalid address family");
1723                 goto out;
1724         }
1725
1726         memset(cfg, 0, sizeof(*cfg));
1727         cfg->nlflags = nlh->nlmsg_flags;
1728         cfg->nlinfo.portid = NETLINK_CB(skb).portid;
1729         cfg->nlinfo.nlh = nlh;
1730         cfg->nlinfo.nl_net = net;
1731
1732         cfg->nh_family = nhm->nh_family;
1733         cfg->nh_protocol = nhm->nh_protocol;
1734         cfg->nh_flags = nhm->nh_flags;
1735
1736         if (tb[NHA_ID])
1737                 cfg->nh_id = nla_get_u32(tb[NHA_ID]);
1738
1739         if (tb[NHA_FDB]) {
1740                 if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] ||
1741                     tb[NHA_ENCAP]   || tb[NHA_ENCAP_TYPE]) {
1742                         NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or blackhole");
1743                         goto out;
1744                 }
1745                 if (nhm->nh_flags) {
1746                         NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header");
1747                         goto out;
1748                 }
1749                 cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]);
1750         }
1751
1752         if (tb[NHA_GROUP]) {
1753                 if (nhm->nh_family != AF_UNSPEC) {
1754                         NL_SET_ERR_MSG(extack, "Invalid family for group");
1755                         goto out;
1756                 }
1757                 cfg->nh_grp = tb[NHA_GROUP];
1758
1759                 cfg->nh_grp_type = NEXTHOP_GRP_TYPE_MPATH;
1760                 if (tb[NHA_GROUP_TYPE])
1761                         cfg->nh_grp_type = nla_get_u16(tb[NHA_GROUP_TYPE]);
1762
1763                 if (cfg->nh_grp_type > NEXTHOP_GRP_TYPE_MAX) {
1764                         NL_SET_ERR_MSG(extack, "Invalid group type");
1765                         goto out;
1766                 }
1767                 err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb), extack);
1768
1769                 /* no other attributes should be set */
1770                 goto out;
1771         }
1772
1773         if (tb[NHA_BLACKHOLE]) {
1774                 if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
1775                     tb[NHA_ENCAP]   || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) {
1776                         NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb");
1777                         goto out;
1778                 }
1779
1780                 cfg->nh_blackhole = 1;
1781                 err = 0;
1782                 goto out;
1783         }
1784
1785         if (!cfg->nh_fdb && !tb[NHA_OIF]) {
1786                 NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops");
1787                 goto out;
1788         }
1789
1790         if (!cfg->nh_fdb && tb[NHA_OIF]) {
1791                 cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
1792                 if (cfg->nh_ifindex)
1793                         cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
1794
1795                 if (!cfg->dev) {
1796                         NL_SET_ERR_MSG(extack, "Invalid device index");
1797                         goto out;
1798                 } else if (!(cfg->dev->flags & IFF_UP)) {
1799                         NL_SET_ERR_MSG(extack, "Nexthop device is not up");
1800                         err = -ENETDOWN;
1801                         goto out;
1802                 } else if (!netif_carrier_ok(cfg->dev)) {
1803                         NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
1804                         err = -ENETDOWN;
1805                         goto out;
1806                 }
1807         }
1808
1809         err = -EINVAL;
1810         if (tb[NHA_GATEWAY]) {
1811                 struct nlattr *gwa = tb[NHA_GATEWAY];
1812
1813                 switch (cfg->nh_family) {
1814                 case AF_INET:
1815                         if (nla_len(gwa) != sizeof(u32)) {
1816                                 NL_SET_ERR_MSG(extack, "Invalid gateway");
1817                                 goto out;
1818                         }
1819                         cfg->gw.ipv4 = nla_get_be32(gwa);
1820                         break;
1821                 case AF_INET6:
1822                         if (nla_len(gwa) != sizeof(struct in6_addr)) {
1823                                 NL_SET_ERR_MSG(extack, "Invalid gateway");
1824                                 goto out;
1825                         }
1826                         cfg->gw.ipv6 = nla_get_in6_addr(gwa);
1827                         break;
1828                 default:
1829                         NL_SET_ERR_MSG(extack,
1830                                        "Unknown address family for gateway");
1831                         goto out;
1832                 }
1833         } else {
1834                 /* device only nexthop (no gateway) */
1835                 if (cfg->nh_flags & RTNH_F_ONLINK) {
1836                         NL_SET_ERR_MSG(extack,
1837                                        "ONLINK flag can not be set for nexthop without a gateway");
1838                         goto out;
1839                 }
1840         }
1841
1842         if (tb[NHA_ENCAP]) {
1843                 cfg->nh_encap = tb[NHA_ENCAP];
1844
1845                 if (!tb[NHA_ENCAP_TYPE]) {
1846                         NL_SET_ERR_MSG(extack, "LWT encapsulation type is missing");
1847                         goto out;
1848                 }
1849
1850                 cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]);
1851                 err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack);
1852                 if (err < 0)
1853                         goto out;
1854
1855         } else if (tb[NHA_ENCAP_TYPE]) {
1856                 NL_SET_ERR_MSG(extack, "LWT encapsulation attribute is missing");
1857                 goto out;
1858         }
1859
1860
1861         err = 0;
1862 out:
1863         return err;
1864 }
1865
1866 /* rtnl */
1867 static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
1868                            struct netlink_ext_ack *extack)
1869 {
1870         struct net *net = sock_net(skb->sk);
1871         struct nh_config cfg;
1872         struct nexthop *nh;
1873         int err;
1874
1875         err = rtm_to_nh_config(net, skb, nlh, &cfg, extack);
1876         if (!err) {
1877                 nh = nexthop_add(net, &cfg, extack);
1878                 if (IS_ERR(nh))
1879                         err = PTR_ERR(nh);
1880         }
1881
1882         return err;
1883 }
1884
1885 static int __nh_valid_get_del_req(const struct nlmsghdr *nlh,
1886                                   struct nlattr **tb, u32 *id,
1887                                   struct netlink_ext_ack *extack)
1888 {
1889         struct nhmsg *nhm = nlmsg_data(nlh);
1890
1891         if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
1892                 NL_SET_ERR_MSG(extack, "Invalid values in header");
1893                 return -EINVAL;
1894         }
1895
1896         if (!tb[NHA_ID]) {
1897                 NL_SET_ERR_MSG(extack, "Nexthop id is missing");
1898                 return -EINVAL;
1899         }
1900
1901         *id = nla_get_u32(tb[NHA_ID]);
1902         if (!(*id)) {
1903                 NL_SET_ERR_MSG(extack, "Invalid nexthop id");
1904                 return -EINVAL;
1905         }
1906
1907         return 0;
1908 }
1909
1910 static int nh_valid_get_del_req(const struct nlmsghdr *nlh, u32 *id,
1911                                 struct netlink_ext_ack *extack)
1912 {
1913         struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)];
1914         int err;
1915
1916         err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
1917                           ARRAY_SIZE(rtm_nh_policy_get) - 1,
1918                           rtm_nh_policy_get, extack);
1919         if (err < 0)
1920                 return err;
1921
1922         return __nh_valid_get_del_req(nlh, tb, id, extack);
1923 }
1924
1925 /* rtnl */
1926 static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
1927                            struct netlink_ext_ack *extack)
1928 {
1929         struct net *net = sock_net(skb->sk);
1930         struct nl_info nlinfo = {
1931                 .nlh = nlh,
1932                 .nl_net = net,
1933                 .portid = NETLINK_CB(skb).portid,
1934         };
1935         struct nexthop *nh;
1936         int err;
1937         u32 id;
1938
1939         err = nh_valid_get_del_req(nlh, &id, extack);
1940         if (err)
1941                 return err;
1942
1943         nh = nexthop_find_by_id(net, id);
1944         if (!nh)
1945                 return -ENOENT;
1946
1947         remove_nexthop(net, nh, &nlinfo);
1948
1949         return 0;
1950 }
1951
1952 /* rtnl */
1953 static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
1954                            struct netlink_ext_ack *extack)
1955 {
1956         struct net *net = sock_net(in_skb->sk);
1957         struct sk_buff *skb = NULL;
1958         struct nexthop *nh;
1959         int err;
1960         u32 id;
1961
1962         err = nh_valid_get_del_req(nlh, &id, extack);
1963         if (err)
1964                 return err;
1965
1966         err = -ENOBUFS;
1967         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1968         if (!skb)
1969                 goto out;
1970
1971         err = -ENOENT;
1972         nh = nexthop_find_by_id(net, id);
1973         if (!nh)
1974                 goto errout_free;
1975
1976         err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid,
1977                            nlh->nlmsg_seq, 0);
1978         if (err < 0) {
1979                 WARN_ON(err == -EMSGSIZE);
1980                 goto errout_free;
1981         }
1982
1983         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1984 out:
1985         return err;
1986 errout_free:
1987         kfree_skb(skb);
1988         goto out;
1989 }
1990
/* Filter criteria for a nexthop dump request, filled in by
 * __nh_valid_dump_req() from the request attributes.
 */
struct nh_dump_filter {
        int dev_idx;            /* from NHA_OIF; 0 means no device filter */
        int master_idx;         /* from NHA_MASTER; 0 means no master filter */
        bool group_filter;      /* NHA_GROUPS flag: only dump groups */
        bool fdb_filter;        /* NHA_FDB flag; not read by nh_dump_filtered() */
};
1997
1998 static bool nh_dump_filtered(struct nexthop *nh,
1999                              struct nh_dump_filter *filter, u8 family)
2000 {
2001         const struct net_device *dev;
2002         const struct nh_info *nhi;
2003
2004         if (filter->group_filter && !nh->is_group)
2005                 return true;
2006
2007         if (!filter->dev_idx && !filter->master_idx && !family)
2008                 return false;
2009
2010         if (nh->is_group)
2011                 return true;
2012
2013         nhi = rtnl_dereference(nh->nh_info);
2014         if (family && nhi->family != family)
2015                 return true;
2016
2017         dev = nhi->fib_nhc.nhc_dev;
2018         if (filter->dev_idx && (!dev || dev->ifindex != filter->dev_idx))
2019                 return true;
2020
2021         if (filter->master_idx) {
2022                 struct net_device *master;
2023
2024                 if (!dev)
2025                         return true;
2026
2027                 master = netdev_master_upper_dev_get((struct net_device *)dev);
2028                 if (!master || master->ifindex != filter->master_idx)
2029                         return true;
2030         }
2031
2032         return false;
2033 }
2034
2035 static int __nh_valid_dump_req(const struct nlmsghdr *nlh, struct nlattr **tb,
2036                                struct nh_dump_filter *filter,
2037                                struct netlink_ext_ack *extack)
2038 {
2039         struct nhmsg *nhm;
2040         u32 idx;
2041
2042         if (tb[NHA_OIF]) {
2043                 idx = nla_get_u32(tb[NHA_OIF]);
2044                 if (idx > INT_MAX) {
2045                         NL_SET_ERR_MSG(extack, "Invalid device index");
2046                         return -EINVAL;
2047                 }
2048                 filter->dev_idx = idx;
2049         }
2050         if (tb[NHA_MASTER]) {
2051                 idx = nla_get_u32(tb[NHA_MASTER]);
2052                 if (idx > INT_MAX) {
2053                         NL_SET_ERR_MSG(extack, "Invalid master device index");
2054                         return -EINVAL;
2055                 }
2056                 filter->master_idx = idx;
2057         }
2058         filter->group_filter = nla_get_flag(tb[NHA_GROUPS]);
2059         filter->fdb_filter = nla_get_flag(tb[NHA_FDB]);
2060
2061         nhm = nlmsg_data(nlh);
2062         if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
2063                 NL_SET_ERR_MSG(extack, "Invalid values in header for nexthop dump request");
2064                 return -EINVAL;
2065         }
2066
2067         return 0;
2068 }
2069
2070 static int nh_valid_dump_req(const struct nlmsghdr *nlh,
2071                              struct nh_dump_filter *filter,
2072                              struct netlink_callback *cb)
2073 {
2074         struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)];
2075         int err;
2076
2077         err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
2078                           ARRAY_SIZE(rtm_nh_policy_dump) - 1,
2079                           rtm_nh_policy_dump, cb->extack);
2080         if (err < 0)
2081                 return err;
2082
2083         return __nh_valid_dump_req(nlh, tb, filter, cb->extack);
2084 }
2085
/* Per-dump state stored in netlink_callback::ctx (see rtm_dump_nh_ctx()). */
struct rtm_dump_nh_ctx {
        u32 idx;        /* walk position at which rtm_dump_walk_nexthops() resumes */
};
2089
2090 static struct rtm_dump_nh_ctx *
2091 rtm_dump_nh_ctx(struct netlink_callback *cb)
2092 {
2093         struct rtm_dump_nh_ctx *ctx = (void *)cb->ctx;
2094
2095         BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
2096         return ctx;
2097 }
2098
2099 static int rtm_dump_walk_nexthops(struct sk_buff *skb,
2100                                   struct netlink_callback *cb,
2101                                   struct rb_root *root,
2102                                   struct rtm_dump_nh_ctx *ctx,
2103                                   int (*nh_cb)(struct sk_buff *skb,
2104                                                struct netlink_callback *cb,
2105                                                struct nexthop *nh, void *data),
2106                                   void *data)
2107 {
2108         struct rb_node *node;
2109         int idx = 0, s_idx;
2110         int err;
2111
2112         s_idx = ctx->idx;
2113         for (node = rb_first(root); node; node = rb_next(node)) {
2114                 struct nexthop *nh;
2115
2116                 if (idx < s_idx)
2117                         goto cont;
2118
2119                 nh = rb_entry(node, struct nexthop, rb_node);
2120                 ctx->idx = idx;
2121                 err = nh_cb(skb, cb, nh, data);
2122                 if (err)
2123                         return err;
2124 cont:
2125                 idx++;
2126         }
2127
2128         ctx->idx = idx;
2129         return 0;
2130 }
2131
2132 static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb,
2133                                struct nexthop *nh, void *data)
2134 {
2135         struct nhmsg *nhm = nlmsg_data(cb->nlh);
2136         struct nh_dump_filter *filter = data;
2137
2138         if (nh_dump_filtered(nh, filter, nhm->nh_family))
2139                 return 0;
2140
2141         return nh_fill_node(skb, nh, RTM_NEWNEXTHOP,
2142                             NETLINK_CB(cb->skb).portid,
2143                             cb->nlh->nlmsg_seq, NLM_F_MULTI);
2144 }
2145
2146 /* rtnl */
2147 static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
2148 {
2149         struct rtm_dump_nh_ctx *ctx = rtm_dump_nh_ctx(cb);
2150         struct net *net = sock_net(skb->sk);
2151         struct rb_root *root = &net->nexthop.rb_root;
2152         struct nh_dump_filter filter = {};
2153         int err;
2154
2155         err = nh_valid_dump_req(cb->nlh, &filter, cb);
2156         if (err < 0)
2157                 return err;
2158
2159         err = rtm_dump_walk_nexthops(skb, cb, root, ctx,
2160                                      &rtm_dump_nexthop_cb, &filter);
2161         if (err < 0) {
2162                 if (likely(skb->len))
2163                         goto out;
2164                 goto out_err;
2165         }
2166
2167 out:
2168         err = skb->len;
2169 out_err:
2170         cb->seq = net->nexthop.seq;
2171         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2172         return err;
2173 }
2174
2175 static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
2176 {
2177         unsigned int hash = nh_dev_hashfn(dev->ifindex);
2178         struct net *net = dev_net(dev);
2179         struct hlist_head *head = &net->nexthop.devhash[hash];
2180         struct hlist_node *n;
2181         struct nh_info *nhi;
2182
2183         hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
2184                 if (nhi->fib_nhc.nhc_dev == dev) {
2185                         if (nhi->family == AF_INET)
2186                                 fib_nhc_update_mtu(&nhi->fib_nhc, dev->mtu,
2187                                                    orig_mtu);
2188                 }
2189         }
2190 }
2191
2192 /* rtnl */
2193 static int nh_netdev_event(struct notifier_block *this,
2194                            unsigned long event, void *ptr)
2195 {
2196         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2197         struct netdev_notifier_info_ext *info_ext;
2198
2199         switch (event) {
2200         case NETDEV_DOWN:
2201         case NETDEV_UNREGISTER:
2202                 nexthop_flush_dev(dev, event);
2203                 break;
2204         case NETDEV_CHANGE:
2205                 if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP)))
2206                         nexthop_flush_dev(dev, event);
2207                 break;
2208         case NETDEV_CHANGEMTU:
2209                 info_ext = ptr;
2210                 nexthop_sync_mtu(dev, info_ext->ext.mtu);
2211                 rt_cache_flush(dev_net(dev));
2212                 break;
2213         }
2214         return NOTIFY_DONE;
2215 }
2216
/* Notifier block that routes netdevice events to nh_netdev_event();
 * registered from nexthop_init().
 */
static struct notifier_block nh_netdev_notifier = {
        .notifier_call = nh_netdev_event,
};
2220
2221 static int nexthops_dump(struct net *net, struct notifier_block *nb,
2222                          struct netlink_ext_ack *extack)
2223 {
2224         struct rb_root *root = &net->nexthop.rb_root;
2225         struct rb_node *node;
2226         int err = 0;
2227
2228         for (node = rb_first(root); node; node = rb_next(node)) {
2229                 struct nexthop *nh;
2230
2231                 nh = rb_entry(node, struct nexthop, rb_node);
2232                 err = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh,
2233                                             extack);
2234                 if (err)
2235                         break;
2236         }
2237
2238         return err;
2239 }
2240
2241 int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
2242                               struct netlink_ext_ack *extack)
2243 {
2244         int err;
2245
2246         rtnl_lock();
2247         err = nexthops_dump(net, nb, extack);
2248         if (err)
2249                 goto unlock;
2250         err = blocking_notifier_chain_register(&net->nexthop.notifier_chain,
2251                                                nb);
2252 unlock:
2253         rtnl_unlock();
2254         return err;
2255 }
2256 EXPORT_SYMBOL(register_nexthop_notifier);
2257
2258 int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
2259 {
2260         return blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
2261                                                   nb);
2262 }
2263 EXPORT_SYMBOL(unregister_nexthop_notifier);
2264
2265 void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap)
2266 {
2267         struct nexthop *nexthop;
2268
2269         rcu_read_lock();
2270
2271         nexthop = nexthop_find_by_id(net, id);
2272         if (!nexthop)
2273                 goto out;
2274
2275         nexthop->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP);
2276         if (offload)
2277                 nexthop->nh_flags |= RTNH_F_OFFLOAD;
2278         if (trap)
2279                 nexthop->nh_flags |= RTNH_F_TRAP;
2280
2281 out:
2282         rcu_read_unlock();
2283 }
2284 EXPORT_SYMBOL(nexthop_set_hw_flags);
2285
/* Per-netns teardown: flush all nexthops of @net under the rtnl lock, then
 * free the device hash table allocated by nexthop_net_init().
 */
static void __net_exit nexthop_net_exit(struct net *net)
{
        rtnl_lock();
        flush_all_nexthops(net);
        rtnl_unlock();
        /* freed only after the flush above has completed */
        kfree(net->nexthop.devhash);
}
2293
2294 static int __net_init nexthop_net_init(struct net *net)
2295 {
2296         size_t sz = sizeof(struct hlist_head) * NH_DEV_HASHSIZE;
2297
2298         net->nexthop.rb_root = RB_ROOT;
2299         net->nexthop.devhash = kzalloc(sz, GFP_KERNEL);
2300         if (!net->nexthop.devhash)
2301                 return -ENOMEM;
2302         BLOCKING_INIT_NOTIFIER_HEAD(&net->nexthop.notifier_chain);
2303
2304         return 0;
2305 }
2306
/* Per-network-namespace init/exit hooks for the nexthop state; registered
 * from nexthop_init().
 */
static struct pernet_operations nexthop_net_ops = {
        .init = nexthop_net_init,
        .exit = nexthop_net_exit,
};
2311
2312 static int __init nexthop_init(void)
2313 {
2314         register_pernet_subsys(&nexthop_net_ops);
2315
2316         register_netdevice_notifier(&nh_netdev_notifier);
2317
2318         rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
2319         rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0);
2320         rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop,
2321                       rtm_dump_nexthop, 0);
2322
2323         rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
2324         rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
2325
2326         rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
2327         rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
2328
2329         return 0;
2330 }
2331 subsys_initcall(nexthop_init);