ipv4: Prepare for fib6_nh from a nexthop object
[platform/kernel/linux-rpi.git] / net / ipv4 / fib_semantics.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              IPv4 Forwarding Information Base: semantics.
7  *
8  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  *
10  *              This program is free software; you can redistribute it and/or
11  *              modify it under the terms of the GNU General Public License
12  *              as published by the Free Software Foundation; either version
13  *              2 of the License, or (at your option) any later version.
14  */
15
16 #include <linux/uaccess.h>
17 #include <linux/bitops.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/jiffies.h>
21 #include <linux/mm.h>
22 #include <linux/string.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
25 #include <linux/errno.h>
26 #include <linux/in.h>
27 #include <linux/inet.h>
28 #include <linux/inetdevice.h>
29 #include <linux/netdevice.h>
30 #include <linux/if_arp.h>
31 #include <linux/proc_fs.h>
32 #include <linux/skbuff.h>
33 #include <linux/init.h>
34 #include <linux/slab.h>
35 #include <linux/netlink.h>
36
37 #include <net/arp.h>
38 #include <net/ip.h>
39 #include <net/protocol.h>
40 #include <net/route.h>
41 #include <net/tcp.h>
42 #include <net/sock.h>
43 #include <net/ip_fib.h>
44 #include <net/ip6_fib.h>
45 #include <net/nexthop.h>
46 #include <net/netlink.h>
47 #include <net/rtnh.h>
48 #include <net/lwtunnel.h>
49 #include <net/fib_notifier.h>
50 #include <net/addrconf.h>
51
52 #include "fib_lookup.h"
53
54 static DEFINE_SPINLOCK(fib_info_lock);
55 static struct hlist_head *fib_info_hash;
56 static struct hlist_head *fib_info_laddrhash;
57 static unsigned int fib_info_hash_size;
58 static unsigned int fib_info_cnt;
59
60 #define DEVINDEX_HASHBITS 8
61 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
62 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
63
/* Iteration helpers over the fib_nh array of a fib_info.
 *
 * for_nexthops and change_nexthops are only used when a nexthop object
 * is not set in the fib_info, since the expansion references fi->fib_nh
 * directly.
 *
 * Each macro opens a brace scope that must be closed with
 * endfor_nexthops(fi), and makes these names visible to the loop body:
 *   for_nexthops:    nhsel (index) and nh (const struct fib_nh *)
 *   change_nexthops: nhsel (index) and nexthop_nh (struct fib_nh *)
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

#define for_nexthops(fi) {                                              \
        int nhsel = 0;                                                  \
        const struct fib_nh *nh = (fi)->fib_nh;                         \
        for (; nhsel < fib_info_num_path((fi)); nh++, nhsel++)

#define change_nexthops(fi) {                                           \
        int nhsel = 0;                                                  \
        struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh);    \
        for (; nhsel < fib_info_num_path((fi)); nexthop_nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/* Without multipath the body runs exactly once; the loop is kept so that
 * "continue" and "break" behave the same in both configurations. Hope,
 * that gcc will optimize it to get rid of the dummy loop.
 */

#define for_nexthops(fi) {                                              \
        int nhsel = 0;                                                  \
        const struct fib_nh *nh = (fi)->fib_nh;                         \
        for (; nhsel < 1; nhsel++)

#define change_nexthops(fi) {                                           \
        int nhsel = 0;                                                  \
        struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh);    \
        for (; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the scope opened by for_nexthops() / change_nexthops() */
#define endfor_nexthops(fi) }
97
98
99 const struct fib_prop fib_props[RTN_MAX + 1] = {
100         [RTN_UNSPEC] = {
101                 .error  = 0,
102                 .scope  = RT_SCOPE_NOWHERE,
103         },
104         [RTN_UNICAST] = {
105                 .error  = 0,
106                 .scope  = RT_SCOPE_UNIVERSE,
107         },
108         [RTN_LOCAL] = {
109                 .error  = 0,
110                 .scope  = RT_SCOPE_HOST,
111         },
112         [RTN_BROADCAST] = {
113                 .error  = 0,
114                 .scope  = RT_SCOPE_LINK,
115         },
116         [RTN_ANYCAST] = {
117                 .error  = 0,
118                 .scope  = RT_SCOPE_LINK,
119         },
120         [RTN_MULTICAST] = {
121                 .error  = 0,
122                 .scope  = RT_SCOPE_UNIVERSE,
123         },
124         [RTN_BLACKHOLE] = {
125                 .error  = -EINVAL,
126                 .scope  = RT_SCOPE_UNIVERSE,
127         },
128         [RTN_UNREACHABLE] = {
129                 .error  = -EHOSTUNREACH,
130                 .scope  = RT_SCOPE_UNIVERSE,
131         },
132         [RTN_PROHIBIT] = {
133                 .error  = -EACCES,
134                 .scope  = RT_SCOPE_UNIVERSE,
135         },
136         [RTN_THROW] = {
137                 .error  = -EAGAIN,
138                 .scope  = RT_SCOPE_UNIVERSE,
139         },
140         [RTN_NAT] = {
141                 .error  = -EINVAL,
142                 .scope  = RT_SCOPE_NOWHERE,
143         },
144         [RTN_XRESOLVE] = {
145                 .error  = -EINVAL,
146                 .scope  = RT_SCOPE_NOWHERE,
147         },
148 };
149
150 static void rt_fibinfo_free(struct rtable __rcu **rtp)
151 {
152         struct rtable *rt = rcu_dereference_protected(*rtp, 1);
153
154         if (!rt)
155                 return;
156
157         /* Not even needed : RCU_INIT_POINTER(*rtp, NULL);
158          * because we waited an RCU grace period before calling
159          * free_fib_info_rcu()
160          */
161
162         dst_dev_put(&rt->dst);
163         dst_release_immediate(&rt->dst);
164 }
165
166 static void free_nh_exceptions(struct fib_nh_common *nhc)
167 {
168         struct fnhe_hash_bucket *hash;
169         int i;
170
171         hash = rcu_dereference_protected(nhc->nhc_exceptions, 1);
172         if (!hash)
173                 return;
174         for (i = 0; i < FNHE_HASH_SIZE; i++) {
175                 struct fib_nh_exception *fnhe;
176
177                 fnhe = rcu_dereference_protected(hash[i].chain, 1);
178                 while (fnhe) {
179                         struct fib_nh_exception *next;
180
181                         next = rcu_dereference_protected(fnhe->fnhe_next, 1);
182
183                         rt_fibinfo_free(&fnhe->fnhe_rth_input);
184                         rt_fibinfo_free(&fnhe->fnhe_rth_output);
185
186                         kfree(fnhe);
187
188                         fnhe = next;
189                 }
190         }
191         kfree(hash);
192 }
193
194 static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
195 {
196         int cpu;
197
198         if (!rtp)
199                 return;
200
201         for_each_possible_cpu(cpu) {
202                 struct rtable *rt;
203
204                 rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
205                 if (rt) {
206                         dst_dev_put(&rt->dst);
207                         dst_release_immediate(&rt->dst);
208                 }
209         }
210         free_percpu(rtp);
211 }
212
213 void fib_nh_common_release(struct fib_nh_common *nhc)
214 {
215         if (nhc->nhc_dev)
216                 dev_put(nhc->nhc_dev);
217
218         lwtstate_put(nhc->nhc_lwtstate);
219         rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
220         rt_fibinfo_free(&nhc->nhc_rth_input);
221         free_nh_exceptions(nhc);
222 }
223 EXPORT_SYMBOL_GPL(fib_nh_common_release);
224
225 void fib_nh_release(struct net *net, struct fib_nh *fib_nh)
226 {
227 #ifdef CONFIG_IP_ROUTE_CLASSID
228         if (fib_nh->nh_tclassid)
229                 net->ipv4.fib_num_tclassid_users--;
230 #endif
231         fib_nh_common_release(&fib_nh->nh_common);
232 }
233
234 /* Release a nexthop info record */
235 static void free_fib_info_rcu(struct rcu_head *head)
236 {
237         struct fib_info *fi = container_of(head, struct fib_info, rcu);
238
239         change_nexthops(fi) {
240                 fib_nh_release(fi->fib_net, nexthop_nh);
241         } endfor_nexthops(fi);
242
243         ip_fib_metrics_put(fi->fib_metrics);
244
245         kfree(fi);
246 }
247
248 void free_fib_info(struct fib_info *fi)
249 {
250         if (fi->fib_dead == 0) {
251                 pr_warn("Freeing alive fib_info %p\n", fi);
252                 return;
253         }
254         fib_info_cnt--;
255
256         call_rcu(&fi->rcu, free_fib_info_rcu);
257 }
258 EXPORT_SYMBOL_GPL(free_fib_info);
259
260 void fib_release_info(struct fib_info *fi)
261 {
262         spin_lock_bh(&fib_info_lock);
263         if (fi && --fi->fib_treeref == 0) {
264                 hlist_del(&fi->fib_hash);
265                 if (fi->fib_prefsrc)
266                         hlist_del(&fi->fib_lhash);
267                 change_nexthops(fi) {
268                         if (!nexthop_nh->fib_nh_dev)
269                                 continue;
270                         hlist_del(&nexthop_nh->nh_hash);
271                 } endfor_nexthops(fi)
272                 fi->fib_dead = 1;
273                 fib_info_put(fi);
274         }
275         spin_unlock_bh(&fib_info_lock);
276 }
277
278 static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
279 {
280         const struct fib_nh *onh;
281
282         for_nexthops(fi) {
283                 onh = fib_info_nh(ofi, nhsel);
284
285                 if (nh->fib_nh_oif != onh->fib_nh_oif ||
286                     nh->fib_nh_gw_family != onh->fib_nh_gw_family ||
287                     nh->fib_nh_scope != onh->fib_nh_scope ||
288 #ifdef CONFIG_IP_ROUTE_MULTIPATH
289                     nh->fib_nh_weight != onh->fib_nh_weight ||
290 #endif
291 #ifdef CONFIG_IP_ROUTE_CLASSID
292                     nh->nh_tclassid != onh->nh_tclassid ||
293 #endif
294                     lwtunnel_cmp_encap(nh->fib_nh_lws, onh->fib_nh_lws) ||
295                     ((nh->fib_nh_flags ^ onh->fib_nh_flags) & ~RTNH_COMPARE_MASK))
296                         return -1;
297
298                 if (nh->fib_nh_gw_family == AF_INET &&
299                     nh->fib_nh_gw4 != onh->fib_nh_gw4)
300                         return -1;
301
302                 if (nh->fib_nh_gw_family == AF_INET6 &&
303                     ipv6_addr_cmp(&nh->fib_nh_gw6, &onh->fib_nh_gw6))
304                         return -1;
305         } endfor_nexthops(fi);
306         return 0;
307 }
308
309 static inline unsigned int fib_devindex_hashfn(unsigned int val)
310 {
311         unsigned int mask = DEVINDEX_HASHSIZE - 1;
312
313         return (val ^
314                 (val >> DEVINDEX_HASHBITS) ^
315                 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
316 }
317
318 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
319 {
320         unsigned int mask = (fib_info_hash_size - 1);
321         unsigned int val = fi->fib_nhs;
322
323         val ^= (fi->fib_protocol << 8) | fi->fib_scope;
324         val ^= (__force u32)fi->fib_prefsrc;
325         val ^= fi->fib_priority;
326         for_nexthops(fi) {
327                 val ^= fib_devindex_hashfn(nh->fib_nh_oif);
328         } endfor_nexthops(fi)
329
330         return (val ^ (val >> 7) ^ (val >> 12)) & mask;
331 }
332
333 static struct fib_info *fib_find_info(struct fib_info *nfi)
334 {
335         struct hlist_head *head;
336         struct fib_info *fi;
337         unsigned int hash;
338
339         hash = fib_info_hashfn(nfi);
340         head = &fib_info_hash[hash];
341
342         hlist_for_each_entry(fi, head, fib_hash) {
343                 if (!net_eq(fi->fib_net, nfi->fib_net))
344                         continue;
345                 if (fi->fib_nhs != nfi->fib_nhs)
346                         continue;
347                 if (nfi->fib_protocol == fi->fib_protocol &&
348                     nfi->fib_scope == fi->fib_scope &&
349                     nfi->fib_prefsrc == fi->fib_prefsrc &&
350                     nfi->fib_priority == fi->fib_priority &&
351                     nfi->fib_type == fi->fib_type &&
352                     memcmp(nfi->fib_metrics, fi->fib_metrics,
353                            sizeof(u32) * RTAX_MAX) == 0 &&
354                     !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
355                     (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
356                         return fi;
357         }
358
359         return NULL;
360 }
361
362 /* Check, that the gateway is already configured.
363  * Used only by redirect accept routine.
364  */
365 int ip_fib_check_default(__be32 gw, struct net_device *dev)
366 {
367         struct hlist_head *head;
368         struct fib_nh *nh;
369         unsigned int hash;
370
371         spin_lock(&fib_info_lock);
372
373         hash = fib_devindex_hashfn(dev->ifindex);
374         head = &fib_info_devhash[hash];
375         hlist_for_each_entry(nh, head, nh_hash) {
376                 if (nh->fib_nh_dev == dev &&
377                     nh->fib_nh_gw4 == gw &&
378                     !(nh->fib_nh_flags & RTNH_F_DEAD)) {
379                         spin_unlock(&fib_info_lock);
380                         return 0;
381                 }
382         }
383
384         spin_unlock(&fib_info_lock);
385
386         return -1;
387 }
388
389 static inline size_t fib_nlmsg_size(struct fib_info *fi)
390 {
391         size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
392                          + nla_total_size(4) /* RTA_TABLE */
393                          + nla_total_size(4) /* RTA_DST */
394                          + nla_total_size(4) /* RTA_PRIORITY */
395                          + nla_total_size(4) /* RTA_PREFSRC */
396                          + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
397         unsigned int nhs = fib_info_num_path(fi);
398
399         /* space for nested metrics */
400         payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
401
402         if (nhs) {
403                 size_t nh_encapsize = 0;
404                 /* Also handles the special case nhs == 1 */
405
406                 /* each nexthop is packed in an attribute */
407                 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
408                 unsigned int i;
409
410                 /* may contain flow and gateway attribute */
411                 nhsize += 2 * nla_total_size(4);
412
413                 /* grab encap info */
414                 for (i = 0; i < fib_info_num_path(fi); i++) {
415                         struct fib_nh_common *nhc = fib_info_nhc(fi, i);
416
417                         if (nhc->nhc_lwtstate) {
418                                 /* RTA_ENCAP_TYPE */
419                                 nh_encapsize += lwtunnel_get_encap_size(
420                                                 nhc->nhc_lwtstate);
421                                 /* RTA_ENCAP */
422                                 nh_encapsize +=  nla_total_size(2);
423                         }
424                 }
425
426                 /* all nexthops are packed in a nested attribute */
427                 payload += nla_total_size((nhs * nhsize) + nh_encapsize);
428
429         }
430
431         return payload;
432 }
433
434 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
435                int dst_len, u32 tb_id, const struct nl_info *info,
436                unsigned int nlm_flags)
437 {
438         struct sk_buff *skb;
439         u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
440         int err = -ENOBUFS;
441
442         skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
443         if (!skb)
444                 goto errout;
445
446         err = fib_dump_info(skb, info->portid, seq, event, tb_id,
447                             fa->fa_type, key, dst_len,
448                             fa->fa_tos, fa->fa_info, nlm_flags);
449         if (err < 0) {
450                 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
451                 WARN_ON(err == -EMSGSIZE);
452                 kfree_skb(skb);
453                 goto errout;
454         }
455         rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE,
456                     info->nlh, GFP_KERNEL);
457         return;
458 errout:
459         if (err < 0)
460                 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
461 }
462
463 static int fib_detect_death(struct fib_info *fi, int order,
464                             struct fib_info **last_resort, int *last_idx,
465                             int dflt)
466 {
467         const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
468         struct neighbour *n;
469         int state = NUD_NONE;
470
471         if (likely(nhc->nhc_gw_family == AF_INET))
472                 n = neigh_lookup(&arp_tbl, &nhc->nhc_gw.ipv4, nhc->nhc_dev);
473         else if (nhc->nhc_gw_family == AF_INET6)
474                 n = neigh_lookup(ipv6_stub->nd_tbl, &nhc->nhc_gw.ipv6,
475                                  nhc->nhc_dev);
476         else
477                 n = NULL;
478
479         if (n) {
480                 state = n->nud_state;
481                 neigh_release(n);
482         } else {
483                 return 0;
484         }
485         if (state == NUD_REACHABLE)
486                 return 0;
487         if ((state & NUD_VALID) && order != dflt)
488                 return 0;
489         if ((state & NUD_VALID) ||
490             (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) {
491                 *last_resort = fi;
492                 *last_idx = order;
493         }
494         return 1;
495 }
496
497 int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap,
498                        u16 encap_type, void *cfg, gfp_t gfp_flags,
499                        struct netlink_ext_ack *extack)
500 {
501         int err;
502
503         nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *,
504                                                     gfp_flags);
505         if (!nhc->nhc_pcpu_rth_output)
506                 return -ENOMEM;
507
508         if (encap) {
509                 struct lwtunnel_state *lwtstate;
510
511                 if (encap_type == LWTUNNEL_ENCAP_NONE) {
512                         NL_SET_ERR_MSG(extack, "LWT encap type not specified");
513                         err = -EINVAL;
514                         goto lwt_failure;
515                 }
516                 err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family,
517                                            cfg, &lwtstate, extack);
518                 if (err)
519                         goto lwt_failure;
520
521                 nhc->nhc_lwtstate = lwtstate_get(lwtstate);
522         }
523
524         return 0;
525
526 lwt_failure:
527         rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
528         nhc->nhc_pcpu_rth_output = NULL;
529         return err;
530 }
531 EXPORT_SYMBOL_GPL(fib_nh_common_init);
532
533 int fib_nh_init(struct net *net, struct fib_nh *nh,
534                 struct fib_config *cfg, int nh_weight,
535                 struct netlink_ext_ack *extack)
536 {
537         int err;
538
539         nh->fib_nh_family = AF_INET;
540
541         err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap,
542                                  cfg->fc_encap_type, cfg, GFP_KERNEL, extack);
543         if (err)
544                 return err;
545
546         nh->fib_nh_oif = cfg->fc_oif;
547         nh->fib_nh_gw_family = cfg->fc_gw_family;
548         if (cfg->fc_gw_family == AF_INET)
549                 nh->fib_nh_gw4 = cfg->fc_gw4;
550         else if (cfg->fc_gw_family == AF_INET6)
551                 nh->fib_nh_gw6 = cfg->fc_gw6;
552
553         nh->fib_nh_flags = cfg->fc_flags;
554
555 #ifdef CONFIG_IP_ROUTE_CLASSID
556         nh->nh_tclassid = cfg->fc_flow;
557         if (nh->nh_tclassid)
558                 net->ipv4.fib_num_tclassid_users++;
559 #endif
560 #ifdef CONFIG_IP_ROUTE_MULTIPATH
561         nh->fib_nh_weight = nh_weight;
562 #endif
563         return 0;
564 }
565
566 #ifdef CONFIG_IP_ROUTE_MULTIPATH
567
/* Count the rtnexthop entries in a buffer of @remaining bytes starting at
 * @rtnh. Returns the count, or 0 (with an extack message) when bytes are
 * left over after the last well-formed entry.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
                              struct netlink_ext_ack *extack)
{
        int nhs;

        for (nhs = 0; rtnh_ok(rtnh, remaining); nhs++)
                rtnh = rtnh_next(rtnh, &remaining);

        /* leftover implies invalid nexthop configuration, discard it */
        if (remaining > 0) {
                NL_SET_ERR_MSG(extack,
                               "Invalid nexthop configuration - extra data after nexthops");
                return 0;
        }

        return nhs;
}
587
588 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
589                        int remaining, struct fib_config *cfg,
590                        struct netlink_ext_ack *extack)
591 {
592         struct net *net = fi->fib_net;
593         struct fib_config fib_cfg;
594         struct fib_nh *nh;
595         int ret;
596
597         change_nexthops(fi) {
598                 int attrlen;
599
600                 memset(&fib_cfg, 0, sizeof(fib_cfg));
601
602                 if (!rtnh_ok(rtnh, remaining)) {
603                         NL_SET_ERR_MSG(extack,
604                                        "Invalid nexthop configuration - extra data after nexthop");
605                         return -EINVAL;
606                 }
607
608                 if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
609                         NL_SET_ERR_MSG(extack,
610                                        "Invalid flags for nexthop - can not contain DEAD or LINKDOWN");
611                         return -EINVAL;
612                 }
613
614                 fib_cfg.fc_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
615                 fib_cfg.fc_oif = rtnh->rtnh_ifindex;
616
617                 attrlen = rtnh_attrlen(rtnh);
618                 if (attrlen > 0) {
619                         struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);
620
621                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
622                         nlav = nla_find(attrs, attrlen, RTA_VIA);
623                         if (nla && nlav) {
624                                 NL_SET_ERR_MSG(extack,
625                                                "Nexthop configuration can not contain both GATEWAY and VIA");
626                                 return -EINVAL;
627                         }
628                         if (nla) {
629                                 fib_cfg.fc_gw4 = nla_get_in_addr(nla);
630                                 if (fib_cfg.fc_gw4)
631                                         fib_cfg.fc_gw_family = AF_INET;
632                         } else if (nlav) {
633                                 ret = fib_gw_from_via(&fib_cfg, nlav, extack);
634                                 if (ret)
635                                         goto errout;
636                         }
637
638                         nla = nla_find(attrs, attrlen, RTA_FLOW);
639                         if (nla)
640                                 fib_cfg.fc_flow = nla_get_u32(nla);
641
642                         fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
643                         nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
644                         if (nla)
645                                 fib_cfg.fc_encap_type = nla_get_u16(nla);
646                 }
647
648                 ret = fib_nh_init(net, nexthop_nh, &fib_cfg,
649                                   rtnh->rtnh_hops + 1, extack);
650                 if (ret)
651                         goto errout;
652
653                 rtnh = rtnh_next(rtnh, &remaining);
654         } endfor_nexthops(fi);
655
656         ret = -EINVAL;
657         nh = fib_info_nh(fi, 0);
658         if (cfg->fc_oif && nh->fib_nh_oif != cfg->fc_oif) {
659                 NL_SET_ERR_MSG(extack,
660                                "Nexthop device index does not match RTA_OIF");
661                 goto errout;
662         }
663         if (cfg->fc_gw_family) {
664                 if (cfg->fc_gw_family != nh->fib_nh_gw_family ||
665                     (cfg->fc_gw_family == AF_INET &&
666                      nh->fib_nh_gw4 != cfg->fc_gw4) ||
667                     (cfg->fc_gw_family == AF_INET6 &&
668                      ipv6_addr_cmp(&nh->fib_nh_gw6, &cfg->fc_gw6))) {
669                         NL_SET_ERR_MSG(extack,
670                                        "Nexthop gateway does not match RTA_GATEWAY or RTA_VIA");
671                         goto errout;
672                 }
673         }
674 #ifdef CONFIG_IP_ROUTE_CLASSID
675         if (cfg->fc_flow && nh->nh_tclassid != cfg->fc_flow) {
676                 NL_SET_ERR_MSG(extack,
677                                "Nexthop class id does not match RTA_FLOW");
678                 goto errout;
679         }
680 #endif
681         ret = 0;
682 errout:
683         return ret;
684 }
685
686 static void fib_rebalance(struct fib_info *fi)
687 {
688         int total;
689         int w;
690
691         if (fib_info_num_path(fi) < 2)
692                 return;
693
694         total = 0;
695         for_nexthops(fi) {
696                 if (nh->fib_nh_flags & RTNH_F_DEAD)
697                         continue;
698
699                 if (ip_ignore_linkdown(nh->fib_nh_dev) &&
700                     nh->fib_nh_flags & RTNH_F_LINKDOWN)
701                         continue;
702
703                 total += nh->fib_nh_weight;
704         } endfor_nexthops(fi);
705
706         w = 0;
707         change_nexthops(fi) {
708                 int upper_bound;
709
710                 if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD) {
711                         upper_bound = -1;
712                 } else if (ip_ignore_linkdown(nexthop_nh->fib_nh_dev) &&
713                            nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
714                         upper_bound = -1;
715                 } else {
716                         w += nexthop_nh->fib_nh_weight;
717                         upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31,
718                                                             total) - 1;
719                 }
720
721                 atomic_set(&nexthop_nh->fib_nh_upper_bound, upper_bound);
722         } endfor_nexthops(fi);
723 }
724 #else /* CONFIG_IP_ROUTE_MULTIPATH */
725
726 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
727                        int remaining, struct fib_config *cfg,
728                        struct netlink_ext_ack *extack)
729 {
730         NL_SET_ERR_MSG(extack, "Multipath support not enabled in kernel");
731
732         return -EINVAL;
733 }
734
735 #define fib_rebalance(fi) do { } while (0)
736
737 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
738
739 static int fib_encap_match(u16 encap_type,
740                            struct nlattr *encap,
741                            const struct fib_nh *nh,
742                            const struct fib_config *cfg,
743                            struct netlink_ext_ack *extack)
744 {
745         struct lwtunnel_state *lwtstate;
746         int ret, result = 0;
747
748         if (encap_type == LWTUNNEL_ENCAP_NONE)
749                 return 0;
750
751         ret = lwtunnel_build_state(encap_type, encap, AF_INET,
752                                    cfg, &lwtstate, extack);
753         if (!ret) {
754                 result = lwtunnel_cmp_encap(lwtstate, nh->fib_nh_lws);
755                 lwtstate_free(lwtstate);
756         }
757
758         return result;
759 }
760
761 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
762                  struct netlink_ext_ack *extack)
763 {
764 #ifdef CONFIG_IP_ROUTE_MULTIPATH
765         struct rtnexthop *rtnh;
766         int remaining;
767 #endif
768
769         if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
770                 return 1;
771
772         if (cfg->fc_oif || cfg->fc_gw_family) {
773                 struct fib_nh *nh = fib_info_nh(fi, 0);
774
775                 if (cfg->fc_encap) {
776                         if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
777                                             nh, cfg, extack))
778                                 return 1;
779                 }
780 #ifdef CONFIG_IP_ROUTE_CLASSID
781                 if (cfg->fc_flow &&
782                     cfg->fc_flow != nh->nh_tclassid)
783                         return 1;
784 #endif
785                 if ((cfg->fc_oif && cfg->fc_oif != nh->fib_nh_oif) ||
786                     (cfg->fc_gw_family &&
787                      cfg->fc_gw_family != nh->fib_nh_gw_family))
788                         return 1;
789
790                 if (cfg->fc_gw_family == AF_INET &&
791                     cfg->fc_gw4 != nh->fib_nh_gw4)
792                         return 1;
793
794                 if (cfg->fc_gw_family == AF_INET6 &&
795                     ipv6_addr_cmp(&cfg->fc_gw6, &nh->fib_nh_gw6))
796                         return 1;
797
798                 return 0;
799         }
800
801 #ifdef CONFIG_IP_ROUTE_MULTIPATH
802         if (!cfg->fc_mp)
803                 return 0;
804
805         rtnh = cfg->fc_mp;
806         remaining = cfg->fc_mp_len;
807
808         for_nexthops(fi) {
809                 int attrlen;
810
811                 if (!rtnh_ok(rtnh, remaining))
812                         return -EINVAL;
813
814                 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->fib_nh_oif)
815                         return 1;
816
817                 attrlen = rtnh_attrlen(rtnh);
818                 if (attrlen > 0) {
819                         struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);
820
821                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
822                         nlav = nla_find(attrs, attrlen, RTA_VIA);
823                         if (nla && nlav) {
824                                 NL_SET_ERR_MSG(extack,
825                                                "Nexthop configuration can not contain both GATEWAY and VIA");
826                                 return -EINVAL;
827                         }
828
829                         if (nla) {
830                                 if (nh->fib_nh_gw_family != AF_INET ||
831                                     nla_get_in_addr(nla) != nh->fib_nh_gw4)
832                                         return 1;
833                         } else if (nlav) {
834                                 struct fib_config cfg2;
835                                 int err;
836
837                                 err = fib_gw_from_via(&cfg2, nlav, extack);
838                                 if (err)
839                                         return err;
840
841                                 switch (nh->fib_nh_gw_family) {
842                                 case AF_INET:
843                                         if (cfg2.fc_gw_family != AF_INET ||
844                                             cfg2.fc_gw4 != nh->fib_nh_gw4)
845                                                 return 1;
846                                         break;
847                                 case AF_INET6:
848                                         if (cfg2.fc_gw_family != AF_INET6 ||
849                                             ipv6_addr_cmp(&cfg2.fc_gw6,
850                                                           &nh->fib_nh_gw6))
851                                                 return 1;
852                                         break;
853                                 }
854                         }
855
856 #ifdef CONFIG_IP_ROUTE_CLASSID
857                         nla = nla_find(attrs, attrlen, RTA_FLOW);
858                         if (nla && nla_get_u32(nla) != nh->nh_tclassid)
859                                 return 1;
860 #endif
861                 }
862
863                 rtnh = rtnh_next(rtnh, &remaining);
864         } endfor_nexthops(fi);
865 #endif
866         return 0;
867 }
868
869 bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
870 {
871         struct nlattr *nla;
872         int remaining;
873
874         if (!cfg->fc_mx)
875                 return true;
876
877         nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
878                 int type = nla_type(nla);
879                 u32 fi_val, val;
880
881                 if (!type)
882                         continue;
883                 if (type > RTAX_MAX)
884                         return false;
885
886                 if (type == RTAX_CC_ALGO) {
887                         char tmp[TCP_CA_NAME_MAX];
888                         bool ecn_ca = false;
889
890                         nla_strlcpy(tmp, nla, sizeof(tmp));
891                         val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
892                 } else {
893                         if (nla_len(nla) != sizeof(u32))
894                                 return false;
895                         val = nla_get_u32(nla);
896                 }
897
898                 fi_val = fi->fib_metrics->metrics[type - 1];
899                 if (type == RTAX_FEATURES)
900                         fi_val &= ~DST_FEATURE_ECN_CA;
901
902                 if (fi_val != val)
903                         return false;
904         }
905
906         return true;
907 }
908
909 static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh,
910                               u32 table, struct netlink_ext_ack *extack)
911 {
912         struct fib6_config cfg = {
913                 .fc_table = table,
914                 .fc_flags = nh->fib_nh_flags | RTF_GATEWAY,
915                 .fc_ifindex = nh->fib_nh_oif,
916                 .fc_gateway = nh->fib_nh_gw6,
917         };
918         struct fib6_nh fib6_nh = {};
919         int err;
920
921         err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack);
922         if (!err) {
923                 nh->fib_nh_dev = fib6_nh.fib_nh_dev;
924                 dev_hold(nh->fib_nh_dev);
925                 nh->fib_nh_oif = nh->fib_nh_dev->ifindex;
926                 nh->fib_nh_scope = RT_SCOPE_LINK;
927
928                 ipv6_stub->fib6_nh_release(&fib6_nh);
929         }
930
931         return err;
932 }
933
934 /*
935  * Picture
936  * -------
937  *
938  * Semantics of nexthop are very messy for historical reasons.
939  * We have to take into account, that:
940  * a) gateway can be actually local interface address,
941  *    so that gatewayed route is direct.
942  * b) gateway must be on-link address, possibly
943  *    described not by an ifaddr, but also by a direct route.
944  * c) If both gateway and interface are specified, they should not
945  *    contradict.
946  * d) If we use tunnel routes, gateway could be not on-link.
947  *
948  * Attempt to reconcile all of these (alas, self-contradictory) conditions
949  * results in pretty ugly and hairy code with obscure logic.
950  *
951  * I chose to generalize it instead, so that the size
952  * of code does not increase practically, but it becomes
953  * much more general.
954  * Every prefix is assigned a "scope" value: "host" is local address,
955  * "link" is direct route,
956  * [ ... "site" ... "interior" ... ]
957  * and "universe" is true gateway route with global meaning.
958  *
959  * Every prefix refers to a set of "nexthop"s (gw, oif),
960  * where gw must have narrower scope. This recursion stops
961  * when gw has LOCAL scope or if "nexthop" is declared ONLINK,
962  * which means that gw is forced to be on link.
963  *
964  * Code is still hairy, but now it is apparently logically
965  * consistent and very flexible. E.g., as a by-product, it allows
966  * independent exterior and interior routing processes to
967  * co-exist in peace.
968  *
969  * Normally it looks as following.
970  *
971  * {universe prefix}  -> (gw, oif) [scope link]
972  *                |
973  *                |-> {link prefix} -> (gw, oif) [scope local]
974  *                                      |
975  *                                      |-> {local prefix} (terminal node)
976  */
977 static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table,
978                               u8 scope, struct netlink_ext_ack *extack)
979 {
980         struct net_device *dev;
981         struct fib_result res;
982         int err;
983
984         if (nh->fib_nh_flags & RTNH_F_ONLINK) {
985                 unsigned int addr_type;
986
987                 if (scope >= RT_SCOPE_LINK) {
988                         NL_SET_ERR_MSG(extack, "Nexthop has invalid scope");
989                         return -EINVAL;
990                 }
991                 dev = __dev_get_by_index(net, nh->fib_nh_oif);
992                 if (!dev) {
993                         NL_SET_ERR_MSG(extack, "Nexthop device required for onlink");
994                         return -ENODEV;
995                 }
996                 if (!(dev->flags & IFF_UP)) {
997                         NL_SET_ERR_MSG(extack, "Nexthop device is not up");
998                         return -ENETDOWN;
999                 }
1000                 addr_type = inet_addr_type_dev_table(net, dev, nh->fib_nh_gw4);
1001                 if (addr_type != RTN_UNICAST) {
1002                         NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
1003                         return -EINVAL;
1004                 }
1005                 if (!netif_carrier_ok(dev))
1006                         nh->fib_nh_flags |= RTNH_F_LINKDOWN;
1007                 nh->fib_nh_dev = dev;
1008                 dev_hold(dev);
1009                 nh->fib_nh_scope = RT_SCOPE_LINK;
1010                 return 0;
1011         }
1012         rcu_read_lock();
1013         {
1014                 struct fib_table *tbl = NULL;
1015                 struct flowi4 fl4 = {
1016                         .daddr = nh->fib_nh_gw4,
1017                         .flowi4_scope = scope + 1,
1018                         .flowi4_oif = nh->fib_nh_oif,
1019                         .flowi4_iif = LOOPBACK_IFINDEX,
1020                 };
1021
1022                 /* It is not necessary, but requires a bit of thinking */
1023                 if (fl4.flowi4_scope < RT_SCOPE_LINK)
1024                         fl4.flowi4_scope = RT_SCOPE_LINK;
1025
1026                 if (table)
1027                         tbl = fib_get_table(net, table);
1028
1029                 if (tbl)
1030                         err = fib_table_lookup(tbl, &fl4, &res,
1031                                                FIB_LOOKUP_IGNORE_LINKSTATE |
1032                                                FIB_LOOKUP_NOREF);
1033
1034                 /* on error or if no table given do full lookup. This
1035                  * is needed for example when nexthops are in the local
1036                  * table rather than the given table
1037                  */
1038                 if (!tbl || err) {
1039                         err = fib_lookup(net, &fl4, &res,
1040                                          FIB_LOOKUP_IGNORE_LINKSTATE);
1041                 }
1042
1043                 if (err) {
1044                         NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
1045                         goto out;
1046                 }
1047         }
1048
1049         err = -EINVAL;
1050         if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
1051                 NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
1052                 goto out;
1053         }
1054         nh->fib_nh_scope = res.scope;
1055         nh->fib_nh_oif = FIB_RES_OIF(res);
1056         nh->fib_nh_dev = dev = FIB_RES_DEV(res);
1057         if (!dev) {
1058                 NL_SET_ERR_MSG(extack,
1059                                "No egress device for nexthop gateway");
1060                 goto out;
1061         }
1062         dev_hold(dev);
1063         if (!netif_carrier_ok(dev))
1064                 nh->fib_nh_flags |= RTNH_F_LINKDOWN;
1065         err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
1066 out:
1067         rcu_read_unlock();
1068         return err;
1069 }
1070
1071 static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh,
1072                               struct netlink_ext_ack *extack)
1073 {
1074         struct in_device *in_dev;
1075         int err;
1076
1077         if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
1078                 NL_SET_ERR_MSG(extack,
1079                                "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
1080                 return -EINVAL;
1081         }
1082
1083         rcu_read_lock();
1084
1085         err = -ENODEV;
1086         in_dev = inetdev_by_index(net, nh->fib_nh_oif);
1087         if (!in_dev)
1088                 goto out;
1089         err = -ENETDOWN;
1090         if (!(in_dev->dev->flags & IFF_UP)) {
1091                 NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
1092                 goto out;
1093         }
1094
1095         nh->fib_nh_dev = in_dev->dev;
1096         dev_hold(nh->fib_nh_dev);
1097         nh->fib_nh_scope = RT_SCOPE_HOST;
1098         if (!netif_carrier_ok(nh->fib_nh_dev))
1099                 nh->fib_nh_flags |= RTNH_F_LINKDOWN;
1100         err = 0;
1101 out:
1102         rcu_read_unlock();
1103         return err;
1104 }
1105
1106 int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
1107                  struct netlink_ext_ack *extack)
1108 {
1109         int err;
1110
1111         if (nh->fib_nh_gw_family == AF_INET)
1112                 err = fib_check_nh_v4_gw(net, nh, table, scope, extack);
1113         else if (nh->fib_nh_gw_family == AF_INET6)
1114                 err = fib_check_nh_v6_gw(net, nh, table, extack);
1115         else
1116                 err = fib_check_nh_nongw(net, nh, extack);
1117
1118         return err;
1119 }
1120
1121 static inline unsigned int fib_laddr_hashfn(__be32 val)
1122 {
1123         unsigned int mask = (fib_info_hash_size - 1);
1124
1125         return ((__force u32)val ^
1126                 ((__force u32)val >> 7) ^
1127                 ((__force u32)val >> 14)) & mask;
1128 }
1129
1130 static struct hlist_head *fib_info_hash_alloc(int bytes)
1131 {
1132         if (bytes <= PAGE_SIZE)
1133                 return kzalloc(bytes, GFP_KERNEL);
1134         else
1135                 return (struct hlist_head *)
1136                         __get_free_pages(GFP_KERNEL | __GFP_ZERO,
1137                                          get_order(bytes));
1138 }
1139
1140 static void fib_info_hash_free(struct hlist_head *hash, int bytes)
1141 {
1142         if (!hash)
1143                 return;
1144
1145         if (bytes <= PAGE_SIZE)
1146                 kfree(hash);
1147         else
1148                 free_pages((unsigned long) hash, get_order(bytes));
1149 }
1150
1151 static void fib_info_hash_move(struct hlist_head *new_info_hash,
1152                                struct hlist_head *new_laddrhash,
1153                                unsigned int new_size)
1154 {
1155         struct hlist_head *old_info_hash, *old_laddrhash;
1156         unsigned int old_size = fib_info_hash_size;
1157         unsigned int i, bytes;
1158
1159         spin_lock_bh(&fib_info_lock);
1160         old_info_hash = fib_info_hash;
1161         old_laddrhash = fib_info_laddrhash;
1162         fib_info_hash_size = new_size;
1163
1164         for (i = 0; i < old_size; i++) {
1165                 struct hlist_head *head = &fib_info_hash[i];
1166                 struct hlist_node *n;
1167                 struct fib_info *fi;
1168
1169                 hlist_for_each_entry_safe(fi, n, head, fib_hash) {
1170                         struct hlist_head *dest;
1171                         unsigned int new_hash;
1172
1173                         new_hash = fib_info_hashfn(fi);
1174                         dest = &new_info_hash[new_hash];
1175                         hlist_add_head(&fi->fib_hash, dest);
1176                 }
1177         }
1178         fib_info_hash = new_info_hash;
1179
1180         for (i = 0; i < old_size; i++) {
1181                 struct hlist_head *lhead = &fib_info_laddrhash[i];
1182                 struct hlist_node *n;
1183                 struct fib_info *fi;
1184
1185                 hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
1186                         struct hlist_head *ldest;
1187                         unsigned int new_hash;
1188
1189                         new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
1190                         ldest = &new_laddrhash[new_hash];
1191                         hlist_add_head(&fi->fib_lhash, ldest);
1192                 }
1193         }
1194         fib_info_laddrhash = new_laddrhash;
1195
1196         spin_unlock_bh(&fib_info_lock);
1197
1198         bytes = old_size * sizeof(struct hlist_head *);
1199         fib_info_hash_free(old_info_hash, bytes);
1200         fib_info_hash_free(old_laddrhash, bytes);
1201 }
1202
1203 __be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
1204                                  unsigned char scope)
1205 {
1206         struct fib_nh *nh;
1207
1208         if (nhc->nhc_family != AF_INET)
1209                 return inet_select_addr(nhc->nhc_dev, 0, scope);
1210
1211         nh = container_of(nhc, struct fib_nh, nh_common);
1212         nh->nh_saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope);
1213         nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
1214
1215         return nh->nh_saddr;
1216 }
1217
1218 __be32 fib_result_prefsrc(struct net *net, struct fib_result *res)
1219 {
1220         struct fib_nh_common *nhc = res->nhc;
1221
1222         if (res->fi->fib_prefsrc)
1223                 return res->fi->fib_prefsrc;
1224
1225         if (nhc->nhc_family == AF_INET) {
1226                 struct fib_nh *nh;
1227
1228                 nh = container_of(nhc, struct fib_nh, nh_common);
1229                 if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid))
1230                         return nh->nh_saddr;
1231         }
1232
1233         return fib_info_update_nhc_saddr(net, nhc, res->fi->fib_scope);
1234 }
1235
1236 static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
1237 {
1238         if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
1239             fib_prefsrc != cfg->fc_dst) {
1240                 u32 tb_id = cfg->fc_table;
1241                 int rc;
1242
1243                 if (tb_id == RT_TABLE_MAIN)
1244                         tb_id = RT_TABLE_LOCAL;
1245
1246                 rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
1247                                           fib_prefsrc, tb_id);
1248
1249                 if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) {
1250                         rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
1251                                                   fib_prefsrc, RT_TABLE_LOCAL);
1252                 }
1253
1254                 if (rc != RTN_LOCAL)
1255                         return false;
1256         }
1257         return true;
1258 }
1259
1260 struct fib_info *fib_create_info(struct fib_config *cfg,
1261                                  struct netlink_ext_ack *extack)
1262 {
1263         int err;
1264         struct fib_info *fi = NULL;
1265         struct fib_info *ofi;
1266         int nhs = 1;
1267         struct net *net = cfg->fc_nlinfo.nl_net;
1268
1269         if (cfg->fc_type > RTN_MAX)
1270                 goto err_inval;
1271
1272         /* Fast check to catch the most weird cases */
1273         if (fib_props[cfg->fc_type].scope > cfg->fc_scope) {
1274                 NL_SET_ERR_MSG(extack, "Invalid scope");
1275                 goto err_inval;
1276         }
1277
1278         if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
1279                 NL_SET_ERR_MSG(extack,
1280                                "Invalid rtm_flags - can not contain DEAD or LINKDOWN");
1281                 goto err_inval;
1282         }
1283
1284 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1285         if (cfg->fc_mp) {
1286                 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
1287                 if (nhs == 0)
1288                         goto err_inval;
1289         }
1290 #endif
1291
1292         err = -ENOBUFS;
1293         if (fib_info_cnt >= fib_info_hash_size) {
1294                 unsigned int new_size = fib_info_hash_size << 1;
1295                 struct hlist_head *new_info_hash;
1296                 struct hlist_head *new_laddrhash;
1297                 unsigned int bytes;
1298
1299                 if (!new_size)
1300                         new_size = 16;
1301                 bytes = new_size * sizeof(struct hlist_head *);
1302                 new_info_hash = fib_info_hash_alloc(bytes);
1303                 new_laddrhash = fib_info_hash_alloc(bytes);
1304                 if (!new_info_hash || !new_laddrhash) {
1305                         fib_info_hash_free(new_info_hash, bytes);
1306                         fib_info_hash_free(new_laddrhash, bytes);
1307                 } else
1308                         fib_info_hash_move(new_info_hash, new_laddrhash, new_size);
1309
1310                 if (!fib_info_hash_size)
1311                         goto failure;
1312         }
1313
1314         fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL);
1315         if (!fi)
1316                 goto failure;
1317         fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx,
1318                                               cfg->fc_mx_len, extack);
1319         if (unlikely(IS_ERR(fi->fib_metrics))) {
1320                 err = PTR_ERR(fi->fib_metrics);
1321                 kfree(fi);
1322                 return ERR_PTR(err);
1323         }
1324
1325         fib_info_cnt++;
1326         fi->fib_net = net;
1327         fi->fib_protocol = cfg->fc_protocol;
1328         fi->fib_scope = cfg->fc_scope;
1329         fi->fib_flags = cfg->fc_flags;
1330         fi->fib_priority = cfg->fc_priority;
1331         fi->fib_prefsrc = cfg->fc_prefsrc;
1332         fi->fib_type = cfg->fc_type;
1333         fi->fib_tb_id = cfg->fc_table;
1334
1335         fi->fib_nhs = nhs;
1336         change_nexthops(fi) {
1337                 nexthop_nh->nh_parent = fi;
1338         } endfor_nexthops(fi)
1339
1340         if (cfg->fc_mp)
1341                 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
1342         else
1343                 err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack);
1344
1345         if (err != 0)
1346                 goto failure;
1347
1348         if (fib_props[cfg->fc_type].error) {
1349                 if (cfg->fc_gw_family || cfg->fc_oif || cfg->fc_mp) {
1350                         NL_SET_ERR_MSG(extack,
1351                                        "Gateway, device and multipath can not be specified for this route type");
1352                         goto err_inval;
1353                 }
1354                 goto link_it;
1355         } else {
1356                 switch (cfg->fc_type) {
1357                 case RTN_UNICAST:
1358                 case RTN_LOCAL:
1359                 case RTN_BROADCAST:
1360                 case RTN_ANYCAST:
1361                 case RTN_MULTICAST:
1362                         break;
1363                 default:
1364                         NL_SET_ERR_MSG(extack, "Invalid route type");
1365                         goto err_inval;
1366                 }
1367         }
1368
1369         if (cfg->fc_scope > RT_SCOPE_HOST) {
1370                 NL_SET_ERR_MSG(extack, "Invalid scope");
1371                 goto err_inval;
1372         }
1373
1374         if (cfg->fc_scope == RT_SCOPE_HOST) {
1375                 struct fib_nh *nh = fi->fib_nh;
1376
1377                 /* Local address is added. */
1378                 if (nhs != 1) {
1379                         NL_SET_ERR_MSG(extack,
1380                                        "Route with host scope can not have multiple nexthops");
1381                         goto err_inval;
1382                 }
1383                 if (nh->fib_nh_gw_family) {
1384                         NL_SET_ERR_MSG(extack,
1385                                        "Route with host scope can not have a gateway");
1386                         goto err_inval;
1387                 }
1388                 nh->fib_nh_scope = RT_SCOPE_NOWHERE;
1389                 nh->fib_nh_dev = dev_get_by_index(net, nh->fib_nh_oif);
1390                 err = -ENODEV;
1391                 if (!nh->fib_nh_dev)
1392                         goto failure;
1393         } else {
1394                 int linkdown = 0;
1395
1396                 change_nexthops(fi) {
1397                         err = fib_check_nh(cfg->fc_nlinfo.nl_net, nexthop_nh,
1398                                            cfg->fc_table, cfg->fc_scope,
1399                                            extack);
1400                         if (err != 0)
1401                                 goto failure;
1402                         if (nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN)
1403                                 linkdown++;
1404                 } endfor_nexthops(fi)
1405                 if (linkdown == fi->fib_nhs)
1406                         fi->fib_flags |= RTNH_F_LINKDOWN;
1407         }
1408
1409         if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) {
1410                 NL_SET_ERR_MSG(extack, "Invalid prefsrc address");
1411                 goto err_inval;
1412         }
1413
1414         change_nexthops(fi) {
1415                 fib_info_update_nhc_saddr(net, &nexthop_nh->nh_common,
1416                                           fi->fib_scope);
1417                 if (nexthop_nh->fib_nh_gw_family == AF_INET6)
1418                         fi->fib_nh_is_v6 = true;
1419         } endfor_nexthops(fi)
1420
1421         fib_rebalance(fi);
1422
1423 link_it:
1424         ofi = fib_find_info(fi);
1425         if (ofi) {
1426                 fi->fib_dead = 1;
1427                 free_fib_info(fi);
1428                 ofi->fib_treeref++;
1429                 return ofi;
1430         }
1431
1432         fi->fib_treeref++;
1433         refcount_set(&fi->fib_clntref, 1);
1434         spin_lock_bh(&fib_info_lock);
1435         hlist_add_head(&fi->fib_hash,
1436                        &fib_info_hash[fib_info_hashfn(fi)]);
1437         if (fi->fib_prefsrc) {
1438                 struct hlist_head *head;
1439
1440                 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
1441                 hlist_add_head(&fi->fib_lhash, head);
1442         }
1443         change_nexthops(fi) {
1444                 struct hlist_head *head;
1445                 unsigned int hash;
1446
1447                 if (!nexthop_nh->fib_nh_dev)
1448                         continue;
1449                 hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
1450                 head = &fib_info_devhash[hash];
1451                 hlist_add_head(&nexthop_nh->nh_hash, head);
1452         } endfor_nexthops(fi)
1453         spin_unlock_bh(&fib_info_lock);
1454         return fi;
1455
1456 err_inval:
1457         err = -EINVAL;
1458
1459 failure:
1460         if (fi) {
1461                 fi->fib_dead = 1;
1462                 free_fib_info(fi);
1463         }
1464
1465         return ERR_PTR(err);
1466 }
1467
1468 int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc,
1469                      unsigned char *flags, bool skip_oif)
1470 {
1471         if (nhc->nhc_flags & RTNH_F_DEAD)
1472                 *flags |= RTNH_F_DEAD;
1473
1474         if (nhc->nhc_flags & RTNH_F_LINKDOWN) {
1475                 *flags |= RTNH_F_LINKDOWN;
1476
1477                 rcu_read_lock();
1478                 switch (nhc->nhc_family) {
1479                 case AF_INET:
1480                         if (ip_ignore_linkdown(nhc->nhc_dev))
1481                                 *flags |= RTNH_F_DEAD;
1482                         break;
1483                 case AF_INET6:
1484                         if (ip6_ignore_linkdown(nhc->nhc_dev))
1485                                 *flags |= RTNH_F_DEAD;
1486                         break;
1487                 }
1488                 rcu_read_unlock();
1489         }
1490
1491         switch (nhc->nhc_gw_family) {
1492         case AF_INET:
1493                 if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4))
1494                         goto nla_put_failure;
1495                 break;
1496         case AF_INET6:
1497                 /* if gateway family does not match nexthop family
1498                  * gateway is encoded as RTA_VIA
1499                  */
1500                 if (nhc->nhc_gw_family != nhc->nhc_family) {
1501                         int alen = sizeof(struct in6_addr);
1502                         struct nlattr *nla;
1503                         struct rtvia *via;
1504
1505                         nla = nla_reserve(skb, RTA_VIA, alen + 2);
1506                         if (!nla)
1507                                 goto nla_put_failure;
1508
1509                         via = nla_data(nla);
1510                         via->rtvia_family = AF_INET6;
1511                         memcpy(via->rtvia_addr, &nhc->nhc_gw.ipv6, alen);
1512                 } else if (nla_put_in6_addr(skb, RTA_GATEWAY,
1513                                             &nhc->nhc_gw.ipv6) < 0) {
1514                         goto nla_put_failure;
1515                 }
1516                 break;
1517         }
1518
1519         *flags |= (nhc->nhc_flags & RTNH_F_ONLINK);
1520         if (nhc->nhc_flags & RTNH_F_OFFLOAD)
1521                 *flags |= RTNH_F_OFFLOAD;
1522
1523         if (!skip_oif && nhc->nhc_dev &&
1524             nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex))
1525                 goto nla_put_failure;
1526
1527         if (nhc->nhc_lwtstate &&
1528             lwtunnel_fill_encap(skb, nhc->nhc_lwtstate,
1529                                 RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
1530                 goto nla_put_failure;
1531
1532         return 0;
1533
1534 nla_put_failure:
1535         return -EMSGSIZE;
1536 }
1537 EXPORT_SYMBOL_GPL(fib_nexthop_info);
1538
1539 #if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6)
1540 int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc,
1541                     int nh_weight)
1542 {
1543         const struct net_device *dev = nhc->nhc_dev;
1544         struct rtnexthop *rtnh;
1545         unsigned char flags = 0;
1546
1547         rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1548         if (!rtnh)
1549                 goto nla_put_failure;
1550
1551         rtnh->rtnh_hops = nh_weight - 1;
1552         rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
1553
1554         if (fib_nexthop_info(skb, nhc, &flags, true) < 0)
1555                 goto nla_put_failure;
1556
1557         rtnh->rtnh_flags = flags;
1558
1559         /* length of rtnetlink header + attributes */
1560         rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
1561
1562         return 0;
1563
1564 nla_put_failure:
1565         return -EMSGSIZE;
1566 }
1567 EXPORT_SYMBOL_GPL(fib_add_nexthop);
1568 #endif
1569
#ifdef CONFIG_IP_ROUTE_MULTIPATH
/*
 * Emit the RTA_MULTIPATH nest for @fi: one rtnexthop record per nexthop,
 * each followed by its attributes and, with CONFIG_IP_ROUTE_CLASSID, an
 * RTA_FLOW attribute when the nexthop has a traffic class id.
 *
 * Returns 0 on success, -EMSGSIZE if @skb has no room.
 */
static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
{
	struct nlattr *mp;

	mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
	if (!mp)
		goto nla_put_failure;

	for_nexthops(fi) {
#ifdef CONFIG_IP_ROUTE_CLASSID
		/* fib_add_nexthop() reserves its rtnexthop header as the
		 * next data appended to the message, i.e. at the current
		 * write position. NOTE(review): relies on
		 * nla_reserve_nohdr() returning the previous tail.
		 */
		struct rtnexthop *rtnh = nlmsg_get_pos(skb);
#endif

		if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight) < 0)
			goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
		if (nh->nh_tclassid) {
			if (nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
				goto nla_put_failure;
			/* fib_add_nexthop() fixed rtnh_len before RTA_FLOW
			 * was appended; extend it so the attribute belongs
			 * to this nexthop record.
			 */
			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
		}
#endif
	} endfor_nexthops(fi);

	nla_nest_end(skb, mp);

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
#else
/* Without multipath support there is nothing to emit. */
static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
{
	return 0;
}
#endif
1602
1603 int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
1604                   u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
1605                   struct fib_info *fi, unsigned int flags)
1606 {
1607         unsigned int nhs = fib_info_num_path(fi);
1608         struct nlmsghdr *nlh;
1609         struct rtmsg *rtm;
1610
1611         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
1612         if (!nlh)
1613                 return -EMSGSIZE;
1614
1615         rtm = nlmsg_data(nlh);
1616         rtm->rtm_family = AF_INET;
1617         rtm->rtm_dst_len = dst_len;
1618         rtm->rtm_src_len = 0;
1619         rtm->rtm_tos = tos;
1620         if (tb_id < 256)
1621                 rtm->rtm_table = tb_id;
1622         else
1623                 rtm->rtm_table = RT_TABLE_COMPAT;
1624         if (nla_put_u32(skb, RTA_TABLE, tb_id))
1625                 goto nla_put_failure;
1626         rtm->rtm_type = type;
1627         rtm->rtm_flags = fi->fib_flags;
1628         rtm->rtm_scope = fi->fib_scope;
1629         rtm->rtm_protocol = fi->fib_protocol;
1630
1631         if (rtm->rtm_dst_len &&
1632             nla_put_in_addr(skb, RTA_DST, dst))
1633                 goto nla_put_failure;
1634         if (fi->fib_priority &&
1635             nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
1636                 goto nla_put_failure;
1637         if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0)
1638                 goto nla_put_failure;
1639
1640         if (fi->fib_prefsrc &&
1641             nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
1642                 goto nla_put_failure;
1643         if (nhs == 1) {
1644                 const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
1645                 unsigned char flags = 0;
1646
1647                 if (fib_nexthop_info(skb, nhc, &flags, false) < 0)
1648                         goto nla_put_failure;
1649
1650                 rtm->rtm_flags = flags;
1651 #ifdef CONFIG_IP_ROUTE_CLASSID
1652                 if (nhc->nhc_family == AF_INET) {
1653                         struct fib_nh *nh;
1654
1655                         nh = container_of(nhc, struct fib_nh, nh_common);
1656                         if (nh->nh_tclassid &&
1657                             nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
1658                                 goto nla_put_failure;
1659                 }
1660 #endif
1661         } else {
1662                 if (fib_add_multipath(skb, fi) < 0)
1663                         goto nla_put_failure;
1664         }
1665
1666         nlmsg_end(skb, nlh);
1667         return 0;
1668
1669 nla_put_failure:
1670         nlmsg_cancel(skb, nlh);
1671         return -EMSGSIZE;
1672 }
1673
1674 /*
1675  * Update FIB if:
1676  * - local address disappeared -> we must delete all the entries
1677  *   referring to it.
1678  * - device went down -> we must shutdown all nexthops going via it.
1679  */
1680 int fib_sync_down_addr(struct net_device *dev, __be32 local)
1681 {
1682         int ret = 0;
1683         unsigned int hash = fib_laddr_hashfn(local);
1684         struct hlist_head *head = &fib_info_laddrhash[hash];
1685         struct net *net = dev_net(dev);
1686         int tb_id = l3mdev_fib_table(dev);
1687         struct fib_info *fi;
1688
1689         if (!fib_info_laddrhash || local == 0)
1690                 return 0;
1691
1692         hlist_for_each_entry(fi, head, fib_lhash) {
1693                 if (!net_eq(fi->fib_net, net) ||
1694                     fi->fib_tb_id != tb_id)
1695                         continue;
1696                 if (fi->fib_prefsrc == local) {
1697                         fi->fib_flags |= RTNH_F_DEAD;
1698                         ret++;
1699                 }
1700         }
1701         return ret;
1702 }
1703
1704 static int call_fib_nh_notifiers(struct fib_nh *nh,
1705                                  enum fib_event_type event_type)
1706 {
1707         bool ignore_link_down = ip_ignore_linkdown(nh->fib_nh_dev);
1708         struct fib_nh_notifier_info info = {
1709                 .fib_nh = nh,
1710         };
1711
1712         switch (event_type) {
1713         case FIB_EVENT_NH_ADD:
1714                 if (nh->fib_nh_flags & RTNH_F_DEAD)
1715                         break;
1716                 if (ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN)
1717                         break;
1718                 return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type,
1719                                            &info.info);
1720         case FIB_EVENT_NH_DEL:
1721                 if ((ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) ||
1722                     (nh->fib_nh_flags & RTNH_F_DEAD))
1723                         return call_fib4_notifiers(dev_net(nh->fib_nh_dev),
1724                                                    event_type, &info.info);
1725         default:
1726                 break;
1727         }
1728
1729         return NOTIFY_DONE;
1730 }
1731
1732 /* Update the PMTU of exceptions when:
1733  * - the new MTU of the first hop becomes smaller than the PMTU
1734  * - the old MTU was the same as the PMTU, and it limited discovery of
1735  *   larger MTUs on the path. With that limit raised, we can now
1736  *   discover larger MTUs
1737  * A special case is locked exceptions, for which the PMTU is smaller
1738  * than the minimal accepted PMTU:
1739  * - if the new MTU is greater than the PMTU, don't make any change
1740  * - otherwise, unlock and set PMTU
1741  */
1742 void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
1743 {
1744         struct fnhe_hash_bucket *bucket;
1745         int i;
1746
1747         bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1);
1748         if (!bucket)
1749                 return;
1750
1751         for (i = 0; i < FNHE_HASH_SIZE; i++) {
1752                 struct fib_nh_exception *fnhe;
1753
1754                 for (fnhe = rcu_dereference_protected(bucket[i].chain, 1);
1755                      fnhe;
1756                      fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) {
1757                         if (fnhe->fnhe_mtu_locked) {
1758                                 if (new <= fnhe->fnhe_pmtu) {
1759                                         fnhe->fnhe_pmtu = new;
1760                                         fnhe->fnhe_mtu_locked = false;
1761                                 }
1762                         } else if (new < fnhe->fnhe_pmtu ||
1763                                    orig == fnhe->fnhe_pmtu) {
1764                                 fnhe->fnhe_pmtu = new;
1765                         }
1766                 }
1767         }
1768 }
1769
1770 void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
1771 {
1772         unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1773         struct hlist_head *head = &fib_info_devhash[hash];
1774         struct fib_nh *nh;
1775
1776         hlist_for_each_entry(nh, head, nh_hash) {
1777                 if (nh->fib_nh_dev == dev)
1778                         fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
1779         }
1780 }
1781
1782 /* Event              force Flags           Description
1783  * NETDEV_CHANGE      0     LINKDOWN        Carrier OFF, not for scope host
1784  * NETDEV_DOWN        0     LINKDOWN|DEAD   Link down, not for scope host
1785  * NETDEV_DOWN        1     LINKDOWN|DEAD   Last address removed
1786  * NETDEV_UNREGISTER  1     LINKDOWN|DEAD   Device removed
1787  */
1788 int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
1789 {
1790         int ret = 0;
1791         int scope = RT_SCOPE_NOWHERE;
1792         struct fib_info *prev_fi = NULL;
1793         unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1794         struct hlist_head *head = &fib_info_devhash[hash];
1795         struct fib_nh *nh;
1796
1797         if (force)
1798                 scope = -1;
1799
1800         hlist_for_each_entry(nh, head, nh_hash) {
1801                 struct fib_info *fi = nh->nh_parent;
1802                 int dead;
1803
1804                 BUG_ON(!fi->fib_nhs);
1805                 if (nh->fib_nh_dev != dev || fi == prev_fi)
1806                         continue;
1807                 prev_fi = fi;
1808                 dead = 0;
1809                 change_nexthops(fi) {
1810                         if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD)
1811                                 dead++;
1812                         else if (nexthop_nh->fib_nh_dev == dev &&
1813                                  nexthop_nh->fib_nh_scope != scope) {
1814                                 switch (event) {
1815                                 case NETDEV_DOWN:
1816                                 case NETDEV_UNREGISTER:
1817                                         nexthop_nh->fib_nh_flags |= RTNH_F_DEAD;
1818                                         /* fall through */
1819                                 case NETDEV_CHANGE:
1820                                         nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
1821                                         break;
1822                                 }
1823                                 call_fib_nh_notifiers(nexthop_nh,
1824                                                       FIB_EVENT_NH_DEL);
1825                                 dead++;
1826                         }
1827 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1828                         if (event == NETDEV_UNREGISTER &&
1829                             nexthop_nh->fib_nh_dev == dev) {
1830                                 dead = fi->fib_nhs;
1831                                 break;
1832                         }
1833 #endif
1834                 } endfor_nexthops(fi)
1835                 if (dead == fi->fib_nhs) {
1836                         switch (event) {
1837                         case NETDEV_DOWN:
1838                         case NETDEV_UNREGISTER:
1839                                 fi->fib_flags |= RTNH_F_DEAD;
1840                                 /* fall through */
1841                         case NETDEV_CHANGE:
1842                                 fi->fib_flags |= RTNH_F_LINKDOWN;
1843                                 break;
1844                         }
1845                         ret++;
1846                 }
1847
1848                 fib_rebalance(fi);
1849         }
1850
1851         return ret;
1852 }
1853
1854 /* Must be invoked inside of an RCU protected region.  */
1855 static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
1856 {
1857         struct fib_info *fi = NULL, *last_resort = NULL;
1858         struct hlist_head *fa_head = res->fa_head;
1859         struct fib_table *tb = res->table;
1860         u8 slen = 32 - res->prefixlen;
1861         int order = -1, last_idx = -1;
1862         struct fib_alias *fa, *fa1 = NULL;
1863         u32 last_prio = res->fi->fib_priority;
1864         u8 last_tos = 0;
1865
1866         hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
1867                 struct fib_info *next_fi = fa->fa_info;
1868                 struct fib_nh *nh;
1869
1870                 if (fa->fa_slen != slen)
1871                         continue;
1872                 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
1873                         continue;
1874                 if (fa->tb_id != tb->tb_id)
1875                         continue;
1876                 if (next_fi->fib_priority > last_prio &&
1877                     fa->fa_tos == last_tos) {
1878                         if (last_tos)
1879                                 continue;
1880                         break;
1881                 }
1882                 if (next_fi->fib_flags & RTNH_F_DEAD)
1883                         continue;
1884                 last_tos = fa->fa_tos;
1885                 last_prio = next_fi->fib_priority;
1886
1887                 if (next_fi->fib_scope != res->scope ||
1888                     fa->fa_type != RTN_UNICAST)
1889                         continue;
1890
1891                 nh = fib_info_nh(next_fi, 0);
1892                 if (!nh->fib_nh_gw4 || nh->fib_nh_scope != RT_SCOPE_LINK)
1893                         continue;
1894
1895                 fib_alias_accessed(fa);
1896
1897                 if (!fi) {
1898                         if (next_fi != res->fi)
1899                                 break;
1900                         fa1 = fa;
1901                 } else if (!fib_detect_death(fi, order, &last_resort,
1902                                              &last_idx, fa1->fa_default)) {
1903                         fib_result_assign(res, fi);
1904                         fa1->fa_default = order;
1905                         goto out;
1906                 }
1907                 fi = next_fi;
1908                 order++;
1909         }
1910
1911         if (order <= 0 || !fi) {
1912                 if (fa1)
1913                         fa1->fa_default = -1;
1914                 goto out;
1915         }
1916
1917         if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1918                               fa1->fa_default)) {
1919                 fib_result_assign(res, fi);
1920                 fa1->fa_default = order;
1921                 goto out;
1922         }
1923
1924         if (last_idx >= 0)
1925                 fib_result_assign(res, last_resort);
1926         fa1->fa_default = last_idx;
1927 out:
1928         return;
1929 }
1930
1931 /*
1932  * Dead device goes up. We wake up dead nexthops.
1933  * It takes sense only on multipath routes.
1934  */
1935 int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
1936 {
1937         struct fib_info *prev_fi;
1938         unsigned int hash;
1939         struct hlist_head *head;
1940         struct fib_nh *nh;
1941         int ret;
1942
1943         if (!(dev->flags & IFF_UP))
1944                 return 0;
1945
1946         if (nh_flags & RTNH_F_DEAD) {
1947                 unsigned int flags = dev_get_flags(dev);
1948
1949                 if (flags & (IFF_RUNNING | IFF_LOWER_UP))
1950                         nh_flags |= RTNH_F_LINKDOWN;
1951         }
1952
1953         prev_fi = NULL;
1954         hash = fib_devindex_hashfn(dev->ifindex);
1955         head = &fib_info_devhash[hash];
1956         ret = 0;
1957
1958         hlist_for_each_entry(nh, head, nh_hash) {
1959                 struct fib_info *fi = nh->nh_parent;
1960                 int alive;
1961
1962                 BUG_ON(!fi->fib_nhs);
1963                 if (nh->fib_nh_dev != dev || fi == prev_fi)
1964                         continue;
1965
1966                 prev_fi = fi;
1967                 alive = 0;
1968                 change_nexthops(fi) {
1969                         if (!(nexthop_nh->fib_nh_flags & nh_flags)) {
1970                                 alive++;
1971                                 continue;
1972                         }
1973                         if (!nexthop_nh->fib_nh_dev ||
1974                             !(nexthop_nh->fib_nh_dev->flags & IFF_UP))
1975                                 continue;
1976                         if (nexthop_nh->fib_nh_dev != dev ||
1977                             !__in_dev_get_rtnl(dev))
1978                                 continue;
1979                         alive++;
1980                         nexthop_nh->fib_nh_flags &= ~nh_flags;
1981                         call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD);
1982                 } endfor_nexthops(fi)
1983
1984                 if (alive > 0) {
1985                         fi->fib_flags &= ~nh_flags;
1986                         ret++;
1987                 }
1988
1989                 fib_rebalance(fi);
1990         }
1991
1992         return ret;
1993 }
1994
1995 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1996 static bool fib_good_nh(const struct fib_nh *nh)
1997 {
1998         int state = NUD_REACHABLE;
1999
2000         if (nh->fib_nh_scope == RT_SCOPE_LINK) {
2001                 struct neighbour *n;
2002
2003                 rcu_read_lock_bh();
2004
2005                 if (likely(nh->fib_nh_gw_family == AF_INET))
2006                         n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
2007                                                    (__force u32)nh->fib_nh_gw4);
2008                 else if (nh->fib_nh_gw_family == AF_INET6)
2009                         n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev,
2010                                                            &nh->fib_nh_gw6);
2011                 else
2012                         n = NULL;
2013                 if (n)
2014                         state = n->nud_state;
2015
2016                 rcu_read_unlock_bh();
2017         }
2018
2019         return !!(state & NUD_VALID);
2020 }
2021
2022 void fib_select_multipath(struct fib_result *res, int hash)
2023 {
2024         struct fib_info *fi = res->fi;
2025         struct net *net = fi->fib_net;
2026         bool first = false;
2027
2028         change_nexthops(fi) {
2029                 if (net->ipv4.sysctl_fib_multipath_use_neigh) {
2030                         if (!fib_good_nh(nexthop_nh))
2031                                 continue;
2032                         if (!first) {
2033                                 res->nh_sel = nhsel;
2034                                 res->nhc = &nexthop_nh->nh_common;
2035                                 first = true;
2036                         }
2037                 }
2038
2039                 if (hash > atomic_read(&nexthop_nh->fib_nh_upper_bound))
2040                         continue;
2041
2042                 res->nh_sel = nhsel;
2043                 res->nhc = &nexthop_nh->nh_common;
2044                 return;
2045         } endfor_nexthops(fi);
2046 }
2047 #endif
2048
2049 void fib_select_path(struct net *net, struct fib_result *res,
2050                      struct flowi4 *fl4, const struct sk_buff *skb)
2051 {
2052         if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
2053                 goto check_saddr;
2054
2055 #ifdef CONFIG_IP_ROUTE_MULTIPATH
2056         if (fib_info_num_path(res->fi) > 1) {
2057                 int h = fib_multipath_hash(net, fl4, skb, NULL);
2058
2059                 fib_select_multipath(res, h);
2060         }
2061         else
2062 #endif
2063         if (!res->prefixlen &&
2064             res->table->tb_num_default > 1 &&
2065             res->type == RTN_UNICAST)
2066                 fib_select_default(fl4, res);
2067
2068 check_saddr:
2069         if (!fl4->saddr)
2070                 fl4->saddr = fib_result_prefsrc(net, res);
2071 }