inet: Avoid potential NULL peer dereference.
[platform/adaptation/renesas_rcar/renesas_kernel.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
/*
 * Forward declarations of file-local (static) helpers and dst_ops
 * callbacks that are referenced before their definitions.
 */
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68                                     const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int      ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void             ip6_dst_destroy(struct dst_entry *);
74 static void             ip6_dst_ifdown(struct dst_entry *,
75                                        struct net_device *dev, int how);
76 static int               ip6_dst_gc(struct dst_ops *ops);
77
/* Packet handlers and callbacks used by the route templates and dst_ops below. */
78 static int              ip6_pkt_discard(struct sk_buff *skb);
79 static int              ip6_pkt_discard_out(struct sk_buff *skb);
80 static void             ip6_link_failure(struct sk_buff *skb);
81 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82
/* Route-info helpers; only declared when CONFIG_IPV6_ROUTE_INFO is enabled. */
83 #ifdef CONFIG_IPV6_ROUTE_INFO
84 static struct rt6_info *rt6_add_route_info(struct net *net,
85                                            const struct in6_addr *prefix, int prefixlen,
86                                            const struct in6_addr *gwaddr, int ifindex,
87                                            unsigned int pref);
88 static struct rt6_info *rt6_get_route_info(struct net *net,
89                                            const struct in6_addr *prefix, int prefixlen,
90                                            const struct in6_addr *gwaddr, int ifindex);
91 #endif
92
/*
 * ipv6_cow_metrics - dst_ops ->cow_metrics handler for IPv6 routes.
 * @dst: route whose metrics should become writable
 * @old: raw dst->_metrics value previously observed by the caller
 *
 * Only routes flagged DST_HOST are handled; any other route yields NULL.
 * The writable array is the metrics storage of the peer obtained through
 * rt6_get_peer_create().  When inet_metrics_new(peer) is true the old
 * metrics are copied into that array first.  The new pointer is installed
 * with cmpxchg(); if dst->_metrics no longer held @old, the value found
 * there is returned instead, or NULL when it carries DST_METRICS_READ_ONLY.
 *
 * Returns a pointer to the metrics array, or NULL (non-host route, no peer,
 * or the concurrently installed metrics are read-only).
 */
93 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
94 {
95         struct rt6_info *rt = (struct rt6_info *) dst;
96         struct inet_peer *peer;
97         u32 *p = NULL;
98
99         if (!(rt->dst.flags & DST_HOST))
100                 return NULL;
101
102         peer = rt6_get_peer_create(rt);
103         if (peer) {
104                 u32 *old_p = __DST_METRICS_PTR(old);
105                 unsigned long prev, new;
106
107                 p = peer->metrics;
                    /* presumably true only for a peer whose metrics are not yet set up - confirm */
108                 if (inet_metrics_new(peer))
109                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
110
111                 new = (unsigned long) p;
112                 prev = cmpxchg(&dst->_metrics, old, new);
113
                    /* Swap did not happen: use whatever is installed now. */
114                 if (prev != old) {
115                         p = __DST_METRICS_PTR(prev);
116                         if (prev & DST_METRICS_READ_ONLY)
117                                 p = NULL;
118                 }
119         }
120         return p;
121 }
122
123 static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
124 {
125         struct in6_addr *p = &rt->rt6i_gateway;
126
127         if (!ipv6_addr_any(p))
128                 return (const void *) p;
129         return daddr;
130 }
131
132 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
133 {
134         struct rt6_info *rt = (struct rt6_info *) dst;
135         struct neighbour *n;
136
137         daddr = choose_neigh_daddr(rt, daddr);
138         n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
139         if (n)
140                 return n;
141         return neigh_create(&nd_tbl, daddr, dst->dev);
142 }
143
144 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
145 {
146         struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
147         if (!n) {
148                 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
149                 if (IS_ERR(n))
150                         return PTR_ERR(n);
151         }
152         dst_set_neighbour(&rt->dst, n);
153
154         return 0;
155 }
156
/*
 * dst_ops for regular IPv6 routes: hooks this file's callbacks
 * (gc, validity check, MTU/advmss, metrics copy-on-write, destroy, ifdown,
 * negative advice, link failure, PMTU update, neighbour lookup) into the
 * generic dst layer.  NOTE(review): the "_template" suffix suggests it is
 * copied elsewhere (ip6_dst_alloc() uses net->ipv6.ip6_dst_ops) - the copy
 * is not visible in this part of the file.
 */
157 static struct dst_ops ip6_dst_ops_template = {
158         .family                 =       AF_INET6,
159         .protocol               =       cpu_to_be16(ETH_P_IPV6),
160         .gc                     =       ip6_dst_gc,
161         .gc_thresh              =       1024,
162         .check                  =       ip6_dst_check,
163         .default_advmss         =       ip6_default_advmss,
164         .mtu                    =       ip6_mtu,
165         .cow_metrics            =       ipv6_cow_metrics,
166         .destroy                =       ip6_dst_destroy,
167         .ifdown                 =       ip6_dst_ifdown,
168         .negative_advice        =       ip6_negative_advice,
169         .link_failure           =       ip6_link_failure,
170         .update_pmtu            =       ip6_rt_update_pmtu,
171         .local_out              =       __ip6_local_out,
172         .neigh_lookup           =       ip6_neigh_lookup,
173 };
174
175 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
176 {
177         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
178
179         return mtu ? : dst->dev->mtu;
180 }
181
/* ->update_pmtu handler of the blackhole dst_ops: intentionally does nothing. */
182 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
183 {
184 }
185
/*
 * ->cow_metrics handler of the blackhole dst_ops: always returns NULL, i.e.
 * it never hands out a writable metrics array.
 */
186 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
187                                          unsigned long old)
188 {
189         return NULL;
190 }
191
/*
 * dst_ops for blackhole routes.  Shares destroy/check/advmss/neigh_lookup
 * with the regular ops above but uses the blackhole MTU handler, a no-op
 * PMTU update and a cow_metrics handler that always returns NULL.  No gc,
 * ifdown, negative_advice, link_failure or local_out hooks are set.
 */
192 static struct dst_ops ip6_dst_blackhole_ops = {
193         .family                 =       AF_INET6,
194         .protocol               =       cpu_to_be16(ETH_P_IPV6),
195         .destroy                =       ip6_dst_destroy,
196         .check                  =       ip6_dst_check,
197         .mtu                    =       ip6_blackhole_mtu,
198         .default_advmss         =       ip6_default_advmss,
199         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
200         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
201         .neigh_lookup           =       ip6_neigh_lookup,
202 };
203
/*
 * Read-only metrics array: every entry is zero except the hop limit, which
 * is 255.  The array is indexed by RTAX_* - 1.
 */
204 static const u32 ip6_template_metrics[RTAX_MAX] = {
205         [RTAX_HOPLIMIT - 1] = 255,
206 };
207
/*
 * Template for the "null" route entry: a reject route without next hop
 * whose dst reports -ENETUNREACH and whose input/output handlers are
 * ip6_pkt_discard()/ip6_pkt_discard_out().  The metric is the largest u32
 * value and both reference counters start at 1.
 */
208 static struct rt6_info ip6_null_entry_template = {
209         .dst = {
210                 .__refcnt       = ATOMIC_INIT(1),
211                 .__use          = 1,
212                 .obsolete       = -1,
213                 .error          = -ENETUNREACH,
214                 .input          = ip6_pkt_discard,
215                 .output         = ip6_pkt_discard_out,
216         },
217         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
218         .rt6i_protocol  = RTPROT_KERNEL,
219         .rt6i_metric    = ~(u32) 0,
220         .rt6i_ref       = ATOMIC_INIT(1),
221 };
222
/*
 * Additional reject-route templates, built only with
 * CONFIG_IPV6_MULTIPLE_TABLES.  They mirror ip6_null_entry_template and
 * differ only in the dst error code and the packet handlers.
 */
223 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
224
225 static int ip6_pkt_prohibit(struct sk_buff *skb);
226 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
227
/* "Prohibit" entry: dst error -EACCES, handled by ip6_pkt_prohibit{,_out}(). */
228 static struct rt6_info ip6_prohibit_entry_template = {
229         .dst = {
230                 .__refcnt       = ATOMIC_INIT(1),
231                 .__use          = 1,
232                 .obsolete       = -1,
233                 .error          = -EACCES,
234                 .input          = ip6_pkt_prohibit,
235                 .output         = ip6_pkt_prohibit_out,
236         },
237         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
238         .rt6i_protocol  = RTPROT_KERNEL,
239         .rt6i_metric    = ~(u32) 0,
240         .rt6i_ref       = ATOMIC_INIT(1),
241 };
242
/* "Blackhole" entry: dst error -EINVAL, both directions use dst_discard(). */
243 static struct rt6_info ip6_blk_hole_entry_template = {
244         .dst = {
245                 .__refcnt       = ATOMIC_INIT(1),
246                 .__use          = 1,
247                 .obsolete       = -1,
248                 .error          = -EINVAL,
249                 .input          = dst_discard,
250                 .output         = dst_discard,
251         },
252         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
253         .rt6i_protocol  = RTPROT_KERNEL,
254         .rt6i_metric    = ~(u32) 0,
255         .rt6i_ref       = ATOMIC_INIT(1),
256 };
257
258 #endif
259
260 /* allocate dst with ip6_dst_ops */
261 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
262                                              struct net_device *dev,
263                                              int flags,
264                                              struct fib6_table *table)
265 {
266         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
267                                         0, 0, flags);
268
269         if (rt) {
270                 memset(&rt->rt6i_table, 0,
271                        sizeof(*rt) - sizeof(struct dst_entry));
272                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
273         }
274         return rt;
275 }
276
277 static void ip6_dst_destroy(struct dst_entry *dst)
278 {
279         struct rt6_info *rt = (struct rt6_info *)dst;
280         struct inet6_dev *idev = rt->rt6i_idev;
281
282         if (!(rt->dst.flags & DST_HOST))
283                 dst_destroy_metrics_generic(dst);
284
285         if (idev) {
286                 rt->rt6i_idev = NULL;
287                 in6_dev_put(idev);
288         }
289
290         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
291                 dst_release(dst->from);
292
293         if (rt6_has_peer(rt)) {
294                 struct inet_peer *peer = rt6_peer_ptr(rt);
295                 inet_putpeer(peer);
296         }
297 }
298
/*
 * File-wide peer generation counter, initialised to 0.  Within the part of
 * the file visible here it is only read (see rt6_bind_peer()).
 */
299 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
300
/* Return the current value of the peer generation counter. */
301 static u32 rt6_peer_genid(void)
302 {
303         return atomic_read(&__rt6_peer_genid);
304 }
305
306 void rt6_bind_peer(struct rt6_info *rt, int create)
307 {
308         struct inet_peer_base *base;
309         struct inet_peer *peer;
310
311         base = inetpeer_base_ptr(rt->_rt6i_peer);
312         if (!base)
313                 return;
314
315         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
316         if (peer) {
317                 if (!rt6_set_peer(rt, peer))
318                         inet_putpeer(peer);
319                 else
320                         rt->rt6i_peer_genid = rt6_peer_genid();
321         }
322 }
323
324 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
325                            int how)
326 {
327         struct rt6_info *rt = (struct rt6_info *)dst;
328         struct inet6_dev *idev = rt->rt6i_idev;
329         struct net_device *loopback_dev =
330                 dev_net(dev)->loopback_dev;
331
332         if (dev != loopback_dev && idev && idev->dev == dev) {
333                 struct inet6_dev *loopback_idev =
334                         in6_dev_get(loopback_dev);
335                 if (loopback_idev) {
336                         rt->rt6i_idev = loopback_idev;
337                         in6_dev_put(idev);
338                 }
339         }
340 }
341
342 static bool rt6_check_expired(const struct rt6_info *rt)
343 {
344         struct rt6_info *ort = NULL;
345
346         if (rt->rt6i_flags & RTF_EXPIRES) {
347                 if (time_after(jiffies, rt->dst.expires))
348                         return true;
349         } else if (rt->dst.from) {
350                 ort = (struct rt6_info *) rt->dst.from;
351                 return (ort->rt6i_flags & RTF_EXPIRES) &&
352                         time_after(jiffies, ort->dst.expires);
353         }
354         return false;
355 }
356
357 static bool rt6_need_strict(const struct in6_addr *daddr)
358 {
359         return ipv6_addr_type(daddr) &
360                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
361 }
362
363 /*
364  *      Route lookup. Any table->tb6_lock is implied.
365  */
366
/*
 * rt6_device_match - among the routes chained from @rt through
 * dst.rt6_next, pick the one matching the requested interface or source
 * address.
 * @net:   namespace; supplies ip6_null_entry and is passed to ipv6_chk_addr()
 * @rt:    first candidate route
 * @saddr: source address, consulted only when @oif is 0
 * @oif:   requested interface index, 0 for "any"
 * @flags: RT6_LOOKUP_F_* flags; only RT6_LOOKUP_F_IFACE is examined here
 *
 * With neither @oif nor a non-any @saddr, @rt is returned unchanged.
 * With @oif set, the first route whose device has that ifindex wins; routes
 * on a loopback device are remembered in 'local' as a fallback.
 * With @oif == 0, the first route for which ipv6_chk_addr() accepts @saddr
 * on the route's device wins.
 * If the scan finds nothing: 'local' if one was recorded, otherwise
 * ip6_null_entry when @oif is set and RT6_LOOKUP_F_IFACE is requested,
 * otherwise @rt.
 */
367 static inline struct rt6_info *rt6_device_match(struct net *net,
368                                                     struct rt6_info *rt,
369                                                     const struct in6_addr *saddr,
370                                                     int oif,
371                                                     int flags)
372 {
373         struct rt6_info *local = NULL;
374         struct rt6_info *sprt;
375
376         if (!oif && ipv6_addr_any(saddr))
377                 goto out;
378
379         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
380                 struct net_device *dev = sprt->dst.dev;
381
382                 if (oif) {
383                         if (dev->ifindex == oif)
384                                 return sprt;
385                         if (dev->flags & IFF_LOOPBACK) {
386                                 if (!sprt->rt6i_idev ||
387                                     sprt->rt6i_idev->dev->ifindex != oif) {
                                            /*
                                             * NOTE(review): this code is inside "if (oif)", so the
                                             * "&& oif" test below is always true and the "!oif"
                                             * test further down is always false.
                                             */
388                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
389                                                 continue;
                                            /*
                                             * NOTE(review): 'local' can have been recorded with a
                                             * NULL rt6i_idev (first branch of the test above), yet
                                             * it is dereferenced here without a check - confirm
                                             * that loopback routes always carry an inet6_dev.
                                             */
390                                         if (local && (!oif ||
391                                                       local->rt6i_idev->dev->ifindex == oif))
392                                                 continue;
393                                 }
394                                 local = sprt;
395                         }
396                 } else {
397                         if (ipv6_chk_addr(net, saddr, dev,
398                                           flags & RT6_LOOKUP_F_IFACE))
399                                 return sprt;
400                 }
401         }
402
403         if (oif) {
404                 if (local)
405                         return local;
406
407                 if (flags & RT6_LOOKUP_F_IFACE)
408                         return net->ipv6.ip6_null_entry;
409         }
410 out:
411         return rt;
412 }
413
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * rt6_probe - Router Reachability Probing for the next hop of @rt.
 * @rt: route to probe; may be NULL, in which case nothing happens
 *
 * If the route's neighbour entry exists and is not in a NUD_VALID state,
 * and more than rtr_probe_interval jiffies have passed since the entry was
 * last updated, a neighbour solicitation for the neighbour's address is
 * sent to its solicited-node multicast address.  Stamping neigh->updated
 * before sending is what rate-limits the probes (Router Reachability Probe
 * MUST be rate-limited to no more than one per minute).
 *
 * neigh->updated is modified here, so neigh->lock is taken for writing;
 * holding only the read side would let concurrent probers race on the
 * timestamp.  The lock is dropped before the solicitation is sent.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	/* Cheap unlocked pre-check; the state is re-tested under the lock. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF router probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
453
454 /*
455  * Default Router Selection (RFC 2461 6.3.6)
456  */
457 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
458 {
459         struct net_device *dev = rt->dst.dev;
460         if (!oif || dev->ifindex == oif)
461                 return 2;
462         if ((dev->flags & IFF_LOOPBACK) &&
463             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
464                 return 1;
465         return 0;
466 }
467
/*
 * rt6_check_neigh - rate the neighbour state of @rt's next hop.
 *
 * Returns:
 *   1  for routes flagged RTF_NONEXTHOP or lacking RTF_GATEWAY;
 *   2  when the neighbour entry is in a NUD_VALID state;
 *   0  when the neighbour is NUD_FAILED (only with CONFIG_IPV6_ROUTER_PREF)
 *      or when a gateway route has no neighbour entry at all;
 *   1  for any other neighbour state.
 *
 * The neighbour pointer is fetched without taking a reference, so the whole
 * evaluation runs inside an RCU read-side section; the state itself is read
 * under neigh->lock.
 */
468 static inline int rt6_check_neigh(struct rt6_info *rt)
469 {
470         struct neighbour *neigh;
471         int m;
472
473         rcu_read_lock();
474         neigh = dst_get_neighbour_noref(&rt->dst);
475         if (rt->rt6i_flags & RTF_NONEXTHOP ||
476             !(rt->rt6i_flags & RTF_GATEWAY))
477                 m = 1;
478         else if (neigh) {
479                 read_lock_bh(&neigh->lock);
480                 if (neigh->nud_state & NUD_VALID)
481                         m = 2;
482 #ifdef CONFIG_IPV6_ROUTER_PREF
483                 else if (neigh->nud_state & NUD_FAILED)
484                         m = 0;
485 #endif
486                 else
487                         m = 1;
488                 read_unlock_bh(&neigh->lock);
489         } else
490                 m = 0;
491         rcu_read_unlock();
492         return m;
493 }
494
495 static int rt6_score_route(struct rt6_info *rt, int oif,
496                            int strict)
497 {
498         int m, n;
499
500         m = rt6_check_dev(rt, oif);
501         if (!m && (strict & RT6_LOOKUP_F_IFACE))
502                 return -1;
503 #ifdef CONFIG_IPV6_ROUTER_PREF
504         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
505 #endif
506         n = rt6_check_neigh(rt);
507         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
508                 return -1;
509         return m;
510 }
511
512 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
513                                    int *mpri, struct rt6_info *match)
514 {
515         int m;
516
517         if (rt6_check_expired(rt))
518                 goto out;
519
520         m = rt6_score_route(rt, oif, strict);
521         if (m < 0)
522                 goto out;
523
524         if (m > *mpri) {
525                 if (strict & RT6_LOOKUP_F_REACHABLE)
526                         rt6_probe(match);
527                 *mpri = m;
528                 match = rt;
529         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
530                 rt6_probe(rt);
531         }
532
533 out:
534         return match;
535 }
536
537 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
538                                      struct rt6_info *rr_head,
539                                      u32 metric, int oif, int strict)
540 {
541         struct rt6_info *rt, *match;
542         int mpri = -1;
543
544         match = NULL;
545         for (rt = rr_head; rt && rt->rt6i_metric == metric;
546              rt = rt->dst.rt6_next)
547                 match = find_match(rt, oif, strict, &mpri, match);
548         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
549              rt = rt->dst.rt6_next)
550                 match = find_match(rt, oif, strict, &mpri, match);
551
552         return match;
553 }
554
555 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
556 {
557         struct rt6_info *match, *rt0;
558         struct net *net;
559
560         rt0 = fn->rr_ptr;
561         if (!rt0)
562                 fn->rr_ptr = rt0 = fn->leaf;
563
564         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
565
566         if (!match &&
567             (strict & RT6_LOOKUP_F_REACHABLE)) {
568                 struct rt6_info *next = rt0->dst.rt6_next;
569
570                 /* no entries matched; do round-robin */
571                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
572                         next = fn->leaf;
573
574                 if (next != rt0)
575                         fn->rr_ptr = next;
576         }
577
578         net = dev_net(rt0->dst.dev);
579         return match ? match : net->ipv6.ip6_null_entry;
580 }
581
582 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - process one route information option.
 * @dev:    device the option applies to; supplies namespace and ifindex
 * @opt:    option bytes, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address for the route
 *
 * After validating the option, an existing matching route is looked up via
 * rt6_get_route_info().  A zero lifetime deletes an existing route; a
 * non-zero lifetime adds the route if it does not exist, or refreshes the
 * preference bits of the existing one.  The route's expiry is then cleared
 * (infinite lifetime) or set to jiffies + HZ * lifetime.
 *
 * Returns 0 on success and -EINVAL for a malformed option or an invalid
 * route preference.
 */
583 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
584                   const struct in6_addr *gwaddr)
585 {
586         struct net *net = dev_net(dev);
587         struct route_info *rinfo = (struct route_info *) opt;
588         struct in6_addr prefix_buf, *prefix;
589         unsigned int pref;
590         unsigned long lifetime;
591         struct rt6_info *rt;
592
            /*
             * NOTE(review): @len is a signed int compared against sizeof(), so a
             * negative @len would convert to a large unsigned value and pass
             * this check; callers are assumed to pass non-negative lengths.
             */
593         if (len < sizeof(struct route_info)) {
594                 return -EINVAL;
595         }
596
            /*
             * rinfo->length must be large enough for prefix_len bits: at least 2
             * for prefixes longer than 64 bits, at least 1 for any non-empty
             * prefix, and never more than 3 (presumably counted in units of
             * 8 octets - confirm against struct route_info).
             */
597         /* Sanity check for prefix_len and length */
598         if (rinfo->length > 3) {
599                 return -EINVAL;
600         } else if (rinfo->prefix_len > 128) {
601                 return -EINVAL;
602         } else if (rinfo->prefix_len > 64) {
603                 if (rinfo->length < 2) {
604                         return -EINVAL;
605                 }
606         } else if (rinfo->prefix_len > 0) {
607                 if (rinfo->length < 1) {
608                         return -EINVAL;
609                 }
610         }
611
612         pref = rinfo->route_pref;
613         if (pref == ICMPV6_ROUTER_PREF_INVALID)
614                 return -EINVAL;
615
616         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
617
            /*
             * With length 3 the prefix field in the option is used in place;
             * otherwise only prefix_len bits are copied into a local buffer.
             */
618         if (rinfo->length == 3)
619                 prefix = (struct in6_addr *)rinfo->prefix;
620         else {
621                 /* this function is safe */
622                 ipv6_addr_prefix(&prefix_buf,
623                                  (struct in6_addr *)rinfo->prefix,
624                                  rinfo->prefix_len);
625                 prefix = &prefix_buf;
626         }
627
628         rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
629                                 dev->ifindex);
630
            /* Zero lifetime: withdraw an existing route. */
631         if (rt && !lifetime) {
632                 ip6_del_rt(rt);
633                 rt = NULL;
634         }
635
636         if (!rt && lifetime)
637                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
638                                         pref);
639         else if (rt)
640                 rt->rt6i_flags = RTF_ROUTEINFO |
641                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
642
643         if (rt) {
644                 if (!addrconf_finite_timeout(lifetime))
645                         rt6_clean_expires(rt);
646                 else
647                         rt6_set_expires(rt, jiffies + HZ * lifetime);
648
                    /* presumably drops the reference returned by the get/add helpers - confirm */
649                 dst_release(&rt->dst);
650         }
651         return 0;
652 }
653 #endif
654
/*
 * BACKTRACK - when the current result is the null entry, walk up the FIB
 * tree and retry the lookup at the next node that carries route info.
 * @__net: namespace whose ip6_null_entry marks "no route found"
 * @saddr: source address used when descending into a source subtree
 *
 * This macro is not self-contained.  It reads and writes the caller's
 * local variables 'rt' and 'fn' and jumps to the caller's labels:
 *   - 'out'     when the top-level root (RTN_TL_ROOT) is reached;
 *   - 'restart' when a node flagged RTN_RTINFO is found.
 * While climbing, if the parent has a subtree other than the node we came
 * from, the search continues via fib6_lookup() in that subtree; otherwise
 * it moves to the parent itself.  If 'rt' is not the null entry the macro
 * does nothing.
 */
655 #define BACKTRACK(__net, saddr)                 \
656 do { \
657         if (rt == __net->ipv6.ip6_null_entry) { \
658                 struct fib6_node *pn; \
659                 while (1) { \
660                         if (fn->fn_flags & RTN_TL_ROOT) \
661                                 goto out; \
662                         pn = fn->parent; \
663                         if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
664                                 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
665                         else \
666                                 fn = pn; \
667                         if (fn->fn_flags & RTN_RTINFO) \
668                                 goto restart; \
669                 } \
670         } \
671 } while (0)
672
/*
 * ip6_pol_route_lookup - per-table lookup callback handed to
 * fib6_rule_lookup() by ip6_route_lookup() and rt6_lookup().
 * @net:   namespace
 * @table: FIB table to search; its tb6_lock is read-locked for the lookup
 * @fl6:   flow; daddr, saddr and flowi6_oif are used
 * @flags: RT6_LOOKUP_F_* flags forwarded to rt6_device_match()
 *
 * Finds the FIB node for the flow, filters its leaf list through
 * rt6_device_match() and, while the result is the null entry, backtracks
 * towards the root (see BACKTRACK, which jumps to 'restart' and 'out').
 * The chosen route is passed to dst_use() with the current jiffies before
 * the lock is dropped (presumably taking a reference for the caller -
 * confirm against dst_use()).
 */
673 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
674                                              struct fib6_table *table,
675                                              struct flowi6 *fl6, int flags)
676 {
677         struct fib6_node *fn;
678         struct rt6_info *rt;
679
680         read_lock_bh(&table->tb6_lock);
681         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
682 restart:
683         rt = fn->leaf;
684         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
685         BACKTRACK(net, &fl6->saddr);
686 out:
687         dst_use(&rt->dst, jiffies);
688         read_unlock_bh(&table->tb6_lock);
689         return rt;
690
691 }
692
/*
 * ip6_route_lookup - look up a route for @fl6 in namespace @net.
 *
 * Runs fib6_rule_lookup() with ip6_pol_route_lookup() as the per-table
 * resolver and returns its result.  Exported to GPL modules.
 */
693 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
694                                     int flags)
695 {
696         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
697 }
698 EXPORT_SYMBOL_GPL(ip6_route_lookup);
699
700 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
701                             const struct in6_addr *saddr, int oif, int strict)
702 {
703         struct flowi6 fl6 = {
704                 .flowi6_oif = oif,
705                 .daddr = *daddr,
706         };
707         struct dst_entry *dst;
708         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
709
710         if (saddr) {
711                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
712                 flags |= RT6_LOOKUP_F_HAS_SADDR;
713         }
714
715         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
716         if (dst->error == 0)
717                 return (struct rt6_info *) dst;
718
719         dst_release(dst);
720
721         return NULL;
722 }
723
724 EXPORT_SYMBOL(rt6_lookup);
725
726 /* ip6_ins_rt is called with FREE table->tb6_lock.
727    It takes new route entry, the addition fails by any reason the
728    route is freed. In any case, if caller does not hold it, it may
729    be destroyed.
730  */
731
732 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
733 {
734         int err;
735         struct fib6_table *table;
736
737         table = rt->rt6i_table;
738         write_lock_bh(&table->tb6_lock);
739         err = fib6_add(&table->tb6_root, rt, info);
740         write_unlock_bh(&table->tb6_lock);
741
742         return err;
743 }
744
/*
 * ip6_ins_rt - insert @rt into its FIB table.
 *
 * Wrapper around __ip6_ins_rt() that supplies an nl_info carrying only the
 * namespace of the route's device.  Returns __ip6_ins_rt()'s result.
 */
745 int ip6_ins_rt(struct rt6_info *rt)
746 {
747         struct nl_info info = {
748                 .nl_net = dev_net(rt->dst.dev),
749         };
750         return __ip6_ins_rt(rt, &info);
751 }
752
/*
 * rt6_alloc_cow - build a cache clone of @ort for @daddr with its own
 * neighbour entry.
 * @ort:   route to copy
 * @daddr: destination the clone is for
 * @saddr: source address, used only with CONFIG_IPV6_SUBTREES (may be NULL)
 *
 * The clone is flagged RTF_CACHE.  For non-gateway routes the clone's
 * gateway is set to @daddr, and RTF_ANYCAST is added when @ort is not a
 * /128 route yet its prefix address equals @daddr.
 *
 * If binding the neighbour fails and we are not in softirq context, one
 * retry is made after forcing a dst garbage-collection run; the GC sysctls
 * are temporarily overridden for that run and restored afterwards.
 *
 * Returns the clone, or NULL when the copy fails or no neighbour could be
 * bound (in which case the clone is freed and a rate-limited warning is
 * logged).
 */
753 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
754                                       const struct in6_addr *daddr,
755                                       const struct in6_addr *saddr)
756 {
757         struct rt6_info *rt;
758
759         /*
760          *      Clone the route.
761          */
762
763         rt = ip6_rt_copy(ort, daddr);
764
765         if (rt) {
                    /* 1 retry outside softirq context, 0 inside it. */
766                 int attempts = !in_softirq();
767
768                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
769                         if (ort->rt6i_dst.plen != 128 &&
770                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
771                                 rt->rt6i_flags |= RTF_ANYCAST;
772                         rt->rt6i_gateway = *daddr;
773                 }
774
775                 rt->rt6i_flags |= RTF_CACHE;
776
777 #ifdef CONFIG_IPV6_SUBTREES
778                 if (rt->rt6i_src.plen && saddr) {
779                         rt->rt6i_src.addr = *saddr;
780                         rt->rt6i_src.plen = 128;
781                 }
782 #endif
783
784         retry:
785                 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
786                         struct net *net = dev_net(rt->dst.dev);
787                         int saved_rt_min_interval =
788                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
789                         int saved_rt_elasticity =
790                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
791
792                         if (attempts-- > 0) {
                                    /*
                                     * Override the GC tunables (elasticity 1, no minimum
                                     * interval) for this one run, then restore them.
                                     * NOTE(review): the namespace-wide sysctl values are
                                     * modified without any visible locking.
                                     */
793                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
794                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
795
796                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
797
798                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
799                                         saved_rt_elasticity;
800                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
801                                         saved_rt_min_interval;
802                                 goto retry;
803                         }
804
805                         net_warn_ratelimited("Neighbour table overflow\n");
806                         dst_free(&rt->dst);
807                         return NULL;
808                 }
809         }
810
811         return rt;
812 }
813
814 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
815                                         const struct in6_addr *daddr)
816 {
817         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
818
819         if (rt) {
820                 rt->rt6i_flags |= RTF_CACHE;
821                 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
822         }
823         return rt;
824 }
825
/*
 * ip6_pol_route - resolve a flow to a route in @table, creating a cache
 * clone when the selected route needs one.
 * @net:   namespace
 * @table: FIB table to search
 * @oif:   interface index used for route selection
 * @fl6:   flow; daddr and saddr are used
 * @flags: RT6_LOOKUP_F_* flags; only RT6_LOOKUP_F_IFACE is taken over
 *
 * Outline:
 *  1. Under the table read lock, find the FIB node and select a route with
 *     rt6_select(); RT6_LOOKUP_F_REACHABLE is requested unless forwarding
 *     is enabled in devconf_all.  BACKTRACK climbs the tree on a null
 *     result.
 *  2. If the result is the null entry or already an RTF_CACHE route, go to
 *     'out': on the first pass with 'reachable' still set, the lookup is
 *     repeated once without the reachability requirement; otherwise the
 *     route is held and returned.
 *  3. Otherwise the route is held and the lock dropped.  A route with no
 *     neighbour (and not RTF_NONEXTHOP) is cloned with rt6_alloc_cow(); a
 *     route without DST_HOST is cloned with rt6_alloc_clone(); any other
 *     route is returned as is.
 *  4. The clone (or the null entry if cloning failed) is held and, if a
 *     clone exists, inserted with ip6_ins_rt().  On insertion failure the
 *     whole lookup is retried, up to 3 attempts in total.
 *
 * The returned route has had dst_hold() called on it for the caller, and
 * its lastuse/__use fields are updated.
 */
826 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
827                                       struct flowi6 *fl6, int flags)
828 {
829         struct fib6_node *fn;
830         struct rt6_info *rt, *nrt;
831         int strict = 0;
832         int attempts = 3;
833         int err;
834         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
835
836         strict |= flags & RT6_LOOKUP_F_IFACE;
837
838 relookup:
839         read_lock_bh(&table->tb6_lock);
840
841 restart_2:
842         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
843
844 restart:
845         rt = rt6_select(fn, oif, strict | reachable);
846
            /* May jump to 'restart' or 'out' (see the BACKTRACK macro). */
847         BACKTRACK(net, &fl6->saddr);
848         if (rt == net->ipv6.ip6_null_entry ||
849             rt->rt6i_flags & RTF_CACHE)
850                 goto out;
851
            /* Keep rt alive while the table lock is not held. */
852         dst_hold(&rt->dst);
853         read_unlock_bh(&table->tb6_lock);
854
855         if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
856                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
857         else if (!(rt->dst.flags & DST_HOST))
858                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
859         else
860                 goto out2;
861
            /* Swap the held reference from the original route to the clone. */
862         dst_release(&rt->dst);
863         rt = nrt ? : net->ipv6.ip6_null_entry;
864
865         dst_hold(&rt->dst);
866         if (nrt) {
867                 err = ip6_ins_rt(nrt);
868                 if (!err)
869                         goto out2;
870         }
871
872         if (--attempts <= 0)
873                 goto out2;
874
875         /*
876          * Race condition! In the gap, when table->tb6_lock was
877          * released someone could insert this route.  Relookup.
878          */
879         dst_release(&rt->dst);
880         goto relookup;
881
882 out:
            /* First pass required reachability: retry once without it. */
883         if (reachable) {
884                 reachable = 0;
885                 goto restart_2;
886         }
887         dst_hold(&rt->dst);
888         read_unlock_bh(&table->tb6_lock);
889 out2:
890         rt->dst.lastuse = jiffies;
891         rt->dst.__use++;
892
893         return rt;
894 }
895
/*
 * Lookup callback for the input path: runs ip6_pol_route() using the flow's
 * incoming interface index (fl6->flowi6_iif) as the interface argument.
 */
896 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
897                                             struct flowi6 *fl6, int flags)
898 {
899         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
900 }
901
/*
 * Input-side route lookup for @fl6 received on @dev.
 *
 * Adds RT6_LOOKUP_F_IFACE to @flags when rt6_need_strict() is true for the
 * destination address and the device type is not ARPHRD_PIMREG, then hands
 * the flow to fib6_rule_lookup() with ip6_pol_route_input as the per-table
 * callback.  Returns whatever fib6_rule_lookup() returns.
 */
902 static struct dst_entry *ip6_route_input_lookup(struct net *net,
903                                                 struct net_device *dev,
904                                                 struct flowi6 *fl6, int flags)
905 {
906         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
907                 flags |= RT6_LOOKUP_F_IFACE;
908
909         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
910 }
911
/*
 * Route a received packet: build a flowi6 from the skb's IPv6 header, its
 * receiving device and its mark, look the route up, and attach the result
 * to the skb with skb_dst_set().
 */
912 void ip6_route_input(struct sk_buff *skb)
913 {
914         const struct ipv6hdr *iph = ipv6_hdr(skb);
915         struct net *net = dev_net(skb->dev);
916         int flags = RT6_LOOKUP_F_HAS_SADDR;
917         struct flowi6 fl6 = {
918                 .flowi6_iif = skb->dev->ifindex,
919                 .daddr = iph->daddr,
920                 .saddr = iph->saddr,
                    /* First 32-bit word of the header, masked with
                     * IPV6_FLOWINFO_MASK.
                     */
921                 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
922                 .flowi6_mark = skb->mark,
923                 .flowi6_proto = iph->nexthdr,
924         };
925
926         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
927 }
928
/*
 * Lookup callback for the output path: runs ip6_pol_route() using the flow's
 * outgoing interface index (fl6->flowi6_oif) as the interface argument.
 */
929 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
930                                              struct flowi6 *fl6, int flags)
931 {
932         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
933 }
934
/*
 * Output-side route lookup for @fl6, optionally on behalf of socket @sk
 * (@sk may be NULL; it is tested before every use).
 *
 * Builds the lookup flags and calls fib6_rule_lookup() with
 * ip6_pol_route_output as the per-table callback; returns its result.
 */
935 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
936                                     struct flowi6 *fl6)
937 {
938         int flags = 0;
939
            /* Interface-strict lookup when the socket is bound to a device or
             * rt6_need_strict() is true for the destination.
             */
940         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
941                 flags |= RT6_LOOKUP_F_IFACE;
942
            /* A non-any source address is flagged as present; otherwise the
             * socket's srcprefs (if a socket was given) are turned into flags.
             */
943         if (!ipv6_addr_any(&fl6->saddr))
944                 flags |= RT6_LOOKUP_F_HAS_SADDR;
945         else if (sk)
946                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
947
948         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
949 }
950
951 EXPORT_SYMBOL(ip6_route_output);
952
/*
 * Build a dst whose input and output handlers are both dst_discard, copying
 * metrics, inet6_dev, gateway, flags and the destination (and, with
 * CONFIG_IPV6_SUBTREES, source) key from @dst_orig.
 *
 * The reference on @dst_orig is released on every path.
 * Returns the new dst, or ERR_PTR(-ENOMEM) if dst_alloc() fails.
 */
953 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
954 {
955         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
956         struct dst_entry *new = NULL;
957
958         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
959         if (rt) {
                    /* Zero sizeof(*rt) - sizeof(struct dst_entry) bytes starting
                     * at rt6i_table.  NOTE(review): this assumes rt6i_table is the
                     * first member after the embedded dst_entry - confirm.
                     */
960                 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
961                 rt6_init_peer(rt, net->ipv6.peers);
962
963                 new = &rt->dst;
964
965                 new->__use = 1;
966                 new->input = dst_discard;
967                 new->output = dst_discard;
968
                    /* Share the original's metrics pointer when it is read-only,
                     * otherwise copy the metrics.
                     */
969                 if (dst_metrics_read_only(&ort->dst))
970                         new->_metrics = ort->dst._metrics;
971                 else
972                         dst_copy_metrics(new, &ort->dst);
973                 rt->rt6i_idev = ort->rt6i_idev;
974                 if (rt->rt6i_idev)
975                         in6_dev_hold(rt->rt6i_idev);
976
977                 rt->rt6i_gateway = ort->rt6i_gateway;
978                 rt->rt6i_flags = ort->rt6i_flags;
979                 rt6_clean_expires(rt);
980                 rt->rt6i_metric = 0;
981
982                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
983 #ifdef CONFIG_IPV6_SUBTREES
984                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
985 #endif
986
                    /* NOTE(review): dst_free() is called on the entry that is
                     * returned below; presumably the initial reference requested
                     * from dst_alloc() keeps it alive until the caller releases
                     * it - confirm against dst_free().
                     */
987                 dst_free(new);
988         }
989
990         dst_release(dst_orig);
991         return new ? new : ERR_PTR(-ENOMEM);
992 }
993
994 /*
995  *      Destination cache support functions
996  */
997
/*
 * Validate a cached dst against @cookie.
 *
 * The dst is still valid when the route is attached to a FIB node
 * (rt6i_node non-NULL) whose fn_sernum equals @cookie; in that case @dst is
 * returned, otherwise NULL.
 */
998 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
999 {
1000         struct rt6_info *rt;
1001
1002         rt = (struct rt6_info *) dst;
1003
1004         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
                     /* Peer generation changed since last seen: bind a peer if
                      * the route has none, then record the current generation.
                      */
1005                 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1006                         if (!rt6_has_peer(rt))
1007                                 rt6_bind_peer(rt, 0);
1008                         rt->rt6i_peer_genid = rt6_peer_genid();
1009                 }
1010                 return dst;
1011         }
1012         return NULL;
1013 }
1014
/*
 * Handle negative advice for @dst (which may be NULL).
 *
 * - RTF_CACHE route that has expired: deleted via ip6_del_rt(), NULL returned.
 * - RTF_CACHE route not expired: @dst returned unchanged.
 * - Any other route: the reference is released and NULL returned.
 */
1015 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1016 {
1017         struct rt6_info *rt = (struct rt6_info *) dst;
1018
1019         if (rt) {
1020                 if (rt->rt6i_flags & RTF_CACHE) {
1021                         if (rt6_check_expired(rt)) {
1022                                 ip6_del_rt(rt);
1023                                 dst = NULL;
1024                         }
1025                 } else {
1026                         dst_release(dst);
1027                         dst = NULL;
1028                 }
1029         }
1030         return dst;
1031 }
1032
/*
 * Link-failure handler for @skb: sends an ICMPv6 destination-unreachable
 * (address unreachable) message, then adjusts the route attached to the skb,
 * if any.
 */
1033 static void ip6_link_failure(struct sk_buff *skb)
1034 {
1035         struct rt6_info *rt;
1036
1037         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1038
1039         rt = (struct rt6_info *) skb_dst(skb);
1040         if (rt) {
                     /* Cache entry: update its expiry with a timeout of 0.
                      * Default route attached to a node: set the node's serial
                      * number to -1.  NOTE(review): presumably so that later
                      * ip6_dst_check() cookie comparisons fail - confirm.
                      */
1041                 if (rt->rt6i_flags & RTF_CACHE)
1042                         rt6_update_expires(rt, 0);
1043                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1044                         rt->rt6i_node->fn_sernum = -1;
1045         }
1046 }
1047
/*
 * Record a new path MTU on @dst.
 *
 * Acts only when @mtu is smaller than the current dst_mtu() and the route is
 * a host route (rt6i_dst.plen == 128).  Values below IPV6_MIN_MTU are raised
 * to IPV6_MIN_MTU and RTAX_FEATURE_ALLFRAG is set in the RTAX_FEATURES
 * metric.  The route is marked RTF_MODIFIED.
 */
1048 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1049 {
1050         struct rt6_info *rt6 = (struct rt6_info*)dst;
1051
1052         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1053                 rt6->rt6i_flags |= RTF_MODIFIED;
1054                 if (mtu < IPV6_MIN_MTU) {
1055                         u32 features = dst_metric(dst, RTAX_FEATURES);
1056                         mtu = IPV6_MIN_MTU;
1057                         features |= RTAX_FEATURE_ALLFRAG;
1058                         dst_metric_set(dst, RTAX_FEATURES, features);
1059                 }
1060                 dst_metric_set(dst, RTAX_MTU, mtu);
1061         }
1062 }
1063
/*
 * Default advertised MSS for @dst: dst_mtu() minus the IPv6 and TCP header
 * sizes, raised to the namespace's ip6_rt_min_advmss sysctl if smaller, and
 * replaced by IPV6_MAXPLEN if larger than IPV6_MAXPLEN - sizeof(struct tcphdr).
 */
1064 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1065 {
1066         struct net_device *dev = dst->dev;
1067         unsigned int mtu = dst_mtu(dst);
1068         struct net *net = dev_net(dev);
1069
             /* NOTE(review): unsigned subtraction; assumes dst_mtu() is at least
              * the combined header size - confirm.
              */
1070         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1071
1072         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1073                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1074
1075         /*
1076          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1077          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1078          * IPV6_MAXPLEN is also valid and means: "any MSS,
1079          * rely only on pmtu discovery"
1080          */
1081         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1082                 mtu = IPV6_MAXPLEN;
1083         return mtu;
1084 }
1085
/*
 * MTU for @dst: the RTAX_MTU metric when it is non-zero; otherwise the
 * device's inet6_dev cnf.mtu6, or IPV6_MIN_MTU when the device has no
 * inet6_dev.
 */
1086 static unsigned int ip6_mtu(const struct dst_entry *dst)
1087 {
1088         struct inet6_dev *idev;
1089         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1090
1091         if (mtu)
1092                 return mtu;
1093
1094         mtu = IPV6_MIN_MTU;
1095
             /* __in6_dev_get() is called inside an RCU read-side section. */
1096         rcu_read_lock();
1097         idev = __in6_dev_get(dst->dev);
1098         if (idev)
1099                 mtu = idev->cnf.mtu6;
1100         rcu_read_unlock();
1101
1102         return mtu;
1103 }
1104
/*
 * Singly linked list (chained through dst->next) of the dst entries created
 * by icmp6_dst_alloc(); every access in this file is made under
 * icmp6_dst_lock.
 */
1105 static struct dst_entry *icmp6_dst_gc_list;
1106 static DEFINE_SPINLOCK(icmp6_dst_lock);
1107
/*
 * Allocate a host dst (prefix length 128, hop limit metric 255) towards
 * fl6->daddr on @dev, with ip6_output as its output handler.
 *
 * @dev:   outgoing device; must have an inet6_dev
 * @neigh: neighbour to bind, or NULL to look one up for fl6->daddr
 * @fl6:   flow; daddr is used as the route key and for the xfrm lookup
 *
 * The new entry is pushed onto icmp6_dst_gc_list and FIB gc is started.
 * Returns the result of xfrm_lookup() on the new dst, or an ERR_PTR:
 * -ENODEV (no inet6_dev), -ENOMEM (allocation failed), or the error from
 * ip6_neigh_lookup().
 */
1108 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1109                                   struct neighbour *neigh,
1110                                   struct flowi6 *fl6)
1111 {
1112         struct dst_entry *dst;
1113         struct rt6_info *rt;
1114         struct inet6_dev *idev = in6_dev_get(dev);
1115         struct net *net = dev_net(dev);
1116
1117         if (unlikely(!idev))
1118                 return ERR_PTR(-ENODEV);
1119
1120         rt = ip6_dst_alloc(net, dev, 0, NULL);
1121         if (unlikely(!rt)) {
1122                 in6_dev_put(idev);
1123                 dst = ERR_PTR(-ENOMEM);
1124                 goto out;
1125         }
1126
             /* A caller-supplied neighbour gets an extra reference; otherwise
              * one is looked up, undoing the idev hold and the dst on failure.
              */
1127         if (neigh)
1128                 neigh_hold(neigh);
1129         else {
1130                 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
1131                 if (IS_ERR(neigh)) {
1132                         in6_dev_put(idev);
1133                         dst_free(&rt->dst);
1134                         return ERR_CAST(neigh);
1135                 }
1136         }
1137
1138         rt->dst.flags |= DST_HOST;
1139         rt->dst.output  = ip6_output;
1140         dst_set_neighbour(&rt->dst, neigh);
1141         atomic_set(&rt->dst.__refcnt, 1);
1142         rt->rt6i_dst.addr = fl6->daddr;
1143         rt->rt6i_dst.plen = 128;
             /* The idev reference taken at the top is stored in the route. */
1144         rt->rt6i_idev     = idev;
1145         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1146
             /* Push onto the head of the gc list (walked by icmp6_dst_gc()). */
1147         spin_lock_bh(&icmp6_dst_lock);
1148         rt->dst.next = icmp6_dst_gc_list;
1149         icmp6_dst_gc_list = &rt->dst;
1150         spin_unlock_bh(&icmp6_dst_lock);
1151
1152         fib6_force_start_gc(net);
1153
1154         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1155
1156 out:
1157         return dst;
1158 }
1159
/*
 * Walk icmp6_dst_gc_list under icmp6_dst_lock, unlinking and dst_free()ing
 * every entry whose reference count is zero.
 *
 * Returns the number of entries left on the list (those still referenced).
 */
1160 int icmp6_dst_gc(void)
1161 {
1162         struct dst_entry *dst, **pprev;
1163         int more = 0;
1164
1165         spin_lock_bh(&icmp6_dst_lock);
1166         pprev = &icmp6_dst_gc_list;
1167
             /* pprev points at the link to the current entry, so unlinking is a
              * single store and needs no separate 'previous' pointer.
              */
1168         while ((dst = *pprev) != NULL) {
1169                 if (!atomic_read(&dst->__refcnt)) {
1170                         *pprev = dst->next;
1171                         dst_free(dst);
1172                 } else {
1173                         pprev = &dst->next;
1174                         ++more;
1175                 }
1176         }
1177
1178         spin_unlock_bh(&icmp6_dst_lock);
1179
1180         return more;
1181 }
1182
/*
 * Call @func(rt, @arg) for every entry on icmp6_dst_gc_list, under
 * icmp6_dst_lock; entries for which @func returns non-zero are unlinked and
 * dst_free()d.
 */
1183 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1184                             void *arg)
1185 {
1186         struct dst_entry *dst, **pprev;
1187
1188         spin_lock_bh(&icmp6_dst_lock);
1189         pprev = &icmp6_dst_gc_list;
1190         while ((dst = *pprev) != NULL) {
1191                 struct rt6_info *rt = (struct rt6_info *) dst;
1192                 if (func(rt, arg)) {
1193                         *pprev = dst->next;
1194                         dst_free(dst);
1195                 } else {
1196                         pprev = &dst->next;
1197                 }
1198         }
1199         spin_unlock_bh(&icmp6_dst_lock);
1200 }
1201
/*
 * Garbage-collection hook for the IPv6 dst_ops embedded in struct net.
 *
 * Skips the collection when the previous run was less than
 * ip6_rt_gc_min_interval ago and the entry count does not exceed
 * ip6_rt_max_size.  Otherwise runs fib6_run_gc() with an incremented
 * ip6_rt_gc_expire and recounts.  On every call ip6_rt_gc_expire is then
 * reduced by itself shifted right by ip6_rt_gc_elasticity.
 *
 * Returns non-zero when the entry count is still above ip6_rt_max_size.
 */
1202 static int ip6_dst_gc(struct dst_ops *ops)
1203 {
1204         unsigned long now = jiffies;
1205         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1206         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1207         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1208         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1209         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1210         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1211         int entries;
1212
1213         entries = dst_entries_get_fast(ops);
1214         if (time_after(rt_last_gc + rt_min_interval, now) &&
1215             entries <= rt_max_size)
1216                 goto out;
1217
1218         net->ipv6.ip6_rt_gc_expire++;
1219         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1220         net->ipv6.ip6_rt_last_gc = now;
1221         entries = dst_entries_get_slow(ops);
             /* Below the threshold after collecting: reset the expiry to half
              * of the configured gc timeout.
              */
1222         if (entries < ops->gc_thresh)
1223                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1224 out:
1225         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1226         return entries > rt_max_size;
1227 }
1228
1229 /* Clean host part of a prefix. Not necessary in radix tree,
1230    but results in cleaner routing tables.
1231
1232    Remove it only when all the things will work!
1233  */
1234
/*
 * Hop limit to use for @dst: the RTAX_HOPLIMIT metric when non-zero,
 * otherwise the device's inet6_dev cnf.hop_limit, or the namespace-wide
 * devconf_all->hop_limit when the device has no inet6_dev.
 */
1235 int ip6_dst_hoplimit(struct dst_entry *dst)
1236 {
1237         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1238         if (hoplimit == 0) {
1239                 struct net_device *dev = dst->dev;
1240                 struct inet6_dev *idev;
1241
1242                 rcu_read_lock();
1243                 idev = __in6_dev_get(dev);
1244                 if (idev)
1245                         hoplimit = idev->cnf.hop_limit;
1246                 else
1247                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1248                 rcu_read_unlock();
1249         }
1250         return hoplimit;
1251 }
1252 EXPORT_SYMBOL(ip6_dst_hoplimit);
1253
1254 /*
1255  *
1256  */
1257
/*
 * ip6_route_add - build a route from @cfg and insert it into its FIB table.
 *
 * @cfg: route description.  Note that @cfg is modified: fc_metric and
 *       fc_protocol receive defaults when unset, and fc_nlinfo.nl_net is
 *       overwritten with the namespace of the chosen device.
 *
 * Returns the result of __ip6_ins_rt() on the success path, or a negative
 * errno: -EINVAL (bad prefix length, gateway, preferred source or metric
 * type), -ENODEV (no device / inet6_dev), -ENOBUFS (no table), -ENOMEM
 * (allocation failure), -EHOSTUNREACH (gateway not reachable), or the error
 * from rt6_bind_neighbour().
 *
 * On the 'out' error path the device, inet6_dev and route references taken
 * here are dropped.  On the success path the device and inet6_dev pointers
 * (with the references held on them) are stored in the route.
 */
1258 int ip6_route_add(struct fib6_config *cfg)
1259 {
1260         int err;
1261         struct net *net = cfg->fc_nlinfo.nl_net;
1262         struct rt6_info *rt = NULL;
1263         struct net_device *dev = NULL;
1264         struct inet6_dev *idev = NULL;
1265         struct fib6_table *table;
1266         int addr_type;
1267
1268         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1269                 return -EINVAL;
1270 #ifndef CONFIG_IPV6_SUBTREES
1271         if (cfg->fc_src_len)
1272                 return -EINVAL;
1273 #endif
             /* An explicit interface index must resolve to a device that has an
              * inet6_dev; both references are dropped at 'out' on failure.
              */
1274         if (cfg->fc_ifindex) {
1275                 err = -ENODEV;
1276                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1277                 if (!dev)
1278                         goto out;
1279                 idev = in6_dev_get(dev);
1280                 if (!idev)
1281                         goto out;
1282         }
1283
1284         if (cfg->fc_metric == 0)
1285                 cfg->fc_metric = IP6_RT_PRIO_USER;
1286
             /* A netlink request without NLM_F_CREATE first looks for an
              * existing table; if there is none it warns and creates the table
              * anyway.  All other requests get-or-create the table directly.
              */
1287         err = -ENOBUFS;
1288         if (cfg->fc_nlinfo.nlh &&
1289             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1290                 table = fib6_get_table(net, cfg->fc_table);
1291                 if (!table) {
1292                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1293                         table = fib6_new_table(net, cfg->fc_table);
1294                 }
1295         } else {
1296                 table = fib6_new_table(net, cfg->fc_table);
1297         }
1298
1299         if (!table)
1300                 goto out;
1301
1302         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1303
1304         if (!rt) {
1305                 err = -ENOMEM;
1306                 goto out;
1307         }
1308
1309         rt->dst.obsolete = -1;
1310
1311         if (cfg->fc_flags & RTF_EXPIRES)
1312                 rt6_set_expires(rt, jiffies +
1313                                 clock_t_to_jiffies(cfg->fc_expires));
1314         else
1315                 rt6_clean_expires(rt);
1316
1317         if (cfg->fc_protocol == RTPROT_UNSPEC)
1318                 cfg->fc_protocol = RTPROT_BOOT;
1319         rt->rt6i_protocol = cfg->fc_protocol;
1320
1321         addr_type = ipv6_addr_type(&cfg->fc_dst);
1322
             /* Input handler: multicast destination, local route, or forward. */
1323         if (addr_type & IPV6_ADDR_MULTICAST)
1324                 rt->dst.input = ip6_mc_input;
1325         else if (cfg->fc_flags & RTF_LOCAL)
1326                 rt->dst.input = ip6_input;
1327         else
1328                 rt->dst.input = ip6_forward;
1329
1330         rt->dst.output = ip6_output;
1331
1332         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1333         rt->rt6i_dst.plen = cfg->fc_dst_len;
1334         if (rt->rt6i_dst.plen == 128)
1335                rt->dst.flags |= DST_HOST;
1336
             /* Non-host routes that carry metric attributes get their own
              * zeroed metrics array.
              */
1337         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1338                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1339                 if (!metrics) {
1340                         err = -ENOMEM;
1341                         goto out;
1342                 }
1343                 dst_init_metrics(&rt->dst, metrics, 0);
1344         }
1345 #ifdef CONFIG_IPV6_SUBTREES
1346         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1347         rt->rt6i_src.plen = cfg->fc_src_len;
1348 #endif
1349
1350         rt->rt6i_metric = cfg->fc_metric;
1351
1352         /* We cannot add true routes via loopback here,
1353            they would result in kernel looping; promote them to reject routes
1354          */
1355         if ((cfg->fc_flags & RTF_REJECT) ||
1356             (dev && (dev->flags & IFF_LOOPBACK) &&
1357              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1358              !(cfg->fc_flags & RTF_LOCAL))) {
1359                 /* hold loopback dev/idev if we haven't done so. */
1360                 if (dev != net->loopback_dev) {
1361                         if (dev) {
1362                                 dev_put(dev);
1363                                 in6_dev_put(idev);
1364                         }
1365                         dev = net->loopback_dev;
1366                         dev_hold(dev);
1367                         idev = in6_dev_get(dev);
1368                         if (!idev) {
1369                                 err = -ENODEV;
1370                                 goto out;
1371                         }
1372                 }
                     /* Reject route: discard handlers, -ENETUNREACH, and the
                      * configured flags are replaced rather than merged.
                      */
1373                 rt->dst.output = ip6_pkt_discard_out;
1374                 rt->dst.input = ip6_pkt_discard;
1375                 rt->dst.error = -ENETUNREACH;
1376                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1377                 goto install_route;
1378         }
1379
1380         if (cfg->fc_flags & RTF_GATEWAY) {
1381                 const struct in6_addr *gw_addr;
1382                 int gwa_type;
1383
1384                 gw_addr = &cfg->fc_gateway;
1385                 rt->rt6i_gateway = *gw_addr;
1386                 gwa_type = ipv6_addr_type(gw_addr);
1387
1388                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1389                         struct rt6_info *grt;
1390
1391                         /* IPv6 strictly inhibits using not link-local
1392                            addresses as nexthop address.
1393                            Otherwise, router will not be able to send redirects.
1394                            It is very good, but in some (rare!) circumstances
1395                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1396                            some exceptions. --ANK
1397                          */
1398                         err = -EINVAL;
1399                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1400                                 goto out;
1401
1402                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1403
1404                         err = -EHOSTUNREACH;
1405                         if (!grt)
1406                                 goto out;
                             /* The route to the gateway must use the requested
                              * device; with no device requested, adopt (and
                              * hold) the one that route uses.
                              */
1407                         if (dev) {
1408                                 if (dev != grt->dst.dev) {
1409                                         dst_release(&grt->dst);
1410                                         goto out;
1411                                 }
1412                         } else {
1413                                 dev = grt->dst.dev;
1414                                 idev = grt->rt6i_idev;
1415                                 dev_hold(dev);
1416                                 in6_dev_hold(grt->rt6i_idev);
1417                         }
                             /* Accepted only when the route to the gateway is not
                              * itself a gateway route; otherwise err stays
                              * -EHOSTUNREACH.
                              */
1418                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1419                                 err = 0;
1420                         dst_release(&grt->dst);
1421
1422                         if (err)
1423                                 goto out;
1424                 }
1425                 err = -EINVAL;
1426                 if (!dev || (dev->flags & IFF_LOOPBACK))
1427                         goto out;
1428         }
1429
1430         err = -ENODEV;
1431         if (!dev)
1432                 goto out;
1433
             /* A preferred source address must pass ipv6_chk_addr() for the
              * chosen device.
              */
1434         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1435                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1436                         err = -EINVAL;
1437                         goto out;
1438                 }
1439                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1440                 rt->rt6i_prefsrc.plen = 128;
1441         } else
1442                 rt->rt6i_prefsrc.plen = 0;
1443
1444         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1445                 err = rt6_bind_neighbour(rt, dev);
1446                 if (err)
1447                         goto out;
1448         }
1449
1450         rt->rt6i_flags = cfg->fc_flags;
1451
1452 install_route:
1453         if (cfg->fc_mx) {
1454                 struct nlattr *nla;
1455                 int remaining;
1456
                     /* Copy each metric attribute; type 0 is skipped and types
                      * above RTAX_MAX are rejected.
                      * NOTE(review): the payload length is not checked here
                      * before nla_get_u32() - confirm the caller validates it.
                      */
1457                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1458                         int type = nla_type(nla);
1459
1460                         if (type) {
1461                                 if (type > RTAX_MAX) {
1462                                         err = -EINVAL;
1463                                         goto out;
1464                                 }
1465
1466                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1467                         }
1468                 }
1469         }
1470
1471         rt->dst.dev = dev;
1472         rt->rt6i_idev = idev;
1473         rt->rt6i_table = table;
1474
1475         cfg->fc_nlinfo.nl_net = dev_net(dev);
1476
1477         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1478
1479 out:
1480         if (dev)
1481                 dev_put(dev);
1482         if (idev)
1483                 in6_dev_put(idev);
1484         if (rt)
1485                 dst_free(&rt->dst);
1486         return err;
1487 }
1488
/*
 * Remove @rt from its table (rt->rt6i_table) under the table write lock.
 *
 * Returns -ENOENT when @rt is the namespace's ip6_null_entry, otherwise the
 * result of fib6_del().  Except on the -ENOENT path, one reference on
 * @rt->dst is released here.
 */
1489 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1490 {
1491         int err;
1492         struct fib6_table *table;
1493         struct net *net = dev_net(rt->dst.dev);
1494
1495         if (rt == net->ipv6.ip6_null_entry)
1496                 return -ENOENT;
1497
1498         table = rt->rt6i_table;
1499         write_lock_bh(&table->tb6_lock);
1500
1501         err = fib6_del(rt, info);
1502         dst_release(&rt->dst);
1503
1504         write_unlock_bh(&table->tb6_lock);
1505
1506         return err;
1507 }
1508
/*
 * Delete @rt using an nl_info that carries only the namespace of the route's
 * device.  Returns the result of __ip6_del_rt().
 */
1509 int ip6_del_rt(struct rt6_info *rt)
1510 {
1511         struct nl_info info = {
1512                 .nl_net = dev_net(rt->dst.dev),
1513         };
1514         return __ip6_del_rt(rt, &info);
1515 }
1516
/*
 * Delete the first route matching @cfg.
 *
 * The node is located by destination/source prefix; within it a route
 * matches when each filter that is set in @cfg agrees: fc_ifindex (device
 * index), RTF_GATEWAY in fc_flags (gateway address), fc_metric (metric).
 *
 * Returns -ESRCH when the table, node or a matching route is not found,
 * otherwise the result of __ip6_del_rt().
 */
1517 static int ip6_route_del(struct fib6_config *cfg)
1518 {
1519         struct fib6_table *table;
1520         struct fib6_node *fn;
1521         struct rt6_info *rt;
1522         int err = -ESRCH;
1523
1524         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1525         if (!table)
1526                 return err;
1527
1528         read_lock_bh(&table->tb6_lock);
1529
1530         fn = fib6_locate(&table->tb6_root,
1531                          &cfg->fc_dst, cfg->fc_dst_len,
1532                          &cfg->fc_src, cfg->fc_src_len);
1533
1534         if (fn) {
1535                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1536                         if (cfg->fc_ifindex &&
1537                             (!rt->dst.dev ||
1538                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1539                                 continue;
1540                         if (cfg->fc_flags & RTF_GATEWAY &&
1541                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1542                                 continue;
1543                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1544                                 continue;
                             /* Take a reference, drop the read lock, then delete;
                              * __ip6_del_rt() takes the write lock itself.
                              */
1545                         dst_hold(&rt->dst);
1546                         read_unlock_bh(&table->tb6_lock);
1547
1548                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1549                 }
1550         }
1551         read_unlock_bh(&table->tb6_lock);
1552
1553         return err;
1554 }
1555
1556 /*
1557  *      Handle redirects
1558  */
/*
 * A flowi6 extended with the redirecting gateway's address.  fl6 must remain
 * the first member: __ip6_route_redirect() casts the struct flowi6 pointer it
 * receives back to struct ip6rd_flowi.
 */
1559 struct ip6rd_flowi {
1560         struct flowi6 fl6;
1561         struct in6_addr gateway;
1562 };
1563
/*
 * Per-table lookup callback used when validating a redirect.
 *
 * @fl6 is really the fl6 member of a struct ip6rd_flowi (see the cast
 * below).  Searches the node found for the flow for an unexpired gateway
 * route on the flow's outgoing interface whose gateway equals the one stored
 * in the ip6rd_flowi.
 *
 * Returns the matching route, or the namespace's ip6_null_entry, with a
 * reference held in both cases.  @flags is not used in this function.
 *
 * NOTE(review): the closing brace is followed by a stray ';'.
 */
1564 static struct rt6_info *__ip6_route_redirect(struct net *net,
1565                                              struct fib6_table *table,
1566                                              struct flowi6 *fl6,
1567                                              int flags)
1568 {
1569         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1570         struct rt6_info *rt;
1571         struct fib6_node *fn;
1572
1573         /*
1574          * Get the "current" route for this destination and
1575          * check if the redirect has come from appropriate router.
1576          *
1577          * RFC 2461 specifies that redirects should only be
1578          * accepted if they come from the nexthop to the target.
1579          * Due to the way the routes are chosen, this notion
1580          * is a bit fuzzy and one might need to check all possible
1581          * routes.
1582          */
1583
1584         read_lock_bh(&table->tb6_lock);
1585         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1586 restart:
1587         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1588                 /*
1589                  * Current route is on-link; redirect is always invalid.
1590                  *
1591                  * Seems, previous statement is not true. It could
1592                  * be node, which looks for us as on-link (f.e. proxy ndisc)
1593                  * But then router serving it might decide, that we should
1594                  * know truth 8)8) --ANK (980726).
1595                  */
1596                 if (rt6_check_expired(rt))
1597                         continue;
1598                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1599                         continue;
1600                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1601                         continue;
1602                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1603                         continue;
1604                 break;
1605         }
1606
1607         if (!rt)
1608                 rt = net->ipv6.ip6_null_entry;
             /* BACKTRACK is a macro defined outside this excerpt.
              * NOTE(review): presumably it moves 'fn' up the tree when rt is the
              * null entry and jumps to 'restart' or 'out' - confirm.
              */
1609         BACKTRACK(net, &fl6->saddr);
1610 out:
1611         dst_hold(&rt->dst);
1612
1613         read_unlock_bh(&table->tb6_lock);
1614
1615         return rt;
1616 };
1617
/*
 * Find the route that a redirect for @dest, sent from @gateway and received
 * on @dev, would replace.
 *
 * Packs @dest/@src/@dev->ifindex and @gateway into a struct ip6rd_flowi and
 * runs fib6_rule_lookup() with __ip6_route_redirect as the per-table
 * callback.  The lookup is interface-strict when rt6_need_strict(@dest).
 */
1618 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1619                                            const struct in6_addr *src,
1620                                            const struct in6_addr *gateway,
1621                                            struct net_device *dev)
1622 {
1623         int flags = RT6_LOOKUP_F_HAS_SADDR;
1624         struct net *net = dev_net(dev);
1625         struct ip6rd_flowi rdfl = {
1626                 .fl6 = {
1627                         .flowi6_oif = dev->ifindex,
1628                         .daddr = *dest,
1629                         .saddr = *src,
1630                 },
1631         };
1632
1633         rdfl.gateway = *gateway;
1634
1635         if (rt6_need_strict(dest))
1636                 flags |= RT6_LOOKUP_F_IFACE;
1637
1638         return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1639                                                    flags, __ip6_route_redirect);
1640 }
1641
/*
 * rt6_redirect - process a redirect for @dest.
 *
 * @dest:    destination being redirected
 * @src:     passed as the flow source address to the route lookup
 * @saddr:   passed as the gateway address the current route must match
 * @neigh:   neighbour entry for the new next hop; neigh->dev is the device
 * @lladdr:  link-layer address handed to neigh_update()
 * @on_link: non-zero when the new next hop is the destination itself; the
 *           new route is then created without RTF_GATEWAY
 *
 * When the lookup finds a current route via the redirecting router, the
 * neighbour is updated, a cached copy of the route pointing at @neigh is
 * inserted, NETEVENT_REDIRECT notifiers are called, and an old cache route
 * is deleted.
 */
1642 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1643                   const struct in6_addr *saddr,
1644                   struct neighbour *neigh, u8 *lladdr, int on_link)
1645 {
1646         struct rt6_info *rt, *nrt = NULL;
1647         struct netevent_redirect netevent;
1648         struct net *net = dev_net(neigh->dev);
1649
1650         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1651
1652         if (rt == net->ipv6.ip6_null_entry) {
1653                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1654                 goto out;
1655         }
1656
1657         /*
1658          *      We have finally decided to accept it.
1659          */
1660
1661         neigh_update(neigh, lladdr, NUD_STALE,
1662                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1663                      NEIGH_UPDATE_F_OVERRIDE|
1664                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1665                                      NEIGH_UPDATE_F_ISROUTER))
1666                      );
1667
1668         /*
1669          * Redirect received -> path was valid.
1670          * Look, redirects are sent only in response to data packets,
1671          * so that this nexthop apparently is reachable. --ANK
1672          */
1673         dst_confirm(&rt->dst);
1674
1675         /* Duplicate redirect: silently ignore. */
1676         if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1677                 goto out;
1678
1679         nrt = ip6_rt_copy(rt, dest);
1680         if (!nrt)
1681                 goto out;
1682
1683         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1684         if (on_link)
1685                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1686
1687         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1688         dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1689
1690         if (ip6_ins_rt(nrt))
1691                 goto out;
1692
1693         netevent.old = &rt->dst;
1694         netevent.new = &nrt->dst;
1695         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1696
             /* An old cache entry is deleted.  The early return skips the
              * dst_release() at 'out' because ip6_del_rt() ends up in
              * __ip6_del_rt(), which releases a reference on rt itself.
              */
1697         if (rt->rt6i_flags & RTF_CACHE) {
1698                 ip6_del_rt(rt);
1699                 return;
1700         }
1701
1702 out:
1703         dst_release(&rt->dst);
1704 }
1705
1706 /*
1707  *      Handle ICMP "packet too big" messages
1708  *      i.e. Path MTU discovery
1709  */
1710
/*
 * Apply a reported path MTU @pmtu to the route for @daddr/@saddr on
 * interface index @ifindex in namespace @net.
 *
 * Nothing is changed when no route is found or when @pmtu is not smaller
 * than the route's current MTU.  A cache route is updated in place; for any
 * other route a per-destination copy carrying the new MTU is created and
 * inserted.  In both cases the expiry is set from the ip6_rt_mtu_expires
 * sysctl.
 */
1711 static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1712                              struct net *net, u32 pmtu, int ifindex)
1713 {
1714         struct rt6_info *rt, *nrt;
1715         int allfrag = 0;
1716 again:
1717         rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1718         if (!rt)
1719                 return;
1720
             /* An expired route is deleted and the lookup repeated. */
1721         if (rt6_check_expired(rt)) {
1722                 ip6_del_rt(rt);
1723                 goto again;
1724         }
1725
1726         if (pmtu >= dst_mtu(&rt->dst))
1727                 goto out;
1728
1729         if (pmtu < IPV6_MIN_MTU) {
1730                 /*
1731                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1732                  * MTU (1280) and a fragment header should always be included
1733                  * after a node receiving Too Big message reporting PMTU is
1734                  * less than the IPv6 Minimum Link MTU.
1735                  */
1736                 pmtu = IPV6_MIN_MTU;
1737                 allfrag = 1;
1738         }
1739
1740         /* New mtu received -> path was valid.
1741            They are sent only in response to data packets,
1742            so that this nexthop apparently is reachable. --ANK
1743          */
1744         dst_confirm(&rt->dst);
1745
1746         /* Host route. If it is static, it would be better
1747            not to override it, but add new one, so that
1748            when cache entry will expire old pmtu
1749            would return automatically.
1750          */
1751         if (rt->rt6i_flags & RTF_CACHE) {
1752                 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1753                 if (allfrag) {
1754                         u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1755                         features |= RTAX_FEATURE_ALLFRAG;
1756                         dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1757                 }
1758                 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1759                 rt->rt6i_flags |= RTF_MODIFIED;
1760                 goto out;
1761         }
1762
1763         /* Network route.
1764            Two cases are possible:
1765            1. It is connected route. Action: COW
1766            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1767          */
1768         if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1769                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1770         else
1771                 nrt = rt6_alloc_clone(rt, daddr);
1772
1773         if (nrt) {
1774                 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1775                 if (allfrag) {
1776                         u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1777                         features |= RTAX_FEATURE_ALLFRAG;
1778                         dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1779                 }
1780
1781                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1782                  * happened within 5 mins, the recommended timer is 10 mins.
1783                  * Here this route expiration time is set to ip6_rt_mtu_expires
1784                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1785                  * and detecting PMTU increase will be automatically happened.
1786                  */
1787                 rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1788                 nrt->rt6i_flags |= RTF_DYNAMIC;
                     /* NOTE(review): the return value of ip6_ins_rt() is ignored
                      * here - confirm that an insertion failure needs no handling.
                      */
1789                 ip6_ins_rt(nrt);
1790         }
1791 out:
1792         dst_release(&rt->dst);
1793 }
1794
1795 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1796                         struct net_device *dev, u32 pmtu)
1797 {
1798         struct net *net = dev_net(dev);
1799
1800         /*
1801          * RFC 1981 states that a node "MUST reduce the size of the packets it
1802          * is sending along the path" that caused the Packet Too Big message.
1803          * Since it's not possible in the general case to determine which
1804          * interface was used to send the original packet, we update the MTU
1805          * on the interface that will be used to send future packets. We also
1806          * update the MTU on the interface that received the Packet Too Big in
1807          * case the original packet was forced out that interface with
1808          * SO_BINDTODEVICE or similar. This is the next best thing to the
1809          * correct behaviour, which would be to update the MTU on all
1810          * interfaces.
1811          */
1812         rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1813         rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1814 }
1815
1816 /*
1817  *      Misc support functions
1818  */
1819
1820 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1821                                     const struct in6_addr *dest)
1822 {
1823         struct net *net = dev_net(ort->dst.dev);
1824         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1825                                             ort->rt6i_table);
1826
1827         if (rt) {
1828                 rt->dst.input = ort->dst.input;
1829                 rt->dst.output = ort->dst.output;
1830                 rt->dst.flags |= DST_HOST;
1831
1832                 rt->rt6i_dst.addr = *dest;
1833                 rt->rt6i_dst.plen = 128;
1834                 dst_copy_metrics(&rt->dst, &ort->dst);
1835                 rt->dst.error = ort->dst.error;
1836                 rt->rt6i_idev = ort->rt6i_idev;
1837                 if (rt->rt6i_idev)
1838                         in6_dev_hold(rt->rt6i_idev);
1839                 rt->dst.lastuse = jiffies;
1840
1841                 rt->rt6i_gateway = ort->rt6i_gateway;
1842                 rt->rt6i_flags = ort->rt6i_flags;
1843                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1844                     (RTF_DEFAULT | RTF_ADDRCONF))
1845                         rt6_set_from(rt, ort);
1846                 else
1847                         rt6_clean_expires(rt);
1848                 rt->rt6i_metric = 0;
1849
1850 #ifdef CONFIG_IPV6_SUBTREES
1851                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1852 #endif
1853                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1854                 rt->rt6i_table = ort->rt6i_table;
1855         }
1856         return rt;
1857 }
1858
1859 #ifdef CONFIG_IPV6_ROUTE_INFO
1860 static struct rt6_info *rt6_get_route_info(struct net *net,
1861                                            const struct in6_addr *prefix, int prefixlen,
1862                                            const struct in6_addr *gwaddr, int ifindex)
1863 {
1864         struct fib6_node *fn;
1865         struct rt6_info *rt = NULL;
1866         struct fib6_table *table;
1867
1868         table = fib6_get_table(net, RT6_TABLE_INFO);
1869         if (!table)
1870                 return NULL;
1871
1872         write_lock_bh(&table->tb6_lock);
1873         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1874         if (!fn)
1875                 goto out;
1876
1877         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1878                 if (rt->dst.dev->ifindex != ifindex)
1879                         continue;
1880                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1881                         continue;
1882                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1883                         continue;
1884                 dst_hold(&rt->dst);
1885                 break;
1886         }
1887 out:
1888         write_unlock_bh(&table->tb6_lock);
1889         return rt;
1890 }
1891
1892 static struct rt6_info *rt6_add_route_info(struct net *net,
1893                                            const struct in6_addr *prefix, int prefixlen,
1894                                            const struct in6_addr *gwaddr, int ifindex,
1895                                            unsigned int pref)
1896 {
1897         struct fib6_config cfg = {
1898                 .fc_table       = RT6_TABLE_INFO,
1899                 .fc_metric      = IP6_RT_PRIO_USER,
1900                 .fc_ifindex     = ifindex,
1901                 .fc_dst_len     = prefixlen,
1902                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1903                                   RTF_UP | RTF_PREF(pref),
1904                 .fc_nlinfo.pid = 0,
1905                 .fc_nlinfo.nlh = NULL,
1906                 .fc_nlinfo.nl_net = net,
1907         };
1908
1909         cfg.fc_dst = *prefix;
1910         cfg.fc_gateway = *gwaddr;
1911
1912         /* We should treat it as a default route if prefix length is 0. */
1913         if (!prefixlen)
1914                 cfg.fc_flags |= RTF_DEFAULT;
1915
1916         ip6_route_add(&cfg);
1917
1918         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1919 }
1920 #endif
1921
1922 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1923 {
1924         struct rt6_info *rt;
1925         struct fib6_table *table;
1926
1927         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1928         if (!table)
1929                 return NULL;
1930
1931         write_lock_bh(&table->tb6_lock);
1932         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1933                 if (dev == rt->dst.dev &&
1934                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1935                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1936                         break;
1937         }
1938         if (rt)
1939                 dst_hold(&rt->dst);
1940         write_unlock_bh(&table->tb6_lock);
1941         return rt;
1942 }
1943
1944 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1945                                      struct net_device *dev,
1946                                      unsigned int pref)
1947 {
1948         struct fib6_config cfg = {
1949                 .fc_table       = RT6_TABLE_DFLT,
1950                 .fc_metric      = IP6_RT_PRIO_USER,
1951                 .fc_ifindex     = dev->ifindex,
1952                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1953                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1954                 .fc_nlinfo.pid = 0,
1955                 .fc_nlinfo.nlh = NULL,
1956                 .fc_nlinfo.nl_net = dev_net(dev),
1957         };
1958
1959         cfg.fc_gateway = *gwaddr;
1960
1961         ip6_route_add(&cfg);
1962
1963         return rt6_get_dflt_router(gwaddr, dev);
1964 }
1965
1966 void rt6_purge_dflt_routers(struct net *net)
1967 {
1968         struct rt6_info *rt;
1969         struct fib6_table *table;
1970
1971         /* NOTE: Keep consistent with rt6_get_dflt_router */
1972         table = fib6_get_table(net, RT6_TABLE_DFLT);
1973         if (!table)
1974                 return;
1975
1976 restart:
1977         read_lock_bh(&table->tb6_lock);
1978         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1979                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1980                         dst_hold(&rt->dst);
1981                         read_unlock_bh(&table->tb6_lock);
1982                         ip6_del_rt(rt);
1983                         goto restart;
1984                 }
1985         }
1986         read_unlock_bh(&table->tb6_lock);
1987 }
1988
1989 static void rtmsg_to_fib6_config(struct net *net,
1990                                  struct in6_rtmsg *rtmsg,
1991                                  struct fib6_config *cfg)
1992 {
1993         memset(cfg, 0, sizeof(*cfg));
1994
1995         cfg->fc_table = RT6_TABLE_MAIN;
1996         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1997         cfg->fc_metric = rtmsg->rtmsg_metric;
1998         cfg->fc_expires = rtmsg->rtmsg_info;
1999         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2000         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2001         cfg->fc_flags = rtmsg->rtmsg_flags;
2002
2003         cfg->fc_nlinfo.nl_net = net;
2004
2005         cfg->fc_dst = rtmsg->rtmsg_dst;
2006         cfg->fc_src = rtmsg->rtmsg_src;
2007         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2008 }
2009
2010 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2011 {
2012         struct fib6_config cfg;
2013         struct in6_rtmsg rtmsg;
2014         int err;
2015
2016         switch(cmd) {
2017         case SIOCADDRT:         /* Add a route */
2018         case SIOCDELRT:         /* Delete a route */
2019                 if (!capable(CAP_NET_ADMIN))
2020                         return -EPERM;
2021                 err = copy_from_user(&rtmsg, arg,
2022                                      sizeof(struct in6_rtmsg));
2023                 if (err)
2024                         return -EFAULT;
2025
2026                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2027
2028                 rtnl_lock();
2029                 switch (cmd) {
2030                 case SIOCADDRT:
2031                         err = ip6_route_add(&cfg);
2032                         break;
2033                 case SIOCDELRT:
2034                         err = ip6_route_del(&cfg);
2035                         break;
2036                 default:
2037                         err = -EINVAL;
2038                 }
2039                 rtnl_unlock();
2040
2041                 return err;
2042         }
2043
2044         return -EINVAL;
2045 }
2046
2047 /*
2048  *      Drop the packet on the floor
2049  */
2050
2051 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2052 {
2053         int type;
2054         struct dst_entry *dst = skb_dst(skb);
2055         switch (ipstats_mib_noroutes) {
2056         case IPSTATS_MIB_INNOROUTES:
2057                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2058                 if (type == IPV6_ADDR_ANY) {
2059                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2060                                       IPSTATS_MIB_INADDRERRORS);
2061                         break;
2062                 }
2063                 /* FALLTHROUGH */
2064         case IPSTATS_MIB_OUTNOROUTES:
2065                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2066                               ipstats_mib_noroutes);
2067                 break;
2068         }
2069         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2070         kfree_skb(skb);
2071         return 0;
2072 }
2073
2074 static int ip6_pkt_discard(struct sk_buff *skb)
2075 {
2076         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2077 }
2078
2079 static int ip6_pkt_discard_out(struct sk_buff *skb)
2080 {
2081         skb->dev = skb_dst(skb)->dev;
2082         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2083 }
2084
2085 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2086
2087 static int ip6_pkt_prohibit(struct sk_buff *skb)
2088 {
2089         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2090 }
2091
2092 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2093 {
2094         skb->dev = skb_dst(skb)->dev;
2095         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2096 }
2097
2098 #endif
2099
2100 /*
2101  *      Allocate a dst for local (unicast / anycast) address.
2102  */
2103
2104 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2105                                     const struct in6_addr *addr,
2106                                     bool anycast)
2107 {
2108         struct net *net = dev_net(idev->dev);
2109         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2110         int err;
2111
2112         if (!rt) {
2113                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2114                 return ERR_PTR(-ENOMEM);
2115         }
2116
2117         in6_dev_hold(idev);
2118
2119         rt->dst.flags |= DST_HOST;
2120         rt->dst.input = ip6_input;
2121         rt->dst.output = ip6_output;
2122         rt->rt6i_idev = idev;
2123         rt->dst.obsolete = -1;
2124
2125         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2126         if (anycast)
2127                 rt->rt6i_flags |= RTF_ANYCAST;
2128         else
2129                 rt->rt6i_flags |= RTF_LOCAL;
2130         err = rt6_bind_neighbour(rt, rt->dst.dev);
2131         if (err) {
2132                 dst_free(&rt->dst);
2133                 return ERR_PTR(err);
2134         }
2135
2136         rt->rt6i_dst.addr = *addr;
2137         rt->rt6i_dst.plen = 128;
2138         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2139
2140         atomic_set(&rt->dst.__refcnt, 1);
2141
2142         return rt;
2143 }
2144
2145 int ip6_route_get_saddr(struct net *net,
2146                         struct rt6_info *rt,
2147                         const struct in6_addr *daddr,
2148                         unsigned int prefs,
2149                         struct in6_addr *saddr)
2150 {
2151         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2152         int err = 0;
2153         if (rt->rt6i_prefsrc.plen)
2154                 *saddr = rt->rt6i_prefsrc.addr;
2155         else
2156                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2157                                          daddr, prefs, saddr);
2158         return err;
2159 }
2160
/*
 * Remove a deleted address from the prefsrc entries of routes.
 *
 * Walker argument handed to fib6_remove_prefsrc().
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* restrict to this device; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address to purge from rt6i_prefsrc */
};
2167
2168 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2169 {
2170         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2171         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2172         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2173
2174         if (((void *)rt->dst.dev == dev || !dev) &&
2175             rt != net->ipv6.ip6_null_entry &&
2176             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2177                 /* remove prefsrc entry */
2178                 rt->rt6i_prefsrc.plen = 0;
2179         }
2180         return 0;
2181 }
2182
2183 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2184 {
2185         struct net *net = dev_net(ifp->idev->dev);
2186         struct arg_dev_net_ip adni = {
2187                 .dev = ifp->idev->dev,
2188                 .net = net,
2189                 .addr = &ifp->addr,
2190         };
2191         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2192 }
2193
/* Walker argument handed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
};
2198
2199 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2200 {
2201         const struct arg_dev_net *adn = arg;
2202         const struct net_device *dev = adn->dev;
2203
2204         if ((rt->dst.dev == dev || !dev) &&
2205             rt != adn->net->ipv6.ip6_null_entry)
2206                 return -1;
2207
2208         return 0;
2209 }
2210
2211 void rt6_ifdown(struct net *net, struct net_device *dev)
2212 {
2213         struct arg_dev_net adn = {
2214                 .dev = dev,
2215                 .net = net,
2216         };
2217
2218         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2219         icmp6_clean_all(fib6_ifdown, &adn);
2220 }
2221
/* Walker argument handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2226
2227 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2228 {
2229         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2230         struct inet6_dev *idev;
2231
2232         /* In IPv6 pmtu discovery is not optional,
2233            so that RTAX_MTU lock cannot disable it.
2234            We still use this lock to block changes
2235            caused by addrconf/ndisc.
2236         */
2237
2238         idev = __in6_dev_get(arg->dev);
2239         if (!idev)
2240                 return 0;
2241
2242         /* For administrative MTU increase, there is no way to discover
2243            IPv6 PMTU increase, so PMTU increase should be updated here.
2244            Since RFC 1981 doesn't include administrative MTU increase
2245            update PMTU increase is a MUST. (i.e. jumbo frame)
2246          */
2247         /*
2248            If new MTU is less than route PMTU, this new MTU will be the
2249            lowest MTU in the path, update the route PMTU to reflect PMTU
2250            decreases; if new MTU is greater than route PMTU, and the
2251            old MTU is the lowest MTU in the path, update the route PMTU
2252            to reflect the increase. In this case if the other nodes' MTU
2253            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2254            PMTU discouvery.
2255          */
2256         if (rt->dst.dev == arg->dev &&
2257             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2258             (dst_mtu(&rt->dst) >= arg->mtu ||
2259              (dst_mtu(&rt->dst) < arg->mtu &&
2260               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2261                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2262         }
2263         return 0;
2264 }
2265
2266 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2267 {
2268         struct rt6_mtu_change_arg arg = {
2269                 .dev = dev,
2270                 .mtu = mtu,
2271         };
2272
2273         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2274 }
2275
2276 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2277         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2278         [RTA_OIF]               = { .type = NLA_U32 },
2279         [RTA_IIF]               = { .type = NLA_U32 },
2280         [RTA_PRIORITY]          = { .type = NLA_U32 },
2281         [RTA_METRICS]           = { .type = NLA_NESTED },
2282 };
2283
2284 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2285                               struct fib6_config *cfg)
2286 {
2287         struct rtmsg *rtm;
2288         struct nlattr *tb[RTA_MAX+1];
2289         int err;
2290
2291         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2292         if (err < 0)
2293                 goto errout;
2294
2295         err = -EINVAL;
2296         rtm = nlmsg_data(nlh);
2297         memset(cfg, 0, sizeof(*cfg));
2298
2299         cfg->fc_table = rtm->rtm_table;
2300         cfg->fc_dst_len = rtm->rtm_dst_len;
2301         cfg->fc_src_len = rtm->rtm_src_len;
2302         cfg->fc_flags = RTF_UP;
2303         cfg->fc_protocol = rtm->rtm_protocol;
2304
2305         if (rtm->rtm_type == RTN_UNREACHABLE)
2306                 cfg->fc_flags |= RTF_REJECT;
2307
2308         if (rtm->rtm_type == RTN_LOCAL)
2309                 cfg->fc_flags |= RTF_LOCAL;
2310
2311         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2312         cfg->fc_nlinfo.nlh = nlh;
2313         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2314
2315         if (tb[RTA_GATEWAY]) {
2316                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2317                 cfg->fc_flags |= RTF_GATEWAY;
2318         }
2319
2320         if (tb[RTA_DST]) {
2321                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2322
2323                 if (nla_len(tb[RTA_DST]) < plen)
2324                         goto errout;
2325
2326                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2327         }
2328
2329         if (tb[RTA_SRC]) {
2330                 int plen = (rtm->rtm_src_len + 7) >> 3;
2331
2332                 if (nla_len(tb[RTA_SRC]) < plen)
2333                         goto errout;
2334
2335                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2336         }
2337
2338         if (tb[RTA_PREFSRC])
2339                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2340
2341         if (tb[RTA_OIF])
2342                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2343
2344         if (tb[RTA_PRIORITY])
2345                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2346
2347         if (tb[RTA_METRICS]) {
2348                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2349                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2350         }
2351
2352         if (tb[RTA_TABLE])
2353                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2354
2355         err = 0;
2356 errout:
2357         return err;
2358 }
2359
2360 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2361 {
2362         struct fib6_config cfg;
2363         int err;
2364
2365         err = rtm_to_fib6_config(skb, nlh, &cfg);
2366         if (err < 0)
2367                 return err;
2368
2369         return ip6_route_del(&cfg);
2370 }
2371
2372 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2373 {
2374         struct fib6_config cfg;
2375         int err;
2376
2377         err = rtm_to_fib6_config(skb, nlh, &cfg);
2378         if (err < 0)
2379                 return err;
2380
2381         return ip6_route_add(&cfg);
2382 }
2383
2384 static inline size_t rt6_nlmsg_size(void)
2385 {
2386         return NLMSG_ALIGN(sizeof(struct rtmsg))
2387                + nla_total_size(16) /* RTA_SRC */
2388                + nla_total_size(16) /* RTA_DST */
2389                + nla_total_size(16) /* RTA_GATEWAY */
2390                + nla_total_size(16) /* RTA_PREFSRC */
2391                + nla_total_size(4) /* RTA_TABLE */
2392                + nla_total_size(4) /* RTA_IIF */
2393                + nla_total_size(4) /* RTA_OIF */
2394                + nla_total_size(4) /* RTA_PRIORITY */
2395                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2396                + nla_total_size(sizeof(struct rta_cacheinfo));
2397 }
2398
2399 static int rt6_fill_node(struct net *net,
2400                          struct sk_buff *skb, struct rt6_info *rt,
2401                          struct in6_addr *dst, struct in6_addr *src,
2402                          int iif, int type, u32 pid, u32 seq,
2403                          int prefix, int nowait, unsigned int flags)
2404 {
2405         const struct inet_peer *peer;
2406         struct rtmsg *rtm;
2407         struct nlmsghdr *nlh;
2408         long expires;
2409         u32 table;
2410         struct neighbour *n;
2411         u32 ts, tsage;
2412
2413         if (prefix) {   /* user wants prefix routes only */
2414                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2415                         /* success since this is not a prefix route */
2416                         return 1;
2417                 }
2418         }
2419
2420         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2421         if (!nlh)
2422                 return -EMSGSIZE;
2423
2424         rtm = nlmsg_data(nlh);
2425         rtm->rtm_family = AF_INET6;
2426         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2427         rtm->rtm_src_len = rt->rt6i_src.plen;
2428         rtm->rtm_tos = 0;
2429         if (rt->rt6i_table)
2430                 table = rt->rt6i_table->tb6_id;
2431         else
2432                 table = RT6_TABLE_UNSPEC;
2433         rtm->rtm_table = table;
2434         if (nla_put_u32(skb, RTA_TABLE, table))
2435                 goto nla_put_failure;
2436         if (rt->rt6i_flags & RTF_REJECT)
2437                 rtm->rtm_type = RTN_UNREACHABLE;
2438         else if (rt->rt6i_flags & RTF_LOCAL)
2439                 rtm->rtm_type = RTN_LOCAL;
2440         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2441                 rtm->rtm_type = RTN_LOCAL;
2442         else
2443                 rtm->rtm_type = RTN_UNICAST;
2444         rtm->rtm_flags = 0;
2445         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2446         rtm->rtm_protocol = rt->rt6i_protocol;
2447         if (rt->rt6i_flags & RTF_DYNAMIC)
2448                 rtm->rtm_protocol = RTPROT_REDIRECT;
2449         else if (rt->rt6i_flags & RTF_ADDRCONF)
2450                 rtm->rtm_protocol = RTPROT_KERNEL;
2451         else if (rt->rt6i_flags & RTF_DEFAULT)
2452                 rtm->rtm_protocol = RTPROT_RA;
2453
2454         if (rt->rt6i_flags & RTF_CACHE)
2455                 rtm->rtm_flags |= RTM_F_CLONED;
2456
2457         if (dst) {
2458                 if (nla_put(skb, RTA_DST, 16, dst))
2459                         goto nla_put_failure;
2460                 rtm->rtm_dst_len = 128;
2461         } else if (rtm->rtm_dst_len)
2462                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2463                         goto nla_put_failure;
2464 #ifdef CONFIG_IPV6_SUBTREES
2465         if (src) {
2466                 if (nla_put(skb, RTA_SRC, 16, src))
2467                         goto nla_put_failure;
2468                 rtm->rtm_src_len = 128;
2469         } else if (rtm->rtm_src_len &&
2470                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2471                 goto nla_put_failure;
2472 #endif
2473         if (iif) {
2474 #ifdef CONFIG_IPV6_MROUTE
2475                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2476                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2477                         if (err <= 0) {
2478                                 if (!nowait) {
2479                                         if (err == 0)
2480                                                 return 0;
2481                                         goto nla_put_failure;
2482                                 } else {
2483                                         if (err == -EMSGSIZE)
2484                                                 goto nla_put_failure;
2485                                 }
2486                         }
2487                 } else
2488 #endif
2489                         if (nla_put_u32(skb, RTA_IIF, iif))
2490                                 goto nla_put_failure;
2491         } else if (dst) {
2492                 struct in6_addr saddr_buf;
2493                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2494                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2495                         goto nla_put_failure;
2496         }
2497
2498         if (rt->rt6i_prefsrc.plen) {
2499                 struct in6_addr saddr_buf;
2500                 saddr_buf = rt->rt6i_prefsrc.addr;
2501                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2502                         goto nla_put_failure;
2503         }
2504
2505         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2506                 goto nla_put_failure;
2507
2508         rcu_read_lock();
2509         n = dst_get_neighbour_noref(&rt->dst);
2510         if (n) {
2511                 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2512                         rcu_read_unlock();
2513                         goto nla_put_failure;
2514                 }
2515         }
2516         rcu_read_unlock();
2517
2518         if (rt->dst.dev &&
2519             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2520                 goto nla_put_failure;
2521         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2522                 goto nla_put_failure;
2523         if (!(rt->rt6i_flags & RTF_EXPIRES))
2524                 expires = 0;
2525         else if (rt->dst.expires - jiffies < INT_MAX)
2526                 expires = rt->dst.expires - jiffies;
2527         else
2528                 expires = INT_MAX;
2529
2530         peer = NULL;
2531         if (rt6_has_peer(rt))
2532                 peer = rt6_peer_ptr(rt);
2533         ts = tsage = 0;
2534         if (peer && peer->tcp_ts_stamp) {
2535                 ts = peer->tcp_ts;
2536                 tsage = get_seconds() - peer->tcp_ts_stamp;
2537         }
2538
2539         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
2540                                expires, rt->dst.error) < 0)
2541                 goto nla_put_failure;
2542
2543         return nlmsg_end(skb, nlh);
2544
2545 nla_put_failure:
2546         nlmsg_cancel(skb, nlh);
2547         return -EMSGSIZE;
2548 }
2549
2550 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2551 {
2552         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2553         int prefix;
2554
2555         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2556                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2557                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2558         } else
2559                 prefix = 0;
2560
2561         return rt6_fill_node(arg->net,
2562                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2563                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2564                      prefix, 0, NLM_F_MULTI);
2565 }
2566
2567 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2568 {
2569         struct net *net = sock_net(in_skb->sk);
2570         struct nlattr *tb[RTA_MAX+1];
2571         struct rt6_info *rt;
2572         struct sk_buff *skb;
2573         struct rtmsg *rtm;
2574         struct flowi6 fl6;
2575         int err, iif = 0, oif = 0;
2576
2577         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2578         if (err < 0)
2579                 goto errout;
2580
2581         err = -EINVAL;
2582         memset(&fl6, 0, sizeof(fl6));
2583
2584         if (tb[RTA_SRC]) {
2585                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2586                         goto errout;
2587
2588                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2589         }
2590
2591         if (tb[RTA_DST]) {
2592                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2593                         goto errout;
2594
2595                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2596         }
2597
2598         if (tb[RTA_IIF])
2599                 iif = nla_get_u32(tb[RTA_IIF]);
2600
2601         if (tb[RTA_OIF])
2602                 oif = nla_get_u32(tb[RTA_OIF]);
2603
2604         if (iif) {
2605                 struct net_device *dev;
2606                 int flags = 0;
2607
2608                 dev = __dev_get_by_index(net, iif);
2609                 if (!dev) {
2610                         err = -ENODEV;
2611                         goto errout;
2612                 }
2613
2614                 fl6.flowi6_iif = iif;
2615
2616                 if (!ipv6_addr_any(&fl6.saddr))
2617                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2618
2619                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2620                                                                flags);
2621         } else {
2622                 fl6.flowi6_oif = oif;
2623
2624                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2625         }
2626
2627         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2628         if (!skb) {
2629                 dst_release(&rt->dst);
2630                 err = -ENOBUFS;
2631                 goto errout;
2632         }
2633
2634         /* Reserve room for dummy headers, this skb can pass
2635            through good chunk of routing engine.
2636          */
2637         skb_reset_mac_header(skb);
2638         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2639
2640         skb_dst_set(skb, &rt->dst);
2641
2642         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2643                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2644                             nlh->nlmsg_seq, 0, 0, 0);
2645         if (err < 0) {
2646                 kfree_skb(skb);
2647                 goto errout;
2648         }
2649
2650         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2651 errout:
2652         return err;
2653 }
2654
2655 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2656 {
2657         struct sk_buff *skb;
2658         struct net *net = info->nl_net;
2659         u32 seq;
2660         int err;
2661
2662         err = -ENOBUFS;
2663         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2664
2665         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2666         if (!skb)
2667                 goto errout;
2668
2669         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2670                                 event, info->pid, seq, 0, 0, 0);
2671         if (err < 0) {
2672                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2673                 WARN_ON(err == -EMSGSIZE);
2674                 kfree_skb(skb);
2675                 goto errout;
2676         }
2677         rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2678                     info->nlh, gfp_any());
2679         return;
2680 errout:
2681         if (err < 0)
2682                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2683 }
2684
2685 static int ip6_route_dev_notify(struct notifier_block *this,
2686                                 unsigned long event, void *data)
2687 {
2688         struct net_device *dev = (struct net_device *)data;
2689         struct net *net = dev_net(dev);
2690
2691         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2692                 net->ipv6.ip6_null_entry->dst.dev = dev;
2693                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2694 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2695                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2696                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2697                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2698                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2699 #endif
2700         }
2701
2702         return NOTIFY_OK;
2703 }
2704
2705 /*
2706  *      /proc
2707  */
2708
2709 #ifdef CONFIG_PROC_FS
2710
/*
 * Bookkeeping record: a buffer pointer plus four integer counters.
 * NOTE(review): no user of this struct is visible in this part of the
 * file -- confirm whether it is still needed.
 */
struct rt6_proc_arg {
        char *buffer;
        int offset, length, skip, len;
};
2719
2720 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2721 {
2722         struct seq_file *m = p_arg;
2723         struct neighbour *n;
2724
2725         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2726
2727 #ifdef CONFIG_IPV6_SUBTREES
2728         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2729 #else
2730         seq_puts(m, "00000000000000000000000000000000 00 ");
2731 #endif
2732         rcu_read_lock();
2733         n = dst_get_neighbour_noref(&rt->dst);
2734         if (n) {
2735                 seq_printf(m, "%pi6", n->primary_key);
2736         } else {
2737                 seq_puts(m, "00000000000000000000000000000000");
2738         }
2739         rcu_read_unlock();
2740         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2741                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2742                    rt->dst.__use, rt->rt6i_flags,
2743                    rt->dst.dev ? rt->dst.dev->name : "");
2744         return 0;
2745 }
2746
2747 static int ipv6_route_show(struct seq_file *m, void *v)
2748 {
2749         struct net *net = (struct net *)m->private;
2750         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2751         return 0;
2752 }
2753
2754 static int ipv6_route_open(struct inode *inode, struct file *file)
2755 {
2756         return single_open_net(inode, file, ipv6_route_show);
2757 }
2758
2759 static const struct file_operations ipv6_route_proc_fops = {
2760         .owner          = THIS_MODULE,
2761         .open           = ipv6_route_open,
2762         .read           = seq_read,
2763         .llseek         = seq_lseek,
2764         .release        = single_release_net,
2765 };
2766
2767 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2768 {
2769         struct net *net = (struct net *)seq->private;
2770         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2771                    net->ipv6.rt6_stats->fib_nodes,
2772                    net->ipv6.rt6_stats->fib_route_nodes,
2773                    net->ipv6.rt6_stats->fib_rt_alloc,
2774                    net->ipv6.rt6_stats->fib_rt_entries,
2775                    net->ipv6.rt6_stats->fib_rt_cache,
2776                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2777                    net->ipv6.rt6_stats->fib_discarded_routes);
2778
2779         return 0;
2780 }
2781
2782 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2783 {
2784         return single_open_net(inode, file, rt6_stats_seq_show);
2785 }
2786
2787 static const struct file_operations rt6_stats_seq_fops = {
2788         .owner   = THIS_MODULE,
2789         .open    = rt6_stats_seq_open,
2790         .read    = seq_read,
2791         .llseek  = seq_lseek,
2792         .release = single_release_net,
2793 };
2794 #endif  /* CONFIG_PROC_FS */
2795
2796 #ifdef CONFIG_SYSCTL
2797
2798 static
2799 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2800                               void __user *buffer, size_t *lenp, loff_t *ppos)
2801 {
2802         struct net *net;
2803         int delay;
2804         if (!write)
2805                 return -EINVAL;
2806
2807         net = (struct net *)ctl->extra1;
2808         delay = net->ipv6.sysctl.flush_delay;
2809         proc_dointvec(ctl, write, buffer, lenp, ppos);
2810         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2811         return 0;
2812 }
2813
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * The .data pointers below refer to init_net (and, for "gc_thresh", to
 * ip6_dst_ops_template).  ipv6_route_sysctl_init() duplicates this array
 * and re-points .data of entries 0..9 BY INDEX at the fields of the target
 * namespace, so the order of the entries here must stay in step with the
 * indices used there.
 */
2814 ctl_table ipv6_route_table_template[] = {
             /* [0] write-only (0200); handled by ipv6_sysctl_rtcache_flush() */
2815         {
2816                 .procname       =       "flush",
2817                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2818                 .maxlen         =       sizeof(int),
2819                 .mode           =       0200,
2820                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2821         },
             /* [1] */
2822         {
2823                 .procname       =       "gc_thresh",
2824                 .data           =       &ip6_dst_ops_template.gc_thresh,
2825                 .maxlen         =       sizeof(int),
2826                 .mode           =       0644,
2827                 .proc_handler   =       proc_dointvec,
2828         },
             /* [2] */
2829         {
2830                 .procname       =       "max_size",
2831                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2832                 .maxlen         =       sizeof(int),
2833                 .mode           =       0644,
2834                 .proc_handler   =       proc_dointvec,
2835         },
             /* [3] same variable as entry [9], accessed via proc_dointvec_jiffies */
2836         {
2837                 .procname       =       "gc_min_interval",
2838                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2839                 .maxlen         =       sizeof(int),
2840                 .mode           =       0644,
2841                 .proc_handler   =       proc_dointvec_jiffies,
2842         },
             /* [4] */
2843         {
2844                 .procname       =       "gc_timeout",
2845                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2846                 .maxlen         =       sizeof(int),
2847                 .mode           =       0644,
2848                 .proc_handler   =       proc_dointvec_jiffies,
2849         },
             /* [5] */
2850         {
2851                 .procname       =       "gc_interval",
2852                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2853                 .maxlen         =       sizeof(int),
2854                 .mode           =       0644,
2855                 .proc_handler   =       proc_dointvec_jiffies,
2856         },
             /* [6] */
2857         {
2858                 .procname       =       "gc_elasticity",
2859                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2860                 .maxlen         =       sizeof(int),
2861                 .mode           =       0644,
2862                 .proc_handler   =       proc_dointvec,
2863         },
             /* [7] */
2864         {
2865                 .procname       =       "mtu_expires",
2866                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2867                 .maxlen         =       sizeof(int),
2868                 .mode           =       0644,
2869                 .proc_handler   =       proc_dointvec_jiffies,
2870         },
             /* [8] */
2871         {
2872                 .procname       =       "min_adv_mss",
2873                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2874                 .maxlen         =       sizeof(int),
2875                 .mode           =       0644,
2876                 .proc_handler   =       proc_dointvec,
2877         },
             /* [9] same variable as entry [3], accessed via proc_dointvec_ms_jiffies */
2878         {
2879                 .procname       =       "gc_min_interval_ms",
2880                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2881                 .maxlen         =       sizeof(int),
2882                 .mode           =       0644,
2883                 .proc_handler   =       proc_dointvec_ms_jiffies,
2884         },
             /* empty terminating entry */
2885         { }
2886 };
2887
2888 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2889 {
2890         struct ctl_table *table;
2891
2892         table = kmemdup(ipv6_route_table_template,
2893                         sizeof(ipv6_route_table_template),
2894                         GFP_KERNEL);
2895
2896         if (table) {
2897                 table[0].data = &net->ipv6.sysctl.flush_delay;
2898                 table[0].extra1 = net;
2899                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2900                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2901                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2902                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2903                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2904                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2905                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2906                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2907                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2908         }
2909
2910         return table;
2911 }
2912 #endif
2913
/*
 * Per-namespace init of the IPv6 routing state.
 *
 * Copies the dst_ops template into @net, initialises its entry counter,
 * duplicates the special route entry templates (null; with
 * CONFIG_IPV6_MULTIPLE_TABLES also prohibit and blackhole), sets the sysctl
 * defaults and, with CONFIG_PROC_FS, creates the "ipv6_route" and
 * "rt6_stats" proc entries.
 *
 * Returns 0 on success or -ENOMEM; on failure everything allocated so far
 * is released through the label ladder at the bottom.
 */
2914 static int __net_init ip6_route_net_init(struct net *net)
2915 {
2916         int ret = -ENOMEM;
2917
2918         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2919                sizeof(net->ipv6.ip6_dst_ops));
2920
2921         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2922                 goto out_ip6_dst_ops;
2923
             /* Each special entry is a private copy whose dst.path points at itself. */
2924         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2925                                            sizeof(*net->ipv6.ip6_null_entry),
2926                                            GFP_KERNEL);
2927         if (!net->ipv6.ip6_null_entry)
2928                 goto out_ip6_dst_entries;
2929         net->ipv6.ip6_null_entry->dst.path =
2930                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2931         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2932         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2933                          ip6_template_metrics, true);
2934
2935 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2936         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2937                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2938                                                GFP_KERNEL);
2939         if (!net->ipv6.ip6_prohibit_entry)
2940                 goto out_ip6_null_entry;
2941         net->ipv6.ip6_prohibit_entry->dst.path =
2942                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2943         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2944         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2945                          ip6_template_metrics, true);
2946
2947         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2948                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2949                                                GFP_KERNEL);
2950         if (!net->ipv6.ip6_blk_hole_entry)
2951                 goto out_ip6_prohibit_entry;
2952         net->ipv6.ip6_blk_hole_entry->dst.path =
2953                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2954         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2955         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2956                          ip6_template_metrics, true);
2957 #endif
2958
             /* Default values of the per-namespace route sysctls. */
2959         net->ipv6.sysctl.flush_delay = 0;
2960         net->ipv6.sysctl.ip6_rt_max_size = 4096;
2961         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2962         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2963         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2964         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2965         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2966         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2967
             /* NOTE(review): the results of proc_net_fops_create() are not checked. */
2968 #ifdef CONFIG_PROC_FS
2969         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2970         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2971 #endif
2972         net->ipv6.ip6_rt_gc_expire = 30*HZ;
2973
2974         ret = 0;
2975 out:
2976         return ret;
2977
             /*
              * Error unwinding: each label releases one resource and falls
              * through to the next, then jumps back to "out" with ret still
              * -ENOMEM.
              */
2978 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2979 out_ip6_prohibit_entry:
2980         kfree(net->ipv6.ip6_prohibit_entry);
2981 out_ip6_null_entry:
2982         kfree(net->ipv6.ip6_null_entry);
2983 #endif
2984 out_ip6_dst_entries:
2985         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2986 out_ip6_dst_ops:
2987         goto out;
2988 }
2989
2990 static void __net_exit ip6_route_net_exit(struct net *net)
2991 {
2992 #ifdef CONFIG_PROC_FS
2993         proc_net_remove(net, "ipv6_route");
2994         proc_net_remove(net, "rt6_stats");
2995 #endif
2996         kfree(net->ipv6.ip6_null_entry);
2997 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2998         kfree(net->ipv6.ip6_prohibit_entry);
2999         kfree(net->ipv6.ip6_blk_hole_entry);
3000 #endif
3001         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3002 }
3003
3004 static struct pernet_operations ip6_route_net_ops = {
3005         .init = ip6_route_net_init,
3006         .exit = ip6_route_net_exit,
3007 };
3008
3009 static int __net_init ipv6_inetpeer_init(struct net *net)
3010 {
3011         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3012
3013         if (!bp)
3014                 return -ENOMEM;
3015         inet_peer_base_init(bp);
3016         net->ipv6.peers = bp;
3017         return 0;
3018 }
3019
3020 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3021 {
3022         struct inet_peer_base *bp = net->ipv6.peers;
3023
3024         net->ipv6.peers = NULL;
3025         inetpeer_invalidate_tree(bp);
3026         kfree(bp);
3027 }
3028
3029 static struct pernet_operations ipv6_inetpeer_ops = {
3030         .init   =       ipv6_inetpeer_init,
3031         .exit   =       ipv6_inetpeer_exit,
3032 };
3033
3034 static struct notifier_block ip6_route_dev_notifier = {
3035         .notifier_call = ip6_route_dev_notify,
3036         .priority = 0,
3037 };
3038
/*
 * Module-level initialisation of IPv6 routing.
 *
 * In order: create the rt6_info slab cache, initialise the blackhole dst
 * counter, register the per-namespace route and inet_peer operations, bind
 * init_net's special route entries to its loopback device, initialise
 * fib6, xfrm6 and the fib6 rules, register the three rtnetlink route
 * handlers and finally the netdevice notifier.
 *
 * Returns 0 on success or a negative errno; on failure the steps already
 * completed are undone in reverse order by the label ladder at the bottom.
 */
3039 int __init ip6_route_init(void)
3040 {
3041         int ret;
3042
3043         ret = -ENOMEM;
3044         ip6_dst_ops_template.kmem_cachep =
3045                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3046                                   SLAB_HWCACHE_ALIGN, NULL);
3047         if (!ip6_dst_ops_template.kmem_cachep)
3048                 goto out;
3049
3050         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3051         if (ret)
3052                 goto out_kmem_cache;
3053
3054         ret = register_pernet_subsys(&ip6_route_net_ops);
3055         if (ret)
3056                 goto out_dst_entries;
3057
3058         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3059         if (ret)
3060                 goto out_register_subsys;
3061
             /* The blackhole ops allocate from the same slab cache as regular routes. */
3062         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3063
3064         /* Registering of the loopback is done before this portion of code,
3065          * the loopback reference in rt6_info will not be taken, do it
3066          * manually for init_net */
3067         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3068         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3069   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3070         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3071         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3072         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3073         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3074   #endif
3075         ret = fib6_init();
3076         if (ret)
3077                 goto out_register_inetpeer;
3078
3079         ret = xfrm6_init();
3080         if (ret)
3081                 goto out_fib6_init;
3082
3083         ret = fib6_rules_init();
3084         if (ret)
3085                 goto xfrm6_init;
3086
             /* Any failing registration below is reported as -ENOBUFS. */
3087         ret = -ENOBUFS;
3088         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3089             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3090             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3091                 goto fib6_rules_init;
3092
             /*
              * NOTE(review): if this fails, the unwind path below does not
              * undo the __rtnl_register() calls made just above -- confirm
              * whether that is intended.
              */
3093         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3094         if (ret)
3095                 goto fib6_rules_init;
3096
3097 out:
3098         return ret;
3099
             /*
              * Error unwinding, newest step first.  The labels "fib6_rules_init"
              * and "xfrm6_init" are goto labels named after the step they undo,
              * not calls to the functions of the same name.
              */
3100 fib6_rules_init:
3101         fib6_rules_cleanup();
3102 xfrm6_init:
3103         xfrm6_fini();
3104 out_fib6_init:
3105         fib6_gc_cleanup();
3106 out_register_inetpeer:
3107         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3108 out_register_subsys:
3109         unregister_pernet_subsys(&ip6_route_net_ops);
3110 out_dst_entries:
3111         dst_entries_destroy(&ip6_dst_blackhole_ops);
3112 out_kmem_cache:
3113         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3114         goto out;
3115 }
3116
/*
 * Module-level teardown of IPv6 routing: undoes the steps of
 * ip6_route_init() in reverse order, from the netdevice notifier down to
 * the slab cache.
 * NOTE(review): the rtnetlink handlers registered in ip6_route_init() are
 * not unregistered here -- confirm that this happens elsewhere.
 */
3117 void ip6_route_cleanup(void)
3118 {
3119         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3120         fib6_rules_cleanup();
3121         xfrm6_fini();
3122         fib6_gc_cleanup();
3123         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3124         unregister_pernet_subsys(&ip6_route_net_ops);
3125         dst_entries_destroy(&ip6_dst_blackhole_ops);
             /* Last: the slab cache that the route entries were allocated from. */
3126         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3127 }