2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
24 * Fixed routing subtrees.
27 #define pr_fmt(fmt) "IPv6: " fmt
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
55 #include <linux/rtnetlink.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
62 #include <asm/uaccess.h>
65 #include <linux/sysctl.h>
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69 const struct in6_addr *dest);
70 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void ip6_dst_destroy(struct dst_entry *);
75 static void ip6_dst_ifdown(struct dst_entry *,
76 struct net_device *dev, int how);
77 static int ip6_dst_gc(struct dst_ops *ops);
79 static int ip6_pkt_discard(struct sk_buff *skb);
80 static int ip6_pkt_discard_out(struct sk_buff *skb);
81 static void ip6_link_failure(struct sk_buff *skb);
82 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83 struct sk_buff *skb, u32 mtu);
84 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net *net,
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex,
92 static struct rt6_info *rt6_get_route_info(struct net *net,
93 const struct in6_addr *prefix, int prefixlen,
94 const struct in6_addr *gwaddr, int ifindex);
97 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
99 struct rt6_info *rt = (struct rt6_info *) dst;
100 struct inet_peer *peer;
103 if (!(rt->dst.flags & DST_HOST))
106 peer = rt6_get_peer_create(rt);
108 u32 *old_p = __DST_METRICS_PTR(old);
109 unsigned long prev, new;
112 if (inet_metrics_new(peer))
113 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
115 new = (unsigned long) p;
116 prev = cmpxchg(&dst->_metrics, old, new);
119 p = __DST_METRICS_PTR(prev);
120 if (prev & DST_METRICS_READ_ONLY)
127 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
131 struct in6_addr *p = &rt->rt6i_gateway;
133 if (!ipv6_addr_any(p))
134 return (const void *) p;
136 return &ipv6_hdr(skb)->daddr;
140 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
144 struct rt6_info *rt = (struct rt6_info *) dst;
147 daddr = choose_neigh_daddr(rt, skb, daddr);
148 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
151 return neigh_create(&nd_tbl, daddr, dst->dev);
154 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
156 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
158 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
167 static struct dst_ops ip6_dst_ops_template = {
169 .protocol = cpu_to_be16(ETH_P_IPV6),
172 .check = ip6_dst_check,
173 .default_advmss = ip6_default_advmss,
175 .cow_metrics = ipv6_cow_metrics,
176 .destroy = ip6_dst_destroy,
177 .ifdown = ip6_dst_ifdown,
178 .negative_advice = ip6_negative_advice,
179 .link_failure = ip6_link_failure,
180 .update_pmtu = ip6_rt_update_pmtu,
181 .redirect = rt6_do_redirect,
182 .local_out = __ip6_local_out,
183 .neigh_lookup = ip6_neigh_lookup,
186 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
188 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
190 return mtu ? : dst->dev->mtu;
193 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
194 struct sk_buff *skb, u32 mtu)
198 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
203 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
209 static struct dst_ops ip6_dst_blackhole_ops = {
211 .protocol = cpu_to_be16(ETH_P_IPV6),
212 .destroy = ip6_dst_destroy,
213 .check = ip6_dst_check,
214 .mtu = ip6_blackhole_mtu,
215 .default_advmss = ip6_default_advmss,
216 .update_pmtu = ip6_rt_blackhole_update_pmtu,
217 .redirect = ip6_rt_blackhole_redirect,
218 .cow_metrics = ip6_rt_blackhole_cow_metrics,
219 .neigh_lookup = ip6_neigh_lookup,
222 static const u32 ip6_template_metrics[RTAX_MAX] = {
223 [RTAX_HOPLIMIT - 1] = 255,
226 static const struct rt6_info ip6_null_entry_template = {
228 .__refcnt = ATOMIC_INIT(1),
230 .obsolete = DST_OBSOLETE_FORCE_CHK,
231 .error = -ENETUNREACH,
232 .input = ip6_pkt_discard,
233 .output = ip6_pkt_discard_out,
235 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
236 .rt6i_protocol = RTPROT_KERNEL,
237 .rt6i_metric = ~(u32) 0,
238 .rt6i_ref = ATOMIC_INIT(1),
241 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
243 static int ip6_pkt_prohibit(struct sk_buff *skb);
244 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
246 static const struct rt6_info ip6_prohibit_entry_template = {
248 .__refcnt = ATOMIC_INIT(1),
250 .obsolete = DST_OBSOLETE_FORCE_CHK,
252 .input = ip6_pkt_prohibit,
253 .output = ip6_pkt_prohibit_out,
255 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
256 .rt6i_protocol = RTPROT_KERNEL,
257 .rt6i_metric = ~(u32) 0,
258 .rt6i_ref = ATOMIC_INIT(1),
261 static const struct rt6_info ip6_blk_hole_entry_template = {
263 .__refcnt = ATOMIC_INIT(1),
265 .obsolete = DST_OBSOLETE_FORCE_CHK,
267 .input = dst_discard,
268 .output = dst_discard,
270 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
271 .rt6i_protocol = RTPROT_KERNEL,
272 .rt6i_metric = ~(u32) 0,
273 .rt6i_ref = ATOMIC_INIT(1),
278 /* allocate dst with ip6_dst_ops */
279 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
280 struct net_device *dev,
282 struct fib6_table *table)
284 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
285 0, DST_OBSOLETE_FORCE_CHK, flags);
288 struct dst_entry *dst = &rt->dst;
290 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
291 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
292 rt->rt6i_genid = rt_genid(net);
293 INIT_LIST_HEAD(&rt->rt6i_siblings);
294 rt->rt6i_nsiblings = 0;
/*
 * dst_ops.destroy callback for IPv6 routes: drop every reference the
 * rt6_info holds (neighbour, private metrics, inet6_dev, the "from"
 * route and the inet_peer).
 * NOTE(review): this extract is missing interior lines (braces and some
 * NULL checks), so parts of the control flow cannot be confirmed here.
 */
299 static void ip6_dst_destroy(struct dst_entry *dst)
301 struct rt6_info *rt = (struct rt6_info *)dst;
302 struct inet6_dev *idev = rt->rt6i_idev;
/* release the cached neighbour entry */
305 neigh_release(rt->n);
/* only non-DST_HOST routes own a private metrics array */
307 if (!(rt->dst.flags & DST_HOST))
308 dst_destroy_metrics_generic(dst);
311 rt->rt6i_idev = NULL;
/* a route that does not expire itself may pin its parent via dst->from */
315 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
316 dst_release(dst->from);
/* drop the bound inet_peer, if one was attached */
318 if (rt6_has_peer(rt)) {
319 struct inet_peer *peer = rt6_peer_ptr(rt);
324 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
326 static u32 rt6_peer_genid(void)
328 return atomic_read(&__rt6_peer_genid);
/*
 * Bind an inet_peer entry for this route's destination address and
 * stamp the route with the current peer generation.
 * NOTE(review): interior lines are missing between the rt6_set_peer()
 * test and the genid assignment, so the exact control flow (including
 * the lost-the-race path) cannot be confirmed from this extract.
 */
331 void rt6_bind_peer(struct rt6_info *rt, int create)
333 struct inet_peer_base *base;
334 struct inet_peer *peer;
336 base = inetpeer_base_ptr(rt->_rt6i_peer);
/* create != 0 presumably allows allocating a missing peer entry —
 * confirm against inet_getpeer_v6() */
340 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
342 if (!rt6_set_peer(rt, peer))
345 rt->rt6i_peer_genid = rt6_peer_genid();
/*
 * dst_ops.ifdown callback: when @dev is going away, repoint the route's
 * inet6_dev and cached neighbour at the namespace loopback device so
 * outstanding references stay safe until the dst is destroyed.
 * NOTE(review): closing braces and some reference-count release lines
 * are missing from this extract.
 */
349 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
352 struct rt6_info *rt = (struct rt6_info *)dst;
353 struct inet6_dev *idev = rt->rt6i_idev;
354 struct net_device *loopback_dev =
355 dev_net(dev)->loopback_dev;
357 if (dev != loopback_dev) {
358 if (idev && idev->dev == dev) {
359 struct inet6_dev *loopback_idev =
360 in6_dev_get(loopback_dev);
362 rt->rt6i_idev = loopback_idev;
/* migrate the cached neighbour's device reference as well */
366 if (rt->n && rt->n->dev == dev) {
367 rt->n->dev = loopback_dev;
368 dev_hold(loopback_dev);
374 static bool rt6_check_expired(const struct rt6_info *rt)
376 if (rt->rt6i_flags & RTF_EXPIRES) {
377 if (time_after(jiffies, rt->dst.expires))
379 } else if (rt->dst.from) {
380 return rt6_check_expired((struct rt6_info *) rt->dst.from);
385 static bool rt6_need_strict(const struct in6_addr *daddr)
387 return ipv6_addr_type(daddr) &
388 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
391 /* Multipath route selection:
392 * Hash based function using packet header and flowlabel.
393 * Adapted from fib_info_hashfn()
/*
 * Multipath next-hop selector: fold the protocol, both addresses, the
 * transport/ICMP fields and the flow label into a hash, then reduce it
 * modulo @candidate_count.  Adapted from fib_info_hashfn().
 * NOTE(review): the switch case labels are missing from this extract;
 * the port lines and the icmp lines belong to different cases in the
 * full file.
 */
395 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
396 const struct flowi6 *fl6)
398 unsigned int val = fl6->flowi6_proto;
400 val ^= (__force u32)fl6->daddr.s6_addr32[0];
401 val ^= (__force u32)fl6->daddr.s6_addr32[1];
402 val ^= (__force u32)fl6->daddr.s6_addr32[2];
403 val ^= (__force u32)fl6->daddr.s6_addr32[3];
405 val ^= (__force u32)fl6->saddr.s6_addr32[0];
406 val ^= (__force u32)fl6->saddr.s6_addr32[1];
407 val ^= (__force u32)fl6->saddr.s6_addr32[2];
408 val ^= (__force u32)fl6->saddr.s6_addr32[3];
410 /* Transport fields are only meaningful when not encapsulated */
411 switch (fl6->flowi6_proto) {
415 val ^= (__force u16)fl6->fl6_sport;
416 val ^= (__force u16)fl6->fl6_dport;
420 val ^= (__force u16)fl6->fl6_icmp_type;
421 val ^= (__force u16)fl6->fl6_icmp_code;
424 /* RFC 6438 recommends using the flow label for ECMP */
425 val ^= (__force u32)fl6->flowlabel;
427 /* Final mixing step; a candidate for tuning. */
428 val = val ^ (val >> 7) ^ (val >> 12);
429 return val % candidate_count;
432 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
435 struct rt6_info *sibling, *next_sibling;
438 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
439 /* Don't change the route, if route_choosen == 0
440 * (siblings does not include ourself)
443 list_for_each_entry_safe(sibling, next_sibling,
444 &match->rt6i_siblings, rt6i_siblings) {
446 if (route_choosen == 0) {
455 * Route lookup. Any table->tb6_lock is implied.
458 static inline struct rt6_info *rt6_device_match(struct net *net,
460 const struct in6_addr *saddr,
464 struct rt6_info *local = NULL;
465 struct rt6_info *sprt;
467 if (!oif && ipv6_addr_any(saddr))
470 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
471 struct net_device *dev = sprt->dst.dev;
474 if (dev->ifindex == oif)
476 if (dev->flags & IFF_LOOPBACK) {
477 if (!sprt->rt6i_idev ||
478 sprt->rt6i_idev->dev->ifindex != oif) {
479 if (flags & RT6_LOOKUP_F_IFACE && oif)
481 if (local && (!oif ||
482 local->rt6i_idev->dev->ifindex == oif))
488 if (ipv6_chk_addr(net, saddr, dev,
489 flags & RT6_LOOKUP_F_IFACE))
498 if (flags & RT6_LOOKUP_F_IFACE)
499 return net->ipv6.ip6_null_entry;
505 #ifdef CONFIG_IPV6_ROUTER_PREF
506 static void rt6_probe(struct rt6_info *rt)
508 struct neighbour *neigh;
510 * Okay, this does not seem to be appropriate
511 * for now, however, we need to check if it
512 * is really so; aka Router Reachability Probing.
514 * Router Reachability Probe MUST be rate-limited
515 * to no more than one per minute.
517 neigh = rt ? rt->n : NULL;
518 if (!neigh || (neigh->nud_state & NUD_VALID))
520 read_lock_bh(&neigh->lock);
521 if (!(neigh->nud_state & NUD_VALID) &&
522 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
523 struct in6_addr mcaddr;
524 struct in6_addr *target;
526 neigh->updated = jiffies;
527 read_unlock_bh(&neigh->lock);
529 target = (struct in6_addr *)&neigh->primary_key;
530 addrconf_addr_solict_mult(target, &mcaddr);
531 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
533 read_unlock_bh(&neigh->lock);
537 static inline void rt6_probe(struct rt6_info *rt)
543 * Default Router Selection (RFC 2461 6.3.6)
545 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
547 struct net_device *dev = rt->dst.dev;
548 if (!oif || dev->ifindex == oif)
550 if ((dev->flags & IFF_LOOPBACK) &&
551 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
556 static inline int rt6_check_neigh(struct rt6_info *rt)
558 struct neighbour *neigh;
562 if (rt->rt6i_flags & RTF_NONEXTHOP ||
563 !(rt->rt6i_flags & RTF_GATEWAY))
566 read_lock_bh(&neigh->lock);
567 if (neigh->nud_state & NUD_VALID)
569 #ifdef CONFIG_IPV6_ROUTER_PREF
570 else if (neigh->nud_state & NUD_FAILED)
575 read_unlock_bh(&neigh->lock);
581 static int rt6_score_route(struct rt6_info *rt, int oif,
586 m = rt6_check_dev(rt, oif);
587 if (!m && (strict & RT6_LOOKUP_F_IFACE))
589 #ifdef CONFIG_IPV6_ROUTER_PREF
590 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
592 n = rt6_check_neigh(rt);
593 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
598 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
599 int *mpri, struct rt6_info *match)
603 if (rt6_check_expired(rt))
606 m = rt6_score_route(rt, oif, strict);
611 if (strict & RT6_LOOKUP_F_REACHABLE)
615 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
623 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
624 struct rt6_info *rr_head,
625 u32 metric, int oif, int strict)
627 struct rt6_info *rt, *match;
631 for (rt = rr_head; rt && rt->rt6i_metric == metric;
632 rt = rt->dst.rt6_next)
633 match = find_match(rt, oif, strict, &mpri, match);
634 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
635 rt = rt->dst.rt6_next)
636 match = find_match(rt, oif, strict, &mpri, match);
641 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
643 struct rt6_info *match, *rt0;
648 fn->rr_ptr = rt0 = fn->leaf;
650 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
653 (strict & RT6_LOOKUP_F_REACHABLE)) {
654 struct rt6_info *next = rt0->dst.rt6_next;
656 /* no entries matched; do round-robin */
657 if (!next || next->rt6i_metric != rt0->rt6i_metric)
664 net = dev_net(rt0->dst.dev);
665 return match ? match : net->ipv6.ip6_null_entry;
668 #ifdef CONFIG_IPV6_ROUTE_INFO
669 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
670 const struct in6_addr *gwaddr)
672 struct net *net = dev_net(dev);
673 struct route_info *rinfo = (struct route_info *) opt;
674 struct in6_addr prefix_buf, *prefix;
676 unsigned long lifetime;
679 if (len < sizeof(struct route_info)) {
683 /* Sanity check for prefix_len and length */
684 if (rinfo->length > 3) {
686 } else if (rinfo->prefix_len > 128) {
688 } else if (rinfo->prefix_len > 64) {
689 if (rinfo->length < 2) {
692 } else if (rinfo->prefix_len > 0) {
693 if (rinfo->length < 1) {
698 pref = rinfo->route_pref;
699 if (pref == ICMPV6_ROUTER_PREF_INVALID)
702 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
704 if (rinfo->length == 3)
705 prefix = (struct in6_addr *)rinfo->prefix;
707 /* this function is safe */
708 ipv6_addr_prefix(&prefix_buf,
709 (struct in6_addr *)rinfo->prefix,
711 prefix = &prefix_buf;
714 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
717 if (rt && !lifetime) {
723 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
726 rt->rt6i_flags = RTF_ROUTEINFO |
727 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
730 if (!addrconf_finite_timeout(lifetime))
731 rt6_clean_expires(rt);
733 rt6_set_expires(rt, jiffies + HZ * lifetime);
735 dst_release(&rt->dst);
741 #define BACKTRACK(__net, saddr) \
743 if (rt == __net->ipv6.ip6_null_entry) { \
744 struct fib6_node *pn; \
746 if (fn->fn_flags & RTN_TL_ROOT) \
749 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
750 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
753 if (fn->fn_flags & RTN_RTINFO) \
759 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
760 struct fib6_table *table,
761 struct flowi6 *fl6, int flags)
763 struct fib6_node *fn;
766 read_lock_bh(&table->tb6_lock);
767 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
770 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
771 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
772 rt = rt6_multipath_select(rt, fl6);
773 BACKTRACK(net, &fl6->saddr);
775 dst_use(&rt->dst, jiffies);
776 read_unlock_bh(&table->tb6_lock);
781 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
784 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
786 EXPORT_SYMBOL_GPL(ip6_route_lookup);
788 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
789 const struct in6_addr *saddr, int oif, int strict)
791 struct flowi6 fl6 = {
795 struct dst_entry *dst;
796 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
799 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
800 flags |= RT6_LOOKUP_F_HAS_SADDR;
803 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
805 return (struct rt6_info *) dst;
812 EXPORT_SYMBOL(rt6_lookup);
814 /* ip6_ins_rt is called with FREE table->tb6_lock.
815 It takes new route entry, the addition fails by any reason the
816 route is freed. In any case, if caller does not hold it, it may
/*
 * Insert @rt into its own fib6 table under the table write lock and
 * return fib6_add()'s result.  Per the comment above (L398-L400 of this
 * extract), on failure the route is freed by the add path.
 * NOTE(review): the declaration of "err" and the return statement are
 * missing from this extract.
 */
820 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
823 struct fib6_table *table;
825 table = rt->rt6i_table;
826 write_lock_bh(&table->tb6_lock);
827 err = fib6_add(&table->tb6_root, rt, info);
828 write_unlock_bh(&table->tb6_lock);
833 int ip6_ins_rt(struct rt6_info *rt)
835 struct nl_info info = {
836 .nl_net = dev_net(rt->dst.dev),
838 return __ip6_ins_rt(rt, &info);
841 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
842 const struct in6_addr *daddr,
843 const struct in6_addr *saddr)
851 rt = ip6_rt_copy(ort, daddr);
854 int attempts = !in_softirq();
856 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
857 if (ort->rt6i_dst.plen != 128 &&
858 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
859 rt->rt6i_flags |= RTF_ANYCAST;
860 rt->rt6i_gateway = *daddr;
863 rt->rt6i_flags |= RTF_CACHE;
865 #ifdef CONFIG_IPV6_SUBTREES
866 if (rt->rt6i_src.plen && saddr) {
867 rt->rt6i_src.addr = *saddr;
868 rt->rt6i_src.plen = 128;
873 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
874 struct net *net = dev_net(rt->dst.dev);
875 int saved_rt_min_interval =
876 net->ipv6.sysctl.ip6_rt_gc_min_interval;
877 int saved_rt_elasticity =
878 net->ipv6.sysctl.ip6_rt_gc_elasticity;
880 if (attempts-- > 0) {
881 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
882 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
884 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
886 net->ipv6.sysctl.ip6_rt_gc_elasticity =
888 net->ipv6.sysctl.ip6_rt_gc_min_interval =
889 saved_rt_min_interval;
893 net_warn_ratelimited("Neighbour table overflow\n");
902 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
903 const struct in6_addr *daddr)
905 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
908 rt->rt6i_flags |= RTF_CACHE;
909 rt->n = neigh_clone(ort->n);
914 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
915 struct flowi6 *fl6, int flags)
917 struct fib6_node *fn;
918 struct rt6_info *rt, *nrt;
922 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
924 strict |= flags & RT6_LOOKUP_F_IFACE;
927 read_lock_bh(&table->tb6_lock);
930 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
933 rt = rt6_select(fn, oif, strict | reachable);
934 if (rt->rt6i_nsiblings && oif == 0)
935 rt = rt6_multipath_select(rt, fl6);
936 BACKTRACK(net, &fl6->saddr);
937 if (rt == net->ipv6.ip6_null_entry ||
938 rt->rt6i_flags & RTF_CACHE)
942 read_unlock_bh(&table->tb6_lock);
944 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
945 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
946 else if (!(rt->dst.flags & DST_HOST))
947 nrt = rt6_alloc_clone(rt, &fl6->daddr);
951 dst_release(&rt->dst);
952 rt = nrt ? : net->ipv6.ip6_null_entry;
956 err = ip6_ins_rt(nrt);
965 * Race condition! In the gap, when table->tb6_lock was
966 * released someone could insert this route. Relookup.
968 dst_release(&rt->dst);
977 read_unlock_bh(&table->tb6_lock);
979 rt->dst.lastuse = jiffies;
985 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
986 struct flowi6 *fl6, int flags)
988 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
991 static struct dst_entry *ip6_route_input_lookup(struct net *net,
992 struct net_device *dev,
993 struct flowi6 *fl6, int flags)
995 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
996 flags |= RT6_LOOKUP_F_IFACE;
998 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1001 void ip6_route_input(struct sk_buff *skb)
1003 const struct ipv6hdr *iph = ipv6_hdr(skb);
1004 struct net *net = dev_net(skb->dev);
1005 int flags = RT6_LOOKUP_F_HAS_SADDR;
1006 struct flowi6 fl6 = {
1007 .flowi6_iif = skb->dev->ifindex,
1008 .daddr = iph->daddr,
1009 .saddr = iph->saddr,
1010 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
1011 .flowi6_mark = skb->mark,
1012 .flowi6_proto = iph->nexthdr,
1015 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1018 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1019 struct flowi6 *fl6, int flags)
1021 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1024 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1029 fl6->flowi6_iif = LOOPBACK_IFINDEX;
1031 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1032 flags |= RT6_LOOKUP_F_IFACE;
1034 if (!ipv6_addr_any(&fl6->saddr))
1035 flags |= RT6_LOOKUP_F_HAS_SADDR;
1037 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1039 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1042 EXPORT_SYMBOL(ip6_route_output);
1044 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1046 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1047 struct dst_entry *new = NULL;
1049 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1053 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1054 rt6_init_peer(rt, net->ipv6.peers);
1057 new->input = dst_discard;
1058 new->output = dst_discard;
1060 if (dst_metrics_read_only(&ort->dst))
1061 new->_metrics = ort->dst._metrics;
1063 dst_copy_metrics(new, &ort->dst);
1064 rt->rt6i_idev = ort->rt6i_idev;
1066 in6_dev_hold(rt->rt6i_idev);
1068 rt->rt6i_gateway = ort->rt6i_gateway;
1069 rt->rt6i_flags = ort->rt6i_flags;
1070 rt6_clean_expires(rt);
1071 rt->rt6i_metric = 0;
1073 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1074 #ifdef CONFIG_IPV6_SUBTREES
1075 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1081 dst_release(dst_orig);
1082 return new ? new : ERR_PTR(-ENOMEM);
1086 * Destination cache support functions
/*
 * dst_ops.check callback: decide whether a cached IPv6 dst is still
 * valid for @cookie (the fib6-node serial number it was created under).
 * NOTE(review): the return statements are missing from this extract.
 */
1089 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1091 struct rt6_info *rt;
1093 rt = (struct rt6_info *) dst;
1095 /* All IPV6 dsts are created with ->obsolete set to the value
1096 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1097 * into this function always.
/* a namespace-wide generation bump invalidates every cached route */
1099 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1102 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
/* lazily refresh a stale inet_peer binding on successful validation */
1103 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1104 if (!rt6_has_peer(rt))
1105 rt6_bind_peer(rt, 0);
1106 rt->rt6i_peer_genid = rt6_peer_genid();
1113 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1115 struct rt6_info *rt = (struct rt6_info *) dst;
1118 if (rt->rt6i_flags & RTF_CACHE) {
1119 if (rt6_check_expired(rt)) {
/*
 * dst_ops.link_failure callback: report unreachability to the sender
 * and retire the route — expire a cached clone immediately, or bump the
 * fib node serial for a default route so it gets revalidated.
 */
1131 static void ip6_link_failure(struct sk_buff *skb)
1133 struct rt6_info *rt;
1135 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1137 rt = (struct rt6_info *) skb_dst(skb);
/* NOTE(review): the NULL check on rt is missing from this extract */
1139 if (rt->rt6i_flags & RTF_CACHE)
1140 rt6_update_expires(rt, 0);
1141 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1142 rt->rt6i_node->fn_sernum = -1;
/*
 * dst_ops.update_pmtu callback: lower the cached MTU of a host route
 * (plen == 128) and arm its expiry timer.  An MTU below IPV6_MIN_MTU
 * enables RTAX_FEATURE_ALLFRAG rather than shrinking further.
 * NOTE(review): the line clamping mtu up to IPV6_MIN_MTU is missing
 * from this extract.
 */
1146 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1147 struct sk_buff *skb, u32 mtu)
1149 struct rt6_info *rt6 = (struct rt6_info*)dst;
1152 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1153 struct net *net = dev_net(dst->dev);
1155 rt6->rt6i_flags |= RTF_MODIFIED;
1156 if (mtu < IPV6_MIN_MTU) {
1157 u32 features = dst_metric(dst, RTAX_FEATURES);
1159 features |= RTAX_FEATURE_ALLFRAG;
1160 dst_metric_set(dst, RTAX_FEATURES, features);
1162 dst_metric_set(dst, RTAX_MTU, mtu);
/* expire the learned PMTU after the sysctl-configured interval */
1163 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1167 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1170 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1171 struct dst_entry *dst;
1174 memset(&fl6, 0, sizeof(fl6));
1175 fl6.flowi6_oif = oif;
1176 fl6.flowi6_mark = mark;
1177 fl6.flowi6_flags = 0;
1178 fl6.daddr = iph->daddr;
1179 fl6.saddr = iph->saddr;
1180 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1182 dst = ip6_route_output(net, NULL, &fl6);
1184 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1187 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1189 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1191 ip6_update_pmtu(skb, sock_net(sk), mtu,
1192 sk->sk_bound_dev_if, sk->sk_mark);
1194 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1196 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1198 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1199 struct dst_entry *dst;
1202 memset(&fl6, 0, sizeof(fl6));
1203 fl6.flowi6_oif = oif;
1204 fl6.flowi6_mark = mark;
1205 fl6.flowi6_flags = 0;
1206 fl6.daddr = iph->daddr;
1207 fl6.saddr = iph->saddr;
1208 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1210 dst = ip6_route_output(net, NULL, &fl6);
1212 rt6_do_redirect(dst, NULL, skb);
1215 EXPORT_SYMBOL_GPL(ip6_redirect);
1217 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1219 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1221 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
/*
 * dst_ops.default_advmss callback: derive the advertised TCP MSS from
 * the path MTU, clamped below by the ip6_rt_min_advmss sysctl and above
 * by the maximal non-jumbo payload (IPV6_MAXPLEN).
 * NOTE(review): the final clamp assignment and the return statement are
 * missing from this extract.
 */
1223 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1225 struct net_device *dev = dst->dev;
1226 unsigned int mtu = dst_mtu(dst);
1227 struct net *net = dev_net(dev);
1229 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1231 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1232 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1235 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1236 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1237 * IPV6_MAXPLEN is also valid and means: "any MSS,
1238 * rely only on pmtu discovery"
1240 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1245 static unsigned int ip6_mtu(const struct dst_entry *dst)
1247 struct inet6_dev *idev;
1248 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1256 idev = __in6_dev_get(dst->dev);
1258 mtu = idev->cnf.mtu6;
1264 static struct dst_entry *icmp6_dst_gc_list;
1265 static DEFINE_SPINLOCK(icmp6_dst_lock);
1267 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1268 struct neighbour *neigh,
1271 struct dst_entry *dst;
1272 struct rt6_info *rt;
1273 struct inet6_dev *idev = in6_dev_get(dev);
1274 struct net *net = dev_net(dev);
1276 if (unlikely(!idev))
1277 return ERR_PTR(-ENODEV);
1279 rt = ip6_dst_alloc(net, dev, 0, NULL);
1280 if (unlikely(!rt)) {
1282 dst = ERR_PTR(-ENOMEM);
1289 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1290 if (IS_ERR(neigh)) {
1293 return ERR_CAST(neigh);
1297 rt->dst.flags |= DST_HOST;
1298 rt->dst.output = ip6_output;
1300 atomic_set(&rt->dst.__refcnt, 1);
1301 rt->rt6i_dst.addr = fl6->daddr;
1302 rt->rt6i_dst.plen = 128;
1303 rt->rt6i_idev = idev;
1304 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1306 spin_lock_bh(&icmp6_dst_lock);
1307 rt->dst.next = icmp6_dst_gc_list;
1308 icmp6_dst_gc_list = &rt->dst;
1309 spin_unlock_bh(&icmp6_dst_lock);
1311 fib6_force_start_gc(net);
1313 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
/*
 * Reap unreferenced entries from the icmp6 dst list while holding
 * icmp6_dst_lock.
 * NOTE(review): the unlink/free statements inside the loop and the
 * return value are missing from this extract.
 */
1319 int icmp6_dst_gc(void)
1321 struct dst_entry *dst, **pprev;
1324 spin_lock_bh(&icmp6_dst_lock);
1325 pprev = &icmp6_dst_gc_list;
1327 while ((dst = *pprev) != NULL) {
1328 if (!atomic_read(&dst->__refcnt)) {
1337 spin_unlock_bh(&icmp6_dst_lock);
1342 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1345 struct dst_entry *dst, **pprev;
1347 spin_lock_bh(&icmp6_dst_lock);
1348 pprev = &icmp6_dst_gc_list;
1349 while ((dst = *pprev) != NULL) {
1350 struct rt6_info *rt = (struct rt6_info *) dst;
1351 if (func(rt, arg)) {
1358 spin_unlock_bh(&icmp6_dst_lock);
/*
 * dst_ops.gc callback: run fib6 garbage collection, paced by the
 * ip6_rt_gc_* sysctls.  Returns nonzero when the table is still over
 * rt_max_size, which tells the dst allocator to fail the allocation.
 * NOTE(review): the "goto out"/"out:" label lines and the declaration
 * of "entries" are missing from this extract.
 */
1361 static int ip6_dst_gc(struct dst_ops *ops)
1363 unsigned long now = jiffies;
1364 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1365 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1366 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1367 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1368 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1369 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1372 entries = dst_entries_get_fast(ops);
/* rate-limit: skip GC if we ran recently and are under the size cap */
1373 if (time_after(rt_last_gc + rt_min_interval, now) &&
1374 entries <= rt_max_size)
1377 net->ipv6.ip6_rt_gc_expire++;
1378 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1379 net->ipv6.ip6_rt_last_gc = now;
1380 entries = dst_entries_get_slow(ops);
/* below the threshold again: reset the GC aggressiveness */
1381 if (entries < ops->gc_thresh)
1382 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1384 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1385 return entries > rt_max_size;
1388 /* Clean host part of a prefix. Not necessary in radix tree,
1389 but results in cleaner routing tables.
1391 Remove it only when all the things will work!
/*
 * Effective hop limit for a dst: the per-route metric when set (nonzero),
 * otherwise the interface's configured hop limit, otherwise the
 * namespace-wide default.
 * NOTE(review): the RCU lock/unlock, the if/else around the idev lookup
 * and the return statement are missing from this extract.
 */
1394 int ip6_dst_hoplimit(struct dst_entry *dst)
1396 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1397 if (hoplimit == 0) {
1398 struct net_device *dev = dst->dev;
1399 struct inet6_dev *idev;
1402 idev = __in6_dev_get(dev);
1404 hoplimit = idev->cnf.hop_limit;
1406 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1411 EXPORT_SYMBOL(ip6_dst_hoplimit);
/*
 * ip6_route_add - install one IPv6 route described by @cfg into the FIB.
 *
 * Validates prefix lengths, resolves the output device/table, allocates a
 * new rt6_info, fills it from @cfg (expiry, protocol, input/output hooks,
 * destination/source prefixes, gateway, preferred source, metrics) and
 * finally inserts it via __ip6_ins_rt().
 *
 * Returns 0 on success or a negative errno.
 * NOTE(review): several lines (error paths, 'out:' labels, closing braces)
 * are elided in this view; comments describe only the visible statements.
 */
1417 int ip6_route_add(struct fib6_config *cfg)
1420 struct net *net = cfg->fc_nlinfo.nl_net;
1421 struct rt6_info *rt = NULL;
1422 struct net_device *dev = NULL;
1423 struct inet6_dev *idev = NULL;
1424 struct fib6_table *table;
/* IPv6 prefixes cannot exceed 128 bits. */
1427 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
/* Source-routed (subtree) entries are only valid with CONFIG_IPV6_SUBTREES. */
1429 #ifndef CONFIG_IPV6_SUBTREES
1430 if (cfg->fc_src_len)
/* Resolve and hold the requested output device, if one was given. */
1433 if (cfg->fc_ifindex) {
1435 dev = dev_get_by_index(net, cfg->fc_ifindex);
1438 idev = in6_dev_get(dev);
/* A metric of 0 means "unspecified"; fall back to the user-route default. */
1443 if (cfg->fc_metric == 0)
1444 cfg->fc_metric = IP6_RT_PRIO_USER;
/*
 * Without NLM_F_CREATE we first try to look the table up; if it does not
 * exist we warn (legacy behaviour) but still create it.
 */
1447 if (cfg->fc_nlinfo.nlh &&
1448 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1449 table = fib6_get_table(net, cfg->fc_table);
1451 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1452 table = fib6_new_table(net, cfg->fc_table);
1455 table = fib6_new_table(net, cfg->fc_table);
/* DST_NOCOUNT: table routes do not count against the dst gc limits. */
1461 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
/* RTF_EXPIRES carries a clock_t lifetime relative to now. */
1468 if (cfg->fc_flags & RTF_EXPIRES)
1469 rt6_set_expires(rt, jiffies +
1470 clock_t_to_jiffies(cfg->fc_expires));
1472 rt6_clean_expires(rt);
1474 if (cfg->fc_protocol == RTPROT_UNSPEC)
1475 cfg->fc_protocol = RTPROT_BOOT;
1476 rt->rt6i_protocol = cfg->fc_protocol;
1478 addr_type = ipv6_addr_type(&cfg->fc_dst);
/* Select the input handler by destination class: mcast / local / forward. */
1480 if (addr_type & IPV6_ADDR_MULTICAST)
1481 rt->dst.input = ip6_mc_input;
1482 else if (cfg->fc_flags & RTF_LOCAL)
1483 rt->dst.input = ip6_input;
1485 rt->dst.input = ip6_forward;
1487 rt->dst.output = ip6_output;
1489 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1490 rt->rt6i_dst.plen = cfg->fc_dst_len;
/* /128 routes are host routes. */
1491 if (rt->rt6i_dst.plen == 128)
1492 rt->dst.flags |= DST_HOST;
/* Non-host routes with metrics need their own (non-shared) metrics array. */
1494 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1495 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1500 dst_init_metrics(&rt->dst, metrics, 0);
1502 #ifdef CONFIG_IPV6_SUBTREES
1503 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1504 rt->rt6i_src.plen = cfg->fc_src_len;
1507 rt->rt6i_metric = cfg->fc_metric;
1509 /* We cannot add true routes via loopback here,
1510 they would result in kernel looping; promote them to reject routes
1512 if ((cfg->fc_flags & RTF_REJECT) ||
1513 (dev && (dev->flags & IFF_LOOPBACK) &&
1514 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1515 !(cfg->fc_flags & RTF_LOCAL))) {
1516 /* hold loopback dev/idev if we haven't done so. */
1517 if (dev != net->loopback_dev) {
1522 dev = net->loopback_dev;
1524 idev = in6_dev_get(dev);
1530 rt->dst.output = ip6_pkt_discard_out;
1531 rt->dst.input = ip6_pkt_discard;
1532 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
/* Map the netlink route type to the dst error returned to senders. */
1533 switch (cfg->fc_type) {
1535 rt->dst.error = -EINVAL;
1538 rt->dst.error = -EACCES;
1541 rt->dst.error = -EAGAIN;
1544 rt->dst.error = -ENETUNREACH;
1550 if (cfg->fc_flags & RTF_GATEWAY) {
1551 const struct in6_addr *gw_addr;
1554 gw_addr = &cfg->fc_gateway;
1555 rt->rt6i_gateway = *gw_addr;
1556 gwa_type = ipv6_addr_type(gw_addr);
/* Non link-local gateways need an existing route to be validated against. */
1558 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1559 struct rt6_info *grt;
1561 /* IPv6 strictly inhibits using not link-local
1562 addresses as nexthop address.
1563 Otherwise, router will not able to send redirects.
1564 It is very good, but in some (rare!) circumstances
1565 (SIT, PtP, NBMA NOARP links) it is handy to allow
1566 some exceptions. --ANK
1569 if (!(gwa_type & IPV6_ADDR_UNICAST))
1572 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1574 err = -EHOSTUNREACH;
1578 if (dev != grt->dst.dev) {
1579 dst_release(&grt->dst);
1584 idev = grt->rt6i_idev;
1586 in6_dev_hold(grt->rt6i_idev);
/* A gateway must not itself be reached through another gateway. */
1588 if (!(grt->rt6i_flags & RTF_GATEWAY))
1590 dst_release(&grt->dst);
1596 if (!dev || (dev->flags & IFF_LOOPBACK))
/* Record the preferred source address after checking it exists on @dev. */
1604 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1605 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1609 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1610 rt->rt6i_prefsrc.plen = 128;
1612 rt->rt6i_prefsrc.plen = 0;
/* Gateway/nonexthop routes get a neighbour entry bound up front. */
1614 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1615 err = rt6_bind_neighbour(rt, dev);
1620 rt->rt6i_flags = cfg->fc_flags;
/* Copy any RTA_METRICS attributes into the dst metrics. */
1627 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1628 int type = nla_type(nla);
1631 if (type > RTAX_MAX) {
1636 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1642 rt->rt6i_idev = idev;
1643 rt->rt6i_table = table;
1645 cfg->fc_nlinfo.nl_net = dev_net(dev);
1647 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
/*
 * __ip6_del_rt - remove @rt from its FIB table under the table lock.
 *
 * Refuses to delete the per-netns null entry, then calls fib6_del() with
 * tb6_lock held and drops the caller's reference on @rt.
 * Returns the fib6_del() result (0 or negative errno).
 */
1659 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1662 struct fib6_table *table;
1663 struct net *net = dev_net(rt->dst.dev);
/* The null entry is a permanent sentinel and must never be deleted. */
1665 if (rt == net->ipv6.ip6_null_entry) {
1670 table = rt->rt6i_table;
1671 write_lock_bh(&table->tb6_lock);
1672 err = fib6_del(rt, info);
1673 write_unlock_bh(&table->tb6_lock);
/* Drop the reference the caller held on the route. */
1676 dst_release(&rt->dst);
/*
 * ip6_del_rt - convenience wrapper deleting @rt with a minimal nl_info
 * (only the netns filled in, no originating netlink message).
 */
1680 int ip6_del_rt(struct rt6_info *rt)
1682 struct nl_info info = {
1683 .nl_net = dev_net(rt->dst.dev),
1685 return __ip6_del_rt(rt, &info);
/*
 * ip6_route_del - find and delete the route matching @cfg.
 *
 * Locates the fib6 node for the dst/src prefixes, then walks its leaf list
 * filtering on ifindex, gateway and metric; the first match is removed via
 * __ip6_del_rt().  Returns 0 on success or a negative errno.
 */
1688 static int ip6_route_del(struct fib6_config *cfg)
1690 struct fib6_table *table;
1691 struct fib6_node *fn;
1692 struct rt6_info *rt;
1695 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1699 read_lock_bh(&table->tb6_lock);
1701 fn = fib6_locate(&table->tb6_root,
1702 &cfg->fc_dst, cfg->fc_dst_len,
1703 &cfg->fc_src, cfg->fc_src_len);
/* Scan all routes sharing this prefix for one matching the filters. */
1706 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1707 if (cfg->fc_ifindex &&
1709 rt->dst.dev->ifindex != cfg->fc_ifindex))
1711 if (cfg->fc_flags & RTF_GATEWAY &&
1712 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1714 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
/* Match found: drop the read lock before deleting (elided dst_hold
 * presumably keeps rt alive across the unlock -- see original source). */
1717 read_unlock_bh(&table->tb6_lock);
1719 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1722 read_unlock_bh(&table->tb6_lock);
/*
 * rt6_do_redirect - process an ICMPv6 Redirect (RFC 4861 sect. 8) for @dst.
 *
 * Validates the redirect message (length, target/destination addresses,
 * interface policy, ND options), updates the neighbour cache for the new
 * first hop, clones the current route into a RTF_CACHE/RTF_DYNAMIC entry
 * pointing at the new gateway, and fires a NETEVENT_REDIRECT notification.
 * NOTE(review): validation/error branches elided in this view.
 */
1727 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1729 struct net *net = dev_net(skb->dev);
1730 struct netevent_redirect netevent;
1731 struct rt6_info *rt, *nrt = NULL;
1732 const struct in6_addr *target;
1733 struct ndisc_options ndopts;
1734 const struct in6_addr *dest;
1735 struct neighbour *old_neigh;
1736 struct inet6_dev *in6_dev;
1737 struct neighbour *neigh;
1738 struct icmp6hdr *icmph;
1739 int optlen, on_link;
/* optlen = bytes after the fixed redirect header (icmp6hdr + 2 addrs). */
1742 optlen = skb->tail - skb->transport_header;
1743 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1746 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
/* Target and destination addresses follow the ICMPv6 header directly. */
1750 icmph = icmp6_hdr(skb);
1751 target = (const struct in6_addr *) (icmph + 1);
1754 if (ipv6_addr_is_multicast(dest)) {
1755 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
/* dest == target means the destination is on-link; otherwise the target
 * must be a link-local unicast router address. */
1760 if (ipv6_addr_equal(dest, target)) {
1762 } else if (ipv6_addr_type(target) !=
1763 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1764 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
/* Routers and interfaces configured to refuse redirects ignore them. */
1768 in6_dev = __in6_dev_get(skb->dev);
1771 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1775 * The IP source address of the Redirect MUST be the same as the current
1776 * first-hop router for the specified ICMP Destination Address.
1779 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1780 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
/* Extract the target link-layer address option, if present. */
1785 if (ndopts.nd_opts_tgt_lladdr) {
1786 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1789 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1794 rt = (struct rt6_info *) dst;
1795 if (rt == net->ipv6.ip6_null_entry) {
1796 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1800 /* Redirect received -> path was valid.
1801 * Look, redirects are sent only in response to data packets,
1802 * so that this nexthop apparently is reachable. --ANK
1804 dst_confirm(&rt->dst);
1806 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1810 /* Duplicate redirect: silently ignore. */
1812 if (neigh == old_neigh)
1816 * We have finally decided to accept it.
/* NUD_STALE with WEAK_OVERRIDE|OVERRIDE per RFC 4861 redirect handling;
 * the IsRouter flags are set only for off-link targets. */
1819 neigh_update(neigh, lladdr, NUD_STALE,
1820 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1821 NEIGH_UPDATE_F_OVERRIDE|
1822 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1823 NEIGH_UPDATE_F_ISROUTER))
/* Clone the current route into a host-route cache entry for @dest. */
1826 nrt = ip6_rt_copy(rt, dest);
1830 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1832 nrt->rt6i_flags &= ~RTF_GATEWAY;
1834 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1835 nrt->n = neigh_clone(neigh);
1837 if (ip6_ins_rt(nrt))
/* Tell interested subsystems (e.g. offload drivers) about the new path. */
1840 netevent.old = &rt->dst;
1841 netevent.old_neigh = old_neigh;
1842 netevent.new = &nrt->dst;
1843 netevent.new_neigh = neigh;
1844 netevent.daddr = dest;
1845 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
/* The superseded cache entry is removed (deletion path elided here). */
1847 if (rt->rt6i_flags & RTF_CACHE) {
1848 rt = (struct rt6_info *) dst_clone(&rt->dst);
1853 neigh_release(neigh);
1857 * Misc support functions
/*
 * ip6_rt_copy - allocate a host-route (/128) copy of @ort targeting @dest.
 *
 * Duplicates handlers, metrics, idev, gateway and flags from the original
 * route; addrconf default routes keep a 'from' back-reference so expiry
 * tracking follows the parent.  Returns the new route (NULL check elided).
 */
1860 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1861 const struct in6_addr *dest)
1863 struct net *net = dev_net(ort->dst.dev);
1864 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1868 rt->dst.input = ort->dst.input;
1869 rt->dst.output = ort->dst.output;
/* The copy is always a host route for @dest. */
1870 rt->dst.flags |= DST_HOST;
1872 rt->rt6i_dst.addr = *dest;
1873 rt->rt6i_dst.plen = 128;
1874 dst_copy_metrics(&rt->dst, &ort->dst);
1875 rt->dst.error = ort->dst.error;
1876 rt->rt6i_idev = ort->rt6i_idev;
1878 in6_dev_hold(rt->rt6i_idev);
1879 rt->dst.lastuse = jiffies;
1881 rt->rt6i_gateway = ort->rt6i_gateway;
1882 rt->rt6i_flags = ort->rt6i_flags;
/* RA-learned default routes: link the copy to its origin for expiry. */
1883 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1884 (RTF_DEFAULT | RTF_ADDRCONF))
1885 rt6_set_from(rt, ort);
1887 rt6_clean_expires(rt);
1888 rt->rt6i_metric = 0;
1890 #ifdef CONFIG_IPV6_SUBTREES
1891 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1893 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1894 rt->rt6i_table = ort->rt6i_table;
1899 #ifdef CONFIG_IPV6_ROUTE_INFO
1899 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_get_route_info - look up an RA Route Information route (RFC 4191)
 * for @prefix/@prefixlen learned via @gwaddr on interface @ifindex.
 * Returns the matching route or NULL (reference handling elided here).
 */
1900 static struct rt6_info *rt6_get_route_info(struct net *net,
1901 const struct in6_addr *prefix, int prefixlen,
1902 const struct in6_addr *gwaddr, int ifindex)
1904 struct fib6_node *fn;
1905 struct rt6_info *rt = NULL;
1906 struct fib6_table *table;
/* Route Information entries live in the dedicated RT6_TABLE_INFO table. */
1908 table = fib6_get_table(net, RT6_TABLE_INFO);
1912 read_lock_bh(&table->tb6_lock);
1913 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
/* Match on device, RTF_ROUTEINFO|RTF_GATEWAY flags and gateway address. */
1917 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1918 if (rt->dst.dev->ifindex != ifindex)
1920 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1922 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1928 read_unlock_bh(&table->tb6_lock);
/*
 * rt6_add_route_info - add an RA Route Information route (RFC 4191) and
 * return it via a fresh lookup.  A zero prefix length is treated as a
 * default route (RTF_DEFAULT).  ip6_route_add() errors are intentionally
 * ignored; the subsequent lookup reports success or failure.
 */
1932 static struct rt6_info *rt6_add_route_info(struct net *net,
1933 const struct in6_addr *prefix, int prefixlen,
1934 const struct in6_addr *gwaddr, int ifindex,
1937 struct fib6_config cfg = {
1938 .fc_table = RT6_TABLE_INFO,
1939 .fc_metric = IP6_RT_PRIO_USER,
1940 .fc_ifindex = ifindex,
1941 .fc_dst_len = prefixlen,
1942 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1943 RTF_UP | RTF_PREF(pref),
1944 .fc_nlinfo.portid = 0,
1945 .fc_nlinfo.nlh = NULL,
1946 .fc_nlinfo.nl_net = net,
1949 cfg.fc_dst = *prefix;
1950 cfg.fc_gateway = *gwaddr;
1952 /* We should treat it as a default route if prefix length is 0. */
1954 cfg.fc_flags |= RTF_DEFAULT;
1956 ip6_route_add(&cfg);
1958 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
/*
 * rt6_get_dflt_router - find the RA-learned default route via router
 * @addr on @dev in RT6_TABLE_DFLT.  Returns the route or NULL
 * (reference acquisition elided in this view).
 */
1962 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1964 struct rt6_info *rt;
1965 struct fib6_table *table;
1967 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1971 read_lock_bh(&table->tb6_lock);
/* Walk the root leaf list: default routes all share the zero prefix. */
1972 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1973 if (dev == rt->dst.dev &&
1974 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1975 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1980 read_unlock_bh(&table->tb6_lock);
/*
 * rt6_add_dflt_router - install an RA-learned default route via @gwaddr
 * on @dev with router preference @pref, then return it via lookup.
 * As with rt6_add_route_info(), the add result is checked implicitly by
 * the following rt6_get_dflt_router() call.
 */
1984 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1985 struct net_device *dev,
1988 struct fib6_config cfg = {
1989 .fc_table = RT6_TABLE_DFLT,
1990 .fc_metric = IP6_RT_PRIO_USER,
1991 .fc_ifindex = dev->ifindex,
1992 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1993 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1994 .fc_nlinfo.portid = 0,
1995 .fc_nlinfo.nlh = NULL,
1996 .fc_nlinfo.nl_net = dev_net(dev),
1999 cfg.fc_gateway = *gwaddr;
2001 ip6_route_add(&cfg);
2003 return rt6_get_dflt_router(gwaddr, dev);
/*
 * rt6_purge_dflt_routers - delete every RA-learned (RTF_DEFAULT or
 * RTF_ADDRCONF) route from RT6_TABLE_DFLT.  The read lock is dropped
 * before each deletion and the scan restarts (restart jump elided).
 */
2006 void rt6_purge_dflt_routers(struct net *net)
2008 struct rt6_info *rt;
2009 struct fib6_table *table;
2011 /* NOTE: Keep consistent with rt6_get_dflt_router */
2012 table = fib6_get_table(net, RT6_TABLE_DFLT);
2017 read_lock_bh(&table->tb6_lock);
2018 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2019 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
2021 read_unlock_bh(&table->tb6_lock);
2026 read_unlock_bh(&table->tb6_lock);
/*
 * rtmsg_to_fib6_config - translate a legacy ioctl in6_rtmsg into the
 * fib6_config used internally.  All ioctl routes go to RT6_TABLE_MAIN.
 */
2029 static void rtmsg_to_fib6_config(struct net *net,
2030 struct in6_rtmsg *rtmsg,
2031 struct fib6_config *cfg)
2033 memset(cfg, 0, sizeof(*cfg));
2035 cfg->fc_table = RT6_TABLE_MAIN;
2036 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2037 cfg->fc_metric = rtmsg->rtmsg_metric;
/* rtmsg_info carries the route lifetime/expiry for RTF_EXPIRES routes. */
2038 cfg->fc_expires = rtmsg->rtmsg_info;
2039 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2040 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2041 cfg->fc_flags = rtmsg->rtmsg_flags;
2043 cfg->fc_nlinfo.nl_net = net;
2045 cfg->fc_dst = rtmsg->rtmsg_dst;
2046 cfg->fc_src = rtmsg->rtmsg_src;
2047 cfg->fc_gateway = rtmsg->rtmsg_gateway;
/*
 * ipv6_route_ioctl - handle the legacy SIOCADDRT/SIOCDELRT route ioctls.
 *
 * Requires CAP_NET_ADMIN, copies the in6_rtmsg from userspace, converts
 * it to a fib6_config and dispatches to ip6_route_add()/ip6_route_del().
 * Returns 0 or a negative errno.
 */
2050 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2052 struct fib6_config cfg;
2053 struct in6_rtmsg rtmsg;
2057 case SIOCADDRT: /* Add a route */
2058 case SIOCDELRT: /* Delete a route */
2059 if (!capable(CAP_NET_ADMIN))
2061 err = copy_from_user(&rtmsg, arg,
2062 sizeof(struct in6_rtmsg));
2066 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2071 err = ip6_route_add(&cfg);
2074 err = ip6_route_del(&cfg);
2088 * Drop the packet on the floor
/*
 * ip6_pkt_drop - common "no route" drop path: bump the appropriate SNMP
 * counter and send an ICMPv6 Destination Unreachable with @code.
 * Packets to the unspecified address count as address errors instead.
 */
2091 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2094 struct dst_entry *dst = skb_dst(skb);
2095 switch (ipstats_mib_noroutes) {
2096 case IPSTATS_MIB_INNOROUTES:
2097 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
/* The unspecified destination is an address error, not a routing miss. */
2098 if (type == IPV6_ADDR_ANY) {
2099 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2100 IPSTATS_MIB_INADDRERRORS);
2104 case IPSTATS_MIB_OUTNOROUTES:
2105 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2106 ipstats_mib_noroutes);
2109 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
/* dst.input handler for reject routes: drop with ICMPv6 "no route". */
2114 static int ip6_pkt_discard(struct sk_buff *skb)
2116 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
/* dst.output handler for reject routes: same as input, output counter. */
2119 static int ip6_pkt_discard_out(struct sk_buff *skb)
2121 skb->dev = skb_dst(skb)->dev;
2122 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2125 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/* dst.input handler for prohibit routes: drop with "admin prohibited". */
2127 static int ip6_pkt_prohibit(struct sk_buff *skb)
2129 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
/* dst.output handler for prohibit routes (output-side counter). */
2132 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2134 skb->dev = skb_dst(skb)->dev;
2135 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2141 * Allocate a dst for local (unicast / anycast) address.
/*
 * addrconf_dst_alloc - build the host route for a local unicast or
 * anycast address @addr on @idev.
 *
 * The route is attached to the loopback device, marked RTF_LOCAL or
 * RTF_ANYCAST (selection condition elided in this view), bound to a
 * neighbour entry and placed in RT6_TABLE_LOCAL.
 * Returns the route or an ERR_PTR on allocation/bind failure.
 */
2144 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2145 const struct in6_addr *addr,
2148 struct net *net = dev_net(idev->dev);
2149 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2153 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2154 return ERR_PTR(-ENOMEM);
2159 rt->dst.flags |= DST_HOST;
2160 rt->dst.input = ip6_input;
2161 rt->dst.output = ip6_output;
2162 rt->rt6i_idev = idev;
2164 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2166 rt->rt6i_flags |= RTF_ANYCAST;
2168 rt->rt6i_flags |= RTF_LOCAL;
2169 err = rt6_bind_neighbour(rt, rt->dst.dev);
2172 return ERR_PTR(err);
2175 rt->rt6i_dst.addr = *addr;
2176 rt->rt6i_dst.plen = 128;
2177 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
/* Hand the caller an initial reference. */
2179 atomic_set(&rt->dst.__refcnt, 1);
/*
 * ip6_route_get_saddr - pick a source address for @daddr via route @rt.
 *
 * A configured preferred source (rt6i_prefsrc) wins; otherwise fall back
 * to ipv6_dev_get_saddr() source-address selection on the route's device.
 * Returns 0 on success or a negative errno from the fallback path.
 */
2184 int ip6_route_get_saddr(struct net *net,
2185 struct rt6_info *rt,
2186 const struct in6_addr *daddr,
2188 struct in6_addr *saddr)
2190 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2192 if (rt->rt6i_prefsrc.plen)
2193 *saddr = rt->rt6i_prefsrc.addr;
2195 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2196 daddr, prefs, saddr);
2200 /* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(): device + netns + address. */
2201 struct arg_dev_net_ip {
2202 struct net_device *dev;
2204 struct in6_addr *addr;
/*
 * fib6_remove_prefsrc - fib6_clean_all() callback: clear the preferred
 * source of any route (on the given device, or any device when dev is
 * NULL) whose prefsrc equals the deleted address.
 */
2207 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2209 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2210 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2211 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2213 if (((void *)rt->dst.dev == dev || !dev) &&
2214 rt != net->ipv6.ip6_null_entry &&
2215 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2216 /* remove prefsrc entry */
2217 rt->rt6i_prefsrc.plen = 0;
/*
 * rt6_remove_prefsrc - strip a just-deleted interface address @ifp from
 * the prefsrc of every route in its netns (via fib6_remove_prefsrc).
 */
2222 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2224 struct net *net = dev_net(ifp->idev->dev);
2225 struct arg_dev_net_ip adni = {
2226 .dev = ifp->idev->dev,
2230 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
/* Walker argument for fib6_ifdown(): the going-down device (+ netns). */
2233 struct arg_dev_net {
2234 struct net_device *dev;
/*
 * fib6_ifdown - fib6_clean_all() callback: select routes on the given
 * device (or all devices when dev is NULL), never the null entry, for
 * removal (the nonzero return requesting deletion is elided here).
 */
2238 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2240 const struct arg_dev_net *adn = arg;
2241 const struct net_device *dev = adn->dev;
2243 if ((rt->dst.dev == dev || !dev) &&
2244 rt != adn->net->ipv6.ip6_null_entry)
/*
 * rt6_ifdown - purge all routes (FIB and ICMPv6 clones) that use @dev,
 * called when the device goes down or is unregistered.
 */
2250 void rt6_ifdown(struct net *net, struct net_device *dev)
2252 struct arg_dev_net adn = {
2257 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2258 icmp6_clean_all(fib6_ifdown, &adn);
/* Walker argument for rt6_mtu_change_route(): device + its new MTU. */
2261 struct rt6_mtu_change_arg {
2262 struct net_device *dev;
/*
 * rt6_mtu_change_route - fib6_clean_all() callback applying a device MTU
 * change to one route.  Updates RTAX_MTU when the new MTU is a tighter
 * bound, or when the route's PMTU equalled the old device MTU (so an
 * administrative increase propagates).  Locked MTU metrics are skipped.
 */
2266 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2268 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2269 struct inet6_dev *idev;
2271 /* In IPv6 pmtu discovery is not optional,
2272 so that RTAX_MTU lock cannot disable it.
2273 We still use this lock to block changes
2274 caused by addrconf/ndisc.
2277 idev = __in6_dev_get(arg->dev);
2281 /* For administrative MTU increase, there is no way to discover
2282 IPv6 PMTU increase, so PMTU increase should be updated here.
2283 Since RFC 1981 doesn't include administrative MTU increase
2284 update PMTU increase is a MUST. (i.e. jumbo frame)
2287 If new MTU is less than route PMTU, this new MTU will be the
2288 lowest MTU in the path, update the route PMTU to reflect PMTU
2289 decreases; if new MTU is greater than route PMTU, and the
2290 old MTU is the lowest MTU in the path, update the route PMTU
2291 to reflect the increase. In this case if the other nodes' MTU
2292 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2295 if (rt->dst.dev == arg->dev &&
2296 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2297 (dst_mtu(&rt->dst) >= arg->mtu ||
2298 (dst_mtu(&rt->dst) < arg->mtu &&
2299 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2300 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
/*
 * rt6_mtu_change - propagate a new device MTU to all routes using @dev.
 */
2305 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2307 struct rt6_mtu_change_arg arg = {
2312 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
/* Netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE. */
2315 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2316 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2317 [RTA_OIF] = { .type = NLA_U32 },
2318 [RTA_IIF] = { .type = NLA_U32 },
2319 [RTA_PRIORITY] = { .type = NLA_U32 },
2320 [RTA_METRICS] = { .type = NLA_NESTED },
2321 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
/*
 * rtm_to_fib6_config - parse an RTM_NEWROUTE/RTM_DELROUTE netlink message
 * into a fib6_config.  Validates attributes against rtm_ipv6_policy,
 * maps reject-type routes to RTF_REJECT and RTN_LOCAL to RTF_LOCAL, and
 * copies address/metric/multipath attributes.  Returns 0 or negative errno.
 */
2324 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2325 struct fib6_config *cfg)
2328 struct nlattr *tb[RTA_MAX+1];
2331 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2336 rtm = nlmsg_data(nlh);
2337 memset(cfg, 0, sizeof(*cfg));
2339 cfg->fc_table = rtm->rtm_table;
2340 cfg->fc_dst_len = rtm->rtm_dst_len;
2341 cfg->fc_src_len = rtm->rtm_src_len;
2342 cfg->fc_flags = RTF_UP;
2343 cfg->fc_protocol = rtm->rtm_protocol;
2344 cfg->fc_type = rtm->rtm_type;
/* All reject-style route types become RTF_REJECT internally; the
 * specific type is preserved in fc_type for the dst.error mapping. */
2346 if (rtm->rtm_type == RTN_UNREACHABLE ||
2347 rtm->rtm_type == RTN_BLACKHOLE ||
2348 rtm->rtm_type == RTN_PROHIBIT ||
2349 rtm->rtm_type == RTN_THROW)
2350 cfg->fc_flags |= RTF_REJECT;
2352 if (rtm->rtm_type == RTN_LOCAL)
2353 cfg->fc_flags |= RTF_LOCAL;
2355 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2356 cfg->fc_nlinfo.nlh = nlh;
2357 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2359 if (tb[RTA_GATEWAY]) {
2360 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2361 cfg->fc_flags |= RTF_GATEWAY;
/* Addresses may be truncated to the prefix length, rounded up to bytes. */
2365 int plen = (rtm->rtm_dst_len + 7) >> 3;
2367 if (nla_len(tb[RTA_DST]) < plen)
2370 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2374 int plen = (rtm->rtm_src_len + 7) >> 3;
2376 if (nla_len(tb[RTA_SRC]) < plen)
2379 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2382 if (tb[RTA_PREFSRC])
2383 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2386 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2388 if (tb[RTA_PRIORITY])
2389 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
/* Metrics and multipath blobs are kept as raw pointers into the message;
 * they stay valid for the lifetime of this request. */
2391 if (tb[RTA_METRICS]) {
2392 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2393 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2397 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2399 if (tb[RTA_MULTIPATH]) {
2400 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2401 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
/*
 * ip6_route_multipath - add (@add != 0) or delete each nexthop of an
 * RTA_MULTIPATH route.  Each rtnexthop entry is expanded into a per-hop
 * copy of @cfg with its own ifindex/gateway and applied individually.
 * Deletions continue past per-hop failures so all hops are attempted.
 */
2409 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2411 struct fib6_config r_cfg;
2412 struct rtnexthop *rtnh;
2415 int err = 0, last_err = 0;
2418 rtnh = (struct rtnexthop *)cfg->fc_mp;
2419 remaining = cfg->fc_mp_len;
2421 /* Parse a Multipath Entry */
2422 while (rtnh_ok(rtnh, remaining)) {
/* Start each hop from the shared config, then override per-hop fields. */
2423 memcpy(&r_cfg, cfg, sizeof(*cfg));
2424 if (rtnh->rtnh_ifindex)
2425 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2427 attrlen = rtnh_attrlen(rtnh);
2429 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2431 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2433 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2434 r_cfg.fc_flags |= RTF_GATEWAY;
2437 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2440 /* If we are trying to remove a route, do not stop the
2441 * loop when ip6_route_del() fails (because next hop is
2442 * already gone), we should try to remove all next hops.
2445 /* If add fails, we should try to delete all
2446 * next hops that have been already added.
2452 rtnh = rtnh_next(rtnh, &remaining);
/*
 * inet6_rtm_delroute - RTM_DELROUTE handler: parse the message and delete
 * either a multipath set or a single route.
 */
2458 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2460 struct fib6_config cfg;
2463 err = rtm_to_fib6_config(skb, nlh, &cfg);
2468 return ip6_route_multipath(&cfg, 0);
2470 return ip6_route_del(&cfg);
/*
 * inet6_rtm_newroute - RTM_NEWROUTE handler: parse the message and add
 * either a multipath set or a single route.
 */
2473 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2475 struct fib6_config cfg;
2478 err = rtm_to_fib6_config(skb, nlh, &cfg);
2483 return ip6_route_multipath(&cfg, 1);
2485 return ip6_route_add(&cfg);
/*
 * rt6_nlmsg_size - worst-case size of one RTM_NEWROUTE notification,
 * used to size the skb in inet6_rt_notify().  Must be kept in sync with
 * the attributes emitted by rt6_fill_node().
 */
2488 static inline size_t rt6_nlmsg_size(void)
2490 return NLMSG_ALIGN(sizeof(struct rtmsg))
2491 + nla_total_size(16) /* RTA_SRC */
2492 + nla_total_size(16) /* RTA_DST */
2493 + nla_total_size(16) /* RTA_GATEWAY */
2494 + nla_total_size(16) /* RTA_PREFSRC */
2495 + nla_total_size(4) /* RTA_TABLE */
2496 + nla_total_size(4) /* RTA_IIF */
2497 + nla_total_size(4) /* RTA_OIF */
2498 + nla_total_size(4) /* RTA_PRIORITY */
2499 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2500 + nla_total_size(sizeof(struct rta_cacheinfo));
/*
 * rt6_fill_node - serialize one rt6_info into an rtnetlink message.
 *
 * @dst/@src: when non-NULL, report these exact addresses as a /128
 *            (used by RTM_GETROUTE replies) instead of the route prefix.
 * @prefix:   nonzero means the dumper wants RTF_PREFIX_RT routes only.
 * @nowait:   passed through to ip6mr_get_route() for multicast dsts.
 *
 * Returns the nlmsg_end() result, or negative on overflow (the partially
 * built message is cancelled).  NOTE(review): some branch headers and
 * variable declarations are elided in this view.
 */
2503 static int rt6_fill_node(struct net *net,
2504 struct sk_buff *skb, struct rt6_info *rt,
2505 struct in6_addr *dst, struct in6_addr *src,
2506 int iif, int type, u32 portid, u32 seq,
2507 int prefix, int nowait, unsigned int flags)
2510 struct nlmsghdr *nlh;
2513 struct neighbour *n;
2515 if (prefix) { /* user wants prefix routes only */
2516 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2517 /* success since this is not a prefix route */
2522 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2526 rtm = nlmsg_data(nlh);
2527 rtm->rtm_family = AF_INET6;
2528 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2529 rtm->rtm_src_len = rt->rt6i_src.plen;
2532 table = rt->rt6i_table->tb6_id;
2534 table = RT6_TABLE_UNSPEC;
2535 rtm->rtm_table = table;
2536 if (nla_put_u32(skb, RTA_TABLE, table))
2537 goto nla_put_failure;
/* Reverse of rtm_to_fib6_config(): recover the route type from the
 * stored dst.error for reject routes. */
2538 if (rt->rt6i_flags & RTF_REJECT) {
2539 switch (rt->dst.error) {
2541 rtm->rtm_type = RTN_BLACKHOLE;
2544 rtm->rtm_type = RTN_PROHIBIT;
2547 rtm->rtm_type = RTN_THROW;
2550 rtm->rtm_type = RTN_UNREACHABLE;
2554 else if (rt->rt6i_flags & RTF_LOCAL)
2555 rtm->rtm_type = RTN_LOCAL;
2556 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2557 rtm->rtm_type = RTN_LOCAL;
2559 rtm->rtm_type = RTN_UNICAST;
2561 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2562 rtm->rtm_protocol = rt->rt6i_protocol;
2563 if (rt->rt6i_flags & RTF_DYNAMIC)
2564 rtm->rtm_protocol = RTPROT_REDIRECT;
2565 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2566 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2567 rtm->rtm_protocol = RTPROT_RA;
2569 rtm->rtm_protocol = RTPROT_KERNEL;
2572 if (rt->rt6i_flags & RTF_CACHE)
2573 rtm->rtm_flags |= RTM_F_CLONED;
/* An explicit @dst overrides the route prefix with a /128 answer. */
2576 if (nla_put(skb, RTA_DST, 16, dst))
2577 goto nla_put_failure;
2578 rtm->rtm_dst_len = 128;
2579 } else if (rtm->rtm_dst_len)
2580 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2581 goto nla_put_failure;
2582 #ifdef CONFIG_IPV6_SUBTREES
2584 if (nla_put(skb, RTA_SRC, 16, src))
2585 goto nla_put_failure;
2586 rtm->rtm_src_len = 128;
2587 } else if (rtm->rtm_src_len &&
2588 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2589 goto nla_put_failure;
2592 #ifdef CONFIG_IPV6_MROUTE
/* Multicast destinations are answered via the multicast routing cache. */
2593 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2594 int err = ip6mr_get_route(net, skb, rtm, nowait);
2599 goto nla_put_failure;
2601 if (err == -EMSGSIZE)
2602 goto nla_put_failure;
2607 if (nla_put_u32(skb, RTA_IIF, iif))
2608 goto nla_put_failure;
2610 struct in6_addr saddr_buf;
2611 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2612 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2613 goto nla_put_failure;
2616 if (rt->rt6i_prefsrc.plen) {
2617 struct in6_addr saddr_buf;
2618 saddr_buf = rt->rt6i_prefsrc.addr;
2619 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2620 goto nla_put_failure;
2623 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2624 goto nla_put_failure;
2628 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
2629 goto nla_put_failure;
2633 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2634 goto nla_put_failure;
2635 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2636 goto nla_put_failure;
/* Expiry is reported relative to now; 0 means the route is permanent. */
2638 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2640 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2641 goto nla_put_failure;
2643 return nlmsg_end(skb, nlh);
2646 nlmsg_cancel(skb, nlh);
/*
 * rt6_dump_route - per-route callback for the RTM_GETROUTE dump path.
 * Honors the RTM_F_PREFIX request flag (prefix routes only) when the
 * request message is large enough to carry an rtmsg header.
 */
2650 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2652 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2655 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2656 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2657 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2661 return rt6_fill_node(arg->net,
2662 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2663 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2664 prefix, 0, NLM_F_MULTI);
/*
 * inet6_rtm_getroute - RTM_GETROUTE handler: resolve a single route for
 * the requested src/dst/iif/oif and answer with one RTM_NEWROUTE message.
 *
 * With an input interface the lookup goes through the input path
 * (ip6_route_input_lookup); otherwise the output path is used.
 * Returns 0 on success or a negative errno.
 */
2667 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2669 struct net *net = sock_net(in_skb->sk);
2670 struct nlattr *tb[RTA_MAX+1];
2671 struct rt6_info *rt;
2672 struct sk_buff *skb;
2675 int err, iif = 0, oif = 0;
2677 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2682 memset(&fl6, 0, sizeof(fl6));
/* RTA_SRC/RTA_DST must carry full 128-bit addresses here. */
2685 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2688 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2692 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2695 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2699 iif = nla_get_u32(tb[RTA_IIF]);
2702 oif = nla_get_u32(tb[RTA_OIF]);
/* Input-interface queries simulate packet reception on that device. */
2705 struct net_device *dev;
2708 dev = __dev_get_by_index(net, iif);
2714 fl6.flowi6_iif = iif;
2716 if (!ipv6_addr_any(&fl6.saddr))
2717 flags |= RT6_LOOKUP_F_HAS_SADDR;
2719 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2722 fl6.flowi6_oif = oif;
2724 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2727 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
/* skb allocation failed: drop the route reference before bailing out. */
2729 dst_release(&rt->dst);
2734 /* Reserve room for dummy headers, this skb can pass
2735 through good chunk of routing engine.
2737 skb_reset_mac_header(skb);
2738 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
/* The reply skb owns the route reference from here on. */
2740 skb_dst_set(skb, &rt->dst);
2742 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2743 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2744 nlh->nlmsg_seq, 0, 0, 0);
2750 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
/*
 * inet6_rt_notify - broadcast an RTM_NEWROUTE/RTM_DELROUTE event for @rt
 * to RTNLGRP_IPV6_ROUTE listeners.  On failure the error is reported via
 * rtnl_set_sk_err() so listeners learn they missed an event.
 */
2755 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2757 struct sk_buff *skb;
2758 struct net *net = info->nl_net;
2763 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
/* rt6_nlmsg_size() guarantees the skb is large enough for one route. */
2765 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2769 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2770 event, info->portid, seq, 0, 0, 0);
2772 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2773 WARN_ON(err == -EMSGSIZE);
2777 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2778 info->nlh, gfp_any());
2782 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
/*
 * ip6_route_dev_notify - netdevice notifier: when the loopback device
 * registers in a netns, attach it (device + idev reference) to the
 * special null / prohibit / blackhole template routes of that netns.
 */
2785 static int ip6_route_dev_notify(struct notifier_block *this,
2786 unsigned long event, void *data)
2788 struct net_device *dev = (struct net_device *)data;
2789 struct net *net = dev_net(dev);
2791 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2792 net->ipv6.ip6_null_entry->dst.dev = dev;
2793 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2794 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2795 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2796 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2797 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2798 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2809 #ifdef CONFIG_PROC_FS
/*
 * rt6_info_route - emit one /proc/net/ipv6_route line for @rt:
 * dst/plen, src/plen (zeros without subtrees), gateway (or zeros),
 * metric, refcnt, use count, flags and device name.
 */
2820 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2822 struct seq_file *m = p_arg;
2823 struct neighbour *n;
2825 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2827 #ifdef CONFIG_IPV6_SUBTREES
2828 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2830 seq_puts(m, "00000000000000000000000000000000 00 ");
/* Gateway column: the neighbour's key when bound, all-zeros otherwise. */
2834 seq_printf(m, "%pi6", n->primary_key);
2836 seq_puts(m, "00000000000000000000000000000000");
2838 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2839 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2840 rt->dst.__use, rt->rt6i_flags,
2841 rt->dst.dev ? rt->dst.dev->name : "");
/* seq_file show: walk every route read-only and print it via rt6_info_route. */
2845 static int ipv6_route_show(struct seq_file *m, void *v)
2847 struct net *net = (struct net *)m->private;
2848 fib6_clean_all_ro(net, rt6_info_route, 0, m);
/* open handler for /proc/net/ipv6_route (per-netns single_open). */
2852 static int ipv6_route_open(struct inode *inode, struct file *file)
2854 return single_open_net(inode, file, ipv6_route_show);
/* file_operations for /proc/net/ipv6_route. */
2857 static const struct file_operations ipv6_route_proc_fops = {
2858 .owner = THIS_MODULE,
2859 .open = ipv6_route_open,
2861 .llseek = seq_lseek,
2862 .release = single_release_net,
/*
 * rt6_stats_seq_show - print /proc/net/rt6_stats: fib node and route
 * counters plus the current dst entry count for this netns.
 */
2865 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2867 struct net *net = (struct net *)seq->private;
2868 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2869 net->ipv6.rt6_stats->fib_nodes,
2870 net->ipv6.rt6_stats->fib_route_nodes,
2871 net->ipv6.rt6_stats->fib_rt_alloc,
2872 net->ipv6.rt6_stats->fib_rt_entries,
2873 net->ipv6.rt6_stats->fib_rt_cache,
2874 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2875 net->ipv6.rt6_stats->fib_discarded_routes);
/* open handler for /proc/net/rt6_stats (per-netns single_open). */
2880 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2882 return single_open_net(inode, file, rt6_stats_seq_show);
/* file_operations for /proc/net/rt6_stats. */
2885 static const struct file_operations rt6_stats_seq_fops = {
2886 .owner = THIS_MODULE,
2887 .open = rt6_stats_seq_open,
2889 .llseek = seq_lseek,
2890 .release = single_release_net,
2892 #endif /* CONFIG_PROC_FS */
2894 #ifdef CONFIG_SYSCTL
/*
 * ipv6_sysctl_rtcache_flush - handler for net.ipv6.route.flush: writing
 * triggers a garbage collection of the routing cache, scheduled after
 * the (pre-write) flush_delay; non-positive delay means "now".
 */
2897 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2898 void __user *buffer, size_t *lenp, loff_t *ppos)
2905 net = (struct net *)ctl->extra1;
/* Capture the delay before proc_dointvec() overwrites it. */
2906 delay = net->ipv6.sysctl.flush_delay;
2907 proc_dointvec(ctl, write, buffer, lenp, ppos);
2908 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
/*
 * Template for the per-netns net.ipv6.route sysctl table.  The .data
 * pointers reference init_net and are rewired per namespace by
 * ipv6_route_sysctl_init(); entry ORDER must match the table[N].data
 * assignments there.
 */
2912 ctl_table ipv6_route_table_template[] = {
2914 .procname = "flush",
2915 .data = &init_net.ipv6.sysctl.flush_delay,
2916 .maxlen = sizeof(int),
2918 .proc_handler = ipv6_sysctl_rtcache_flush
2921 .procname = "gc_thresh",
2922 .data = &ip6_dst_ops_template.gc_thresh,
2923 .maxlen = sizeof(int),
2925 .proc_handler = proc_dointvec,
2928 .procname = "max_size",
2929 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2930 .maxlen = sizeof(int),
2932 .proc_handler = proc_dointvec,
2935 .procname = "gc_min_interval",
2936 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2937 .maxlen = sizeof(int),
2939 .proc_handler = proc_dointvec_jiffies,
2942 .procname = "gc_timeout",
2943 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2944 .maxlen = sizeof(int),
2946 .proc_handler = proc_dointvec_jiffies,
2949 .procname = "gc_interval",
2950 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2951 .maxlen = sizeof(int),
2953 .proc_handler = proc_dointvec_jiffies,
2956 .procname = "gc_elasticity",
2957 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2958 .maxlen = sizeof(int),
2960 .proc_handler = proc_dointvec,
2963 .procname = "mtu_expires",
2964 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2965 .maxlen = sizeof(int),
2967 .proc_handler = proc_dointvec_jiffies,
2970 .procname = "min_adv_mss",
2971 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2972 .maxlen = sizeof(int),
2974 .proc_handler = proc_dointvec,
/* gc_min_interval_ms shares its backing int with gc_min_interval,
 * exposed in milliseconds. */
2977 .procname = "gc_min_interval_ms",
2978 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2979 .maxlen = sizeof(int),
2981 .proc_handler = proc_dointvec_ms_jiffies,
/*
 * ipv6_route_sysctl_init - clone the sysctl template for @net and point
 * each entry's .data at this namespace's values.  The numeric indices
 * MUST stay in sync with the entry order in ipv6_route_table_template.
 * Returns the table (NULL check on kmemdup elided in this view).
 */
2986 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2988 struct ctl_table *table;
2990 table = kmemdup(ipv6_route_table_template,
2991 sizeof(ipv6_route_table_template),
2995 table[0].data = &net->ipv6.sysctl.flush_delay;
2996 table[0].extra1 = net;
2997 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2998 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2999 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3000 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3001 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3002 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3003 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3004 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3005 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
/*
 * ip6_route_net_init - per-namespace routing state constructor.
 * @net: the namespace being brought up
 *
 * Copies the dst_ops template into the namespace, initializes its dst
 * entry counter, and clones the special route templates (null entry,
 * plus prohibit/blackhole when policy routing is compiled in).  Each
 * clone's dst.path is pointed back at itself and its dst.ops at the
 * per-netns dst_ops, and its metrics are bound to the shared read-only
 * template metrics.  Finally the per-netns sysctl knobs get their
 * defaults.  Unwinds allocations in reverse order on failure.
 *
 * NOTE(review): this extract is missing lines — the "ret" declaration,
 * the kmemdup() GFP flag arguments, the out_ip6_null_entry label,
 * the #endif for CONFIG_IPV6_MULTIPLE_TABLES, the success return and
 * the closing brace are not visible; confirm against the original.
 */
3012 static int __net_init ip6_route_net_init(struct net *net)
/* per-netns dst_ops starts as a copy of the global template */
3016 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3017 sizeof(net->ipv6.ip6_dst_ops));
3019 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3020 goto out_ip6_dst_ops;
/* the "null" route returned for unreachable destinations */
3022 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3023 sizeof(*net->ipv6.ip6_null_entry),
3025 if (!net->ipv6.ip6_null_entry)
3026 goto out_ip6_dst_entries;
/* self-referential path: this dst terminates its own chain */
3027 net->ipv6.ip6_null_entry->dst.path =
3028 (struct dst_entry *)net->ipv6.ip6_null_entry;
3029 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3030 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3031 ip6_template_metrics, true);
3033 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/* prohibit/blackhole entries only exist with policy routing */
3034 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3035 sizeof(*net->ipv6.ip6_prohibit_entry),
3037 if (!net->ipv6.ip6_prohibit_entry)
3038 goto out_ip6_null_entry;
3039 net->ipv6.ip6_prohibit_entry->dst.path =
3040 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3041 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3042 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3043 ip6_template_metrics, true);
3045 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3046 sizeof(*net->ipv6.ip6_blk_hole_entry),
3048 if (!net->ipv6.ip6_blk_hole_entry)
3049 goto out_ip6_prohibit_entry;
3050 net->ipv6.ip6_blk_hole_entry->dst.path =
3051 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3052 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3053 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3054 ip6_template_metrics, true);
/* default sysctl values; jiffies-based ones scale with HZ */
3057 net->ipv6.sysctl.flush_delay = 0;
3058 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3059 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3060 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3061 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3062 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3063 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
/* IPV6_MIN_MTU minus TCP (20) and IPv6 (40) header sizes */
3064 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3066 net->ipv6.ip6_rt_gc_expire = 30*HZ;
/* error unwind: free in reverse order of allocation */
3072 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3073 out_ip6_prohibit_entry:
3074 kfree(net->ipv6.ip6_prohibit_entry);
3076 kfree(net->ipv6.ip6_null_entry);
3078 out_ip6_dst_entries:
3079 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
/*
 * ip6_route_net_exit - per-namespace routing state destructor.
 * @net: the namespace being torn down
 *
 * Releases everything ip6_route_net_init() allocated: the cloned
 * special route entries and the dst entry counter.
 * NOTE(review): the #endif matching the #ifdef below is not visible in
 * this extract — presumably dropped by extraction; confirm.
 */
3084 static void __net_exit ip6_route_net_exit(struct net *net)
3086 kfree(net->ipv6.ip6_null_entry);
3087 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3088 kfree(net->ipv6.ip6_prohibit_entry);
3089 kfree(net->ipv6.ip6_blk_hole_entry);
3091 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
/*
 * ip6_route_net_init_late - late per-namespace setup.
 * @net: the namespace being brought up
 *
 * Creates the /proc/net/ipv6_route and /proc/net/rt6_stats entries
 * when procfs is compiled in.  Runs after the core routing state is
 * ready so the seq handlers never see a half-initialized namespace.
 */
3094 static int __net_init ip6_route_net_init_late(struct net *net)
3096 #ifdef CONFIG_PROC_FS
3097 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
3098 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
/*
 * ip6_route_net_exit_late - late per-namespace teardown.
 * @net: the namespace being torn down
 *
 * Removes the proc entries created by ip6_route_net_init_late().
 */
3105 static void __net_exit ip6_route_net_exit_late(struct net *net)
3107 #ifdef CONFIG_PROC_FS
3108 proc_net_remove(net, "ipv6_route");
3109 proc_net_remove(net, "rt6_stats");
/* pernet hooks for the core IPv6 routing state (dst_ops, special routes,
 * sysctl defaults) — registered early in ip6_route_init(). */
3111 static struct pernet_operations ip6_route_net_ops = {
3112 .init = ip6_route_net_init,
3113 .exit = ip6_route_net_exit,
/*
 * ipv6_inetpeer_init - allocate the per-namespace inetpeer base.
 * @net: the namespace being brought up
 *
 * NOTE(review): the NULL check / -ENOMEM return and the success return
 * are not visible in this extract; confirm against the original file.
 */
3116 static int __net_init ipv6_inetpeer_init(struct net *net)
3118 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3122 inet_peer_base_init(bp);
3123 net->ipv6.peers = bp;
/*
 * ipv6_inetpeer_exit - tear down the per-namespace inetpeer base.
 * @net: the namespace being torn down
 *
 * Clears the pointer before invalidating the tree so no new lookups
 * can find the base while it is being destroyed.
 */
3127 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3129 struct inet_peer_base *bp = net->ipv6.peers;
3131 net->ipv6.peers = NULL;
3132 inetpeer_invalidate_tree(bp);
/* pernet hooks for the IPv6 inetpeer base — registered before
 * ip6_route_net_ops in ip6_route_init(). */
3136 static struct pernet_operations ipv6_inetpeer_ops = {
3137 .init = ipv6_inetpeer_init,
3138 .exit = ipv6_inetpeer_exit,
/* pernet hooks for the late-stage setup (proc entries) — registered
 * last so proc files only appear once the namespace is fully built. */
3141 static struct pernet_operations ip6_route_net_late_ops = {
3142 .init = ip6_route_net_init_late,
3143 .exit = ip6_route_net_exit_late,
/* netdevice event notifier; handler is defined elsewhere in this file */
3146 static struct notifier_block ip6_route_dev_notifier = {
3147 .notifier_call = ip6_route_dev_notify,
/*
 * ip6_route_init - module/boot-time initialization of IPv6 routing.
 *
 * Ordering matters: slab cache -> blackhole dst counters -> inetpeer
 * pernet ops -> core routing pernet ops (which populates init_net's
 * special routes, consumed just below) -> fib6 -> fib6 rules -> late
 * pernet ops (proc) -> rtnetlink handlers -> netdev notifier.  Each
 * failure point unwinds everything registered before it via the goto
 * ladder at the bottom.
 *
 * NOTE(review): several lines are missing from this extract — the
 * "ret" declaration, the "if (ret)" guards preceding most gotos, the
 * fib6_init() call before "goto out_register_subsys", the #endif,
 * some unwind labels (e.g. out_dst_entries / out_kmem_cache) and the
 * final returns are not visible; confirm against the original file.
 */
3151 int __init ip6_route_init(void)
/* slab cache shared by all namespaces' rt6_info allocations */
3156 ip6_dst_ops_template.kmem_cachep =
3157 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3158 SLAB_HWCACHE_ALIGN, NULL);
3159 if (!ip6_dst_ops_template.kmem_cachep)
3162 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3164 goto out_kmem_cache;
3166 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3168 goto out_dst_entries;
3170 ret = register_pernet_subsys(&ip6_route_net_ops);
3172 goto out_register_inetpeer;
/* blackhole ops share the same slab cache as the template ops */
3174 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3176 /* Registering of the loopback is done before this portion of code,
3177 * the loopback reference in rt6_info will not be taken, do it
3178 * manually for init_net */
3179 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3180 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3181 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3182 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3183 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3184 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3185 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3189 goto out_register_subsys;
3195 ret = fib6_rules_init();
3199 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3201 goto fib6_rules_init;
/* netlink route message handlers; any failure unwinds the late subsys */
3204 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3205 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3206 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3207 goto out_register_late_subsys;
3209 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3211 goto out_register_late_subsys;
/* error unwind ladder: undo registrations in reverse order */
3216 out_register_late_subsys:
3217 unregister_pernet_subsys(&ip6_route_net_late_ops);
3219 fib6_rules_cleanup();
3224 out_register_subsys:
3225 unregister_pernet_subsys(&ip6_route_net_ops);
3226 out_register_inetpeer:
3227 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3229 dst_entries_destroy(&ip6_dst_blackhole_ops);
3231 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3235 void ip6_route_cleanup(void)
3237 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3238 unregister_pernet_subsys(&ip6_route_net_late_ops);
3239 fib6_rules_cleanup();
3242 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3243 unregister_pernet_subsys(&ip6_route_net_ops);
3244 dst_entries_destroy(&ip6_dst_blackhole_ops);
3245 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);