1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Linux IPv6 multicast routing support for BSD pim6sd
4 * Based on net/ipv4/ipmr.c.
6 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
7 * LSIIT Laboratory, Strasbourg, France
8 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
10 * Copyright (C)2007,2008 USAGI/WIDE Project
11 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
14 #include <linux/uaccess.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/errno.h>
19 #include <linux/kernel.h>
20 #include <linux/fcntl.h>
21 #include <linux/stat.h>
22 #include <linux/socket.h>
23 #include <linux/inet.h>
24 #include <linux/netdevice.h>
25 #include <linux/inetdevice.h>
26 #include <linux/proc_fs.h>
27 #include <linux/seq_file.h>
28 #include <linux/init.h>
29 #include <linux/compat.h>
30 #include <linux/rhashtable.h>
31 #include <net/protocol.h>
32 #include <linux/skbuff.h>
34 #include <linux/notifier.h>
35 #include <linux/if_arp.h>
36 #include <net/checksum.h>
37 #include <net/netlink.h>
38 #include <net/fib_rules.h>
41 #include <net/ip6_route.h>
42 #include <linux/mroute6.h>
43 #include <linux/pim.h>
44 #include <net/addrconf.h>
45 #include <linux/netfilter_ipv6.h>
46 #include <linux/export.h>
47 #include <net/ip6_checksum.h>
48 #include <linux/netconf.h>
49 #include <net/ip_tunnels.h>
51 #include <linux/nospec.h>
54 struct fib_rule common;
61 /* Big lock, protecting the vif table, the mrt cache and mroute socket state.
62 Note that changes are serialized via rtnl_lock.
65 static DEFINE_SPINLOCK(mrt_lock);
/* Return the net_device bound to @vif, read through rcu_dereference(). */
67 static struct net_device *vif_dev_read(const struct vif_device *vif)
69 return rcu_dereference(vif->dev);
72 /* Multicast router control variables */
74 /* Special spinlock for queue of unresolved entries */
75 static DEFINE_SPINLOCK(mfc_unres_lock);
77 /* We have returned to Alan's original scheme. The hash table of resolved
78 entries is changed only in process context and is protected
79 by the weak lock mrt_lock. The queue of unresolved entries is protected
80 by the strong spinlock mfc_unres_lock.
82 In this case the data path is entirely free of exclusive locks.
/*
 * mrt_cachep is the slab cache that ip6_mr_init() creates for
 * struct mfc6_cache objects. The prototypes that follow declare helpers
 * that are referenced before their definitions.
 */
85 static struct kmem_cache *mrt_cachep __read_mostly;
87 static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
88 static void ip6mr_free_table(struct mr_table *mrt);
90 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
91 struct net_device *dev, struct sk_buff *skb,
92 struct mfc6_cache *cache);
93 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
94 mifi_t mifi, int assert);
95 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
97 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
98 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
99 struct netlink_ext_ack *extack);
100 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
101 struct netlink_callback *cb);
102 static void mroute_clean_tables(struct mr_table *mrt, int flags);
103 static void ipmr_expire_process(struct timer_list *t);
105 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/*
 * Multiple-tables build: iterate over every mr_table linked on
 * net->ipv6.mr6_tables with list_for_each_entry_rcu(). The extra lockdep
 * condition accepts the walk when RTNL is held or when the list is empty.
 */
106 #define ip6mr_for_each_table(mrt, net) \
107 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \
108 lockdep_rtnl_is_held() || \
109 list_empty(&net->ipv6.mr6_tables))
/*
 * Table iterator handed to mr_dump() by ip6mr_dump(). The candidate is
 * either the first entry of net->ipv6.mr6_tables or the entry following
 * @mrt (presumably selected by whether @mrt is NULL -- confirm), and it is
 * compared against the list head to detect the end of the list.
 */
111 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
112 struct mr_table *mrt)
114 struct mr_table *ret;
117 ret = list_entry_rcu(net->ipv6.mr6_tables.next,
118 struct mr_table, list);
120 ret = list_entry_rcu(mrt->list.next,
121 struct mr_table, list);
123 if (&ret->list == &net->ipv6.mr6_tables)
/*
 * Multiple-tables build: find a table by walking the per-netns table list
 * with ip6mr_for_each_table() (presumably matching on @id -- confirm).
 */
128 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
130 struct mr_table *mrt;
132 ip6mr_for_each_table(mrt, net) {
/*
 * Multiple-tables build: resolve the mr_table for flow @flp6 through the
 * IP6MR fib rules. The flow is first adjusted by l3mdev_update_flow() and
 * then handed to fib_rules_lookup() on net->ipv6.mr6_rules_ops, with
 * FIB_LOOKUP_NOREF set in the lookup argument.
 */
139 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
140 struct mr_table **mrt)
143 struct ip6mr_result res;
144 struct fib_lookup_arg arg = {
146 .flags = FIB_LOOKUP_NOREF,
149 /* update flow if oif or iif point to device enslaved to l3mdev */
150 l3mdev_update_flow(net, flowi6_to_flowi(flp6));
152 err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
153 flowi6_to_flowi(flp6), 0, &arg);
/*
 * .action callback of ip6mr_rules_ops_template. Switches on rule->action
 * with dedicated cases for FR_ACT_UNREACHABLE, FR_ACT_PROHIBIT and
 * FR_ACT_BLACKHOLE. After the switch it resolves the table id with
 * fib_rule_get_table() and looks the table up in the rule's netns with
 * ip6mr_get_table(); the result structure is taken from arg->result.
 */
160 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
161 int flags, struct fib_lookup_arg *arg)
163 struct ip6mr_result *res = arg->result;
164 struct mr_table *mrt;
166 switch (rule->action) {
169 case FR_ACT_UNREACHABLE:
171 case FR_ACT_PROHIBIT:
173 case FR_ACT_BLACKHOLE:
178 arg->table = fib_rule_get_table(rule, arg);
180 mrt = ip6mr_get_table(rule->fr_net, arg->table);
187 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
192 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
193 struct fib_rule_hdr *frh, struct nlattr **tb,
194 struct netlink_ext_ack *extack)
199 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
205 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
206 struct fib_rule_hdr *frh)
/*
 * fib_rules_ops template for family RTNL_FAMILY_IP6MR. ip6mr_rules_init()
 * passes it to fib_rules_register(); it wires up the ip6mr_rule_*
 * callbacks and sizes rules as struct ip6mr_rule.
 */
214 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
215 .family = RTNL_FAMILY_IP6MR,
216 .rule_size = sizeof(struct ip6mr_rule),
217 .addr_size = sizeof(struct in6_addr),
218 .action = ip6mr_rule_action,
219 .match = ip6mr_rule_match,
220 .configure = ip6mr_rule_configure,
221 .compare = ip6mr_rule_compare,
222 .fill = ip6mr_rule_fill,
223 .nlgroup = RTNLGRP_IPV6_RULE,
224 .owner = THIS_MODULE,
/*
 * Multiple-tables build: per-netns setup of the rule machinery.
 * Registers the fib rules ops, initialises the table list, creates the
 * default table (RT6_TABLE_DFLT), adds a default rule with preference
 * 0x7fff pointing at it, and finally publishes the ops in
 * net->ipv6.mr6_rules_ops. The trailing lines are the unwind path: free
 * the table, then unregister the ops.
 */
227 static int __net_init ip6mr_rules_init(struct net *net)
229 struct fib_rules_ops *ops;
230 struct mr_table *mrt;
233 ops = fib_rules_register(&ip6mr_rules_ops_template, net);
237 INIT_LIST_HEAD(&net->ipv6.mr6_tables);
239 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
245 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
249 net->ipv6.mr6_rules_ops = ops;
254 ip6mr_free_table(mrt);
257 fib_rules_unregister(ops);
/*
 * Multiple-tables build: per-netns teardown. Unlinks and frees every table
 * on net->ipv6.mr6_tables (safe iteration, since entries are removed while
 * walking), then unregisters the fib rules ops.
 */
261 static void __net_exit ip6mr_rules_exit(struct net *net)
263 struct mr_table *mrt, *next;
266 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
267 list_del(&mrt->list);
268 ip6mr_free_table(mrt);
270 fib_rules_unregister(net->ipv6.mr6_rules_ops);
/* Multiple-tables build: dump the IP6MR fib rules to notifier @nb via fib_rules_dump(). */
273 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
274 struct netlink_ext_ack *extack)
276 return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
/* Multiple-tables build: return the fib rules sequence counter for RTNL_FAMILY_IP6MR. */
279 static unsigned int ip6mr_rules_seq_read(struct net *net)
281 return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
/*
 * Return true when @rule is equivalent to the built-in default rule: it
 * matches everything, its action is FR_ACT_TO_TBL, it targets
 * RT6_TABLE_DFLT and it is not an l3mdev rule. Exported for other modules.
 */
284 bool ip6mr_rule_default(const struct fib_rule *rule)
286 return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
287 rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
289 EXPORT_SYMBOL(ip6mr_rule_default);
/*
 * Single-table build: "iterate" over the one table net->ipv6.mrt6. The
 * loop body runs at most once because the step expression sets mrt to NULL.
 */
291 #define ip6mr_for_each_table(mrt, net) \
292 for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
/*
 * Single-table build: table iterator for mr_dump(). The visible return
 * yields net->ipv6.mrt6 (presumably only for the first call, when @mrt is
 * NULL -- confirm).
 */
294 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
295 struct mr_table *mrt)
298 return net->ipv6.mrt6;
/* Single-table build: @id is ignored; the only table is net->ipv6.mrt6. */
302 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
304 return net->ipv6.mrt6;
/* Single-table build: no rule lookup, *mrt is always set to net->ipv6.mrt6. */
307 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
308 struct mr_table **mrt)
310 *mrt = net->ipv6.mrt6;
/* Single-table build: create the default table and store it in net->ipv6.mrt6. */
314 static int __net_init ip6mr_rules_init(struct net *net)
316 struct mr_table *mrt;
318 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
321 net->ipv6.mrt6 = mrt;
/* Single-table build: free the netns table and clear the pointer to it. */
325 static void __net_exit ip6mr_rules_exit(struct net *net)
328 ip6mr_free_table(net->ipv6.mrt6);
329 net->ipv6.mrt6 = NULL;
332 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
333 struct netlink_ext_ack *extack)
338 static unsigned int ip6mr_rules_seq_read(struct net *net)
/*
 * rhashtable object comparison (installed as .obj_cmpfn below). Returns 0
 * when both the multicast group and the origin of the cache entry equal
 * the lookup key, non-zero when either differs.
 */
344 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
347 const struct mfc6_cache_cmp_arg *cmparg = arg->key;
348 struct mfc6_cache *c = (struct mfc6_cache *)ptr;
350 return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
351 !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
/*
 * Hash table parameters for the MFC cache: entries are linked through
 * mr_mfc.mnode and keyed by the embedded mfc6_cache.cmparg (origin, group)
 * pair, compared with ip6mr_hash_cmp(). Automatic shrinking is enabled.
 */
354 static const struct rhashtable_params ip6mr_rht_params = {
355 .head_offset = offsetof(struct mr_mfc, mnode),
356 .key_offset = offsetof(struct mfc6_cache, cmparg),
357 .key_len = sizeof(struct mfc6_cache_cmp_arg),
359 .obj_cmpfn = ip6mr_hash_cmp,
360 .automatic_shrinking = true,
/*
 * Callback given to mr_table_alloc() by ip6mr_new_table(). With
 * CONFIG_IPV6_MROUTE_MULTIPLE_TABLES it appends the new table to the
 * netns table list using the RCU-aware list_add_tail_rcu().
 */
363 static void ip6mr_new_table_set(struct mr_table *mrt,
366 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
367 list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
/* Wildcard lookup key: both origin and group are the unspecified address (::). */
371 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
372 .mf6c_origin = IN6ADDR_ANY_INIT,
373 .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
/* Table ops passed to mr_table_alloc(): hash parameters plus the wildcard key. */
376 static struct mr_table_ops ip6mr_mr_table_ops = {
377 .rht_params = &ip6mr_rht_params,
378 .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
/*
 * Get or create the table with identifier @id. An existing table is looked
 * up first with ip6mr_get_table() (presumably returned as-is when found --
 * confirm); otherwise a new one is allocated by mr_table_alloc() with
 * ipmr_expire_process() as the expiry timer handler and
 * ip6mr_new_table_set() as the post-allocation hook.
 */
381 static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
383 struct mr_table *mrt;
385 mrt = ip6mr_get_table(net, id);
389 return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
390 ipmr_expire_process, ip6mr_new_table_set);
/*
 * Tear down a table: shut the expiry timer down synchronously, flush all
 * MIFs and MFC entries (dynamic and static alike), then destroy the MFC
 * hash table.
 */
393 static void ip6mr_free_table(struct mr_table *mrt)
395 timer_shutdown_sync(&mrt->ipmr_expire_timer);
396 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
397 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
398 rhltable_destroy(&mrt->mfc_hash);
402 #ifdef CONFIG_PROC_FS
403 /* The /proc interfaces to multicast routing
404 * /proc/ip6_mr_cache /proc/ip6_mr_vif
/*
 * seq_file .start for /proc ip6_mr_vif. Only the default table
 * (RT6_TABLE_DFLT) is shown; -ENOENT is reported as an ERR_PTR (presumably
 * when that table does not exist -- confirm). Iteration itself is
 * delegated to the generic mr_vif_seq_start().
 */
407 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
410 struct mr_vif_iter *iter = seq->private;
411 struct net *net = seq_file_net(seq);
412 struct mr_table *mrt;
414 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
416 return ERR_PTR(-ENOENT);
421 return mr_vif_seq_start(seq, pos);
424 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
/*
 * seq_file .show for /proc ip6_mr_vif. For SEQ_START_TOKEN a column header
 * is printed; for a vif entry the row holds the index of the entry within
 * mrt->vif_table, the device name ("none" when no device is attached) and
 * the in/out byte and packet counters.
 */
430 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
432 struct mr_vif_iter *iter = seq->private;
433 struct mr_table *mrt = iter->mrt;
435 if (v == SEQ_START_TOKEN) {
437 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
439 const struct vif_device *vif = v;
440 const struct net_device *vif_dev;
443 vif_dev = vif_dev_read(vif);
444 name = vif_dev ? vif_dev->name : "none";
447 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
448 vif - mrt->vif_table,
449 name, vif->bytes_in, vif->pkt_in,
450 vif->bytes_out, vif->pkt_out,
/* seq_file operations registered for the "ip6_mr_vif" proc entry in ip6mr_net_init(). */
456 static const struct seq_operations ip6mr_vif_seq_ops = {
457 .start = ip6mr_vif_seq_start,
458 .next = mr_vif_seq_next,
459 .stop = ip6mr_vif_seq_stop,
460 .show = ip6mr_vif_seq_show,
/*
 * seq_file .start for /proc ip6_mr_cache. Uses the default table only and
 * delegates to mr_mfc_seq_start(), handing it mfc_unres_lock for the
 * unresolved queue. -ENOENT is reported as an ERR_PTR (presumably when
 * the table is missing -- confirm).
 */
463 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
465 struct net *net = seq_file_net(seq);
466 struct mr_table *mrt;
468 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
470 return ERR_PTR(-ENOENT);
472 return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
/*
 * seq_file .show for /proc ip6_mr_cache. Prints a header for
 * SEQ_START_TOKEN. For a cache entry it prints group and origin, then:
 *  - resolved entries (iterator not on mrt->mfc_unres_queue): packet, byte
 *    and wrong-interface counters, followed by the output interfaces in
 *    [minvif, maxvif) that exist and carry a TTL threshold below 255;
 *  - unresolved entries: three zero counters.
 */
475 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
479 if (v == SEQ_START_TOKEN) {
483 "Iif Pkts Bytes Wrong Oifs\n");
485 const struct mfc6_cache *mfc = v;
486 const struct mr_mfc_iter *it = seq->private;
487 struct mr_table *mrt = it->mrt;
489 seq_printf(seq, "%pI6 %pI6 %-3hd",
490 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
493 if (it->cache != &mrt->mfc_unres_queue) {
494 seq_printf(seq, " %8lu %8lu %8lu",
495 mfc->_c.mfc_un.res.pkt,
496 mfc->_c.mfc_un.res.bytes,
497 mfc->_c.mfc_un.res.wrong_if);
498 for (n = mfc->_c.mfc_un.res.minvif;
499 n < mfc->_c.mfc_un.res.maxvif; n++) {
500 if (VIF_EXISTS(mrt, n) &&
501 mfc->_c.mfc_un.res.ttls[n] < 255)
504 mfc->_c.mfc_un.res.ttls[n]);
507 /* unresolved mfc_caches don't contain
508 * pkt, bytes and wrong_if values
510 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
/* seq_file operations registered for the "ip6_mr_cache" proc entry in ip6mr_net_init(). */
517 static const struct seq_operations ipmr_mfc_seq_ops = {
518 .start = ipmr_mfc_seq_start,
519 .next = mr_mfc_seq_next,
520 .stop = mr_mfc_seq_stop,
521 .show = ipmr_mfc_seq_show,
525 #ifdef CONFIG_IPV6_PIMSM_V2
/*
 * Receive handler for PIM packets (registered for IPPROTO_PIM in
 * ip6_mr_init()). Visible steps:
 *  1. make sure the PIM register header plus an inner IPv6 header are
 *     present in the linear data;
 *  2. require a PIM REGISTER that is not a NULL-register, with a checksum
 *     that verifies either over the PIM header alone or over the whole
 *     packet;
 *  3. require the inner packet to be addressed to a multicast group with
 *     a non-zero payload length that fits inside the skb;
 *  4. find the table for the flow and read its register vif device;
 *  5. strip the outer headers so the inner IPv6 packet becomes the
 *     network header, and re-inject it on the register device with
 *     skb_tunnel_rx().
 */
527 static int pim6_rcv(struct sk_buff *skb)
529 struct pimreghdr *pim;
530 struct ipv6hdr *encap;
531 struct net_device *reg_dev = NULL;
532 struct net *net = dev_net(skb->dev);
533 struct mr_table *mrt;
534 struct flowi6 fl6 = {
535 .flowi6_iif = skb->dev->ifindex,
536 .flowi6_mark = skb->mark,
540 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
543 pim = (struct pimreghdr *)skb_transport_header(skb);
544 if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
545 (pim->flags & PIM_NULL_REGISTER) ||
546 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
547 sizeof(*pim), IPPROTO_PIM,
548 csum_partial((void *)pim, sizeof(*pim), 0)) &&
549 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
552 /* check if the inner packet is destined to mcast group */
553 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
556 if (!ipv6_addr_is_multicast(&encap->daddr) ||
557 encap->payload_len == 0 ||
558 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
561 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
564 /* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */
565 reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
566 if (reg_vif_num >= 0)
567 reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);
572 skb->mac_header = skb->network_header;
573 skb_pull(skb, (u8 *)encap - skb->data);
574 skb_reset_network_header(skb);
575 skb->protocol = htons(ETH_P_IPV6);
576 skb->ip_summed = CHECKSUM_NONE;
578 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
588 static const struct inet6_protocol pim6_protocol = {
592 /* Service routines creating virtual interfaces: PIMREG */
/*
 * ndo_start_xmit of the PIM register pseudo-device. Packets sent on the
 * device are not transmitted; they are reported to the multicast routing
 * daemon through ip6mr_cache_report() on the table's register vif. The
 * header is validated with pskb_inet_may_pull() before the table lookup;
 * tx_bytes/tx_packets are counted for reported packets and tx_errors on
 * the failure path.
 */
594 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
595 struct net_device *dev)
597 struct net *net = dev_net(dev);
598 struct mr_table *mrt;
599 struct flowi6 fl6 = {
600 .flowi6_oif = dev->ifindex,
601 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
602 .flowi6_mark = skb->mark,
605 if (!pskb_inet_may_pull(skb))
608 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
611 DEV_STATS_ADD(dev, tx_bytes, skb->len);
612 DEV_STATS_INC(dev, tx_packets);
614 ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
621 DEV_STATS_INC(dev, tx_errors);
626 static int reg_vif_get_iflink(const struct net_device *dev)
/* net_device_ops of the PIM register pseudo-device, installed by reg_vif_setup(). */
631 static const struct net_device_ops reg_vif_netdev_ops = {
632 .ndo_start_xmit = reg_vif_xmit,
633 .ndo_get_iflink = reg_vif_get_iflink,
/*
 * alloc_netdev() setup callback for the PIM register pseudo-device (see
 * ip6mr_reg_vif()). Marks the device as ARPHRD_PIMREG with no ARP, sizes
 * the MTU as 1500 minus an IPv6 header and 8 further bytes, installs
 * reg_vif_netdev_ops, lets the core free the netdev, and pins the device
 * to its network namespace.
 */
636 static void reg_vif_setup(struct net_device *dev)
638 dev->type = ARPHRD_PIMREG;
639 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
640 dev->flags = IFF_NOARP;
641 dev->netdev_ops = &reg_vif_netdev_ops;
642 dev->needs_free_netdev = true;
643 dev->features |= NETIF_F_NETNS_LOCAL;
/*
 * Create, register and bring up the PIM register pseudo-device for table
 * @mrt. The device is named "pim6reg" for the default table and
 * "pim6reg<id>" otherwise. The trailing unregister_netdevice() is the
 * failure path after a successful registration.
 *
 * NOTE(review): both sprintf() calls are unbounded. "pim6reg" followed by
 * a 10-digit u32 id needs 18 bytes including the terminator; if `name` is
 * an IFNAMSIZ-sized buffer (its declaration is not visible here --
 * confirm) large table ids would overflow it. Consider snprintf() with
 * sizeof(name).
 */
646 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
648 struct net_device *dev;
651 if (mrt->id == RT6_TABLE_DFLT)
652 sprintf(name, "pim6reg");
654 sprintf(name, "pim6reg%u", mrt->id);
656 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
660 dev_net_set(dev, net);
662 if (register_netdevice(dev)) {
667 if (dev_open(dev, NULL))
674 unregister_netdevice(dev);
/*
 * Publish a vif add/delete event to the fib notifier chain for family
 * RTNL_FAMILY_IP6MR, passing the per-netns net->ipv6.ipmr_seq counter.
 */
679 static int call_ip6mr_vif_entry_notifiers(struct net *net,
680 enum fib_event_type event_type,
681 struct vif_device *vif,
682 struct net_device *vif_dev,
683 mifi_t vif_index, u32 tb_id)
685 return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
686 vif, vif_dev, vif_index, tb_id,
687 &net->ipv6.ipmr_seq);
/*
 * Publish an MFC entry event to the fib notifier chain for family
 * RTNL_FAMILY_IP6MR, passing the generic part (&mfc->_c) of the entry.
 */
690 static int call_ip6mr_mfc_entry_notifiers(struct net *net,
691 enum fib_event_type event_type,
692 struct mfc6_cache *mfc, u32 tb_id)
694 return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
695 &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
698 /* Delete a VIF entry */
/*
 * Remove multicast interface @vifi from @mrt.
 *
 * Returns -EADDRNOTAVAIL when @vifi is out of range (and, per the second
 * visible return, presumably when the slot has no device -- confirm).
 *
 * Order of operations in the visible code:
 *  - emit FIB_EVENT_VIF_DEL to the notifier chain;
 *  - under mrt_lock: clear v->dev, reset mroute_reg_vif_num to -1 if this
 *    was the register vif, and shrink mrt->maxvif when the highest slot
 *    was removed (scanning down for the next existing vif);
 *  - drop the allmulti reference on the device, decrement the inet6
 *    mc_forwarding counter and send a netconf notification;
 *  - for a register vif removed with @notify == 0, queue the pseudo-device
 *    for unregistration on @head;
 *  - release the device reference held through v->dev_tracker.
 */
699 static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
700 struct list_head *head)
702 struct vif_device *v;
703 struct net_device *dev;
704 struct inet6_dev *in6_dev;
706 if (vifi < 0 || vifi >= mrt->maxvif)
707 return -EADDRNOTAVAIL;
709 v = &mrt->vif_table[vifi];
711 dev = rtnl_dereference(v->dev);
713 return -EADDRNOTAVAIL;
715 call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
716 FIB_EVENT_VIF_DEL, v, dev,
718 spin_lock(&mrt_lock);
719 RCU_INIT_POINTER(v->dev, NULL);
721 #ifdef CONFIG_IPV6_PIMSM_V2
722 if (vifi == mrt->mroute_reg_vif_num) {
723 /* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */
724 WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
728 if (vifi + 1 == mrt->maxvif) {
730 for (tmp = vifi - 1; tmp >= 0; tmp--) {
731 if (VIF_EXISTS(mrt, tmp))
734 WRITE_ONCE(mrt->maxvif, tmp + 1);
737 spin_unlock(&mrt_lock);
739 dev_set_allmulti(dev, -1);
741 in6_dev = __in6_dev_get(dev);
743 atomic_dec(&in6_dev->cnf.mc_forwarding);
744 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
745 NETCONFA_MC_FORWARDING,
746 dev->ifindex, &in6_dev->cnf);
749 if ((v->flags & MIFF_REGISTER) && !notify)
750 unregister_netdevice_queue(dev, head);
752 netdev_put(dev, &v->dev_tracker);
/*
 * RCU callback: map the rcu_head back to its mr_mfc and return the
 * enclosing mfc6_cache object to the mrt_cachep slab cache.
 */
756 static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
758 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
760 kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
/* Free cache entry @c via call_rcu(), deferring the release to ip6mr_cache_free_rcu(). */
763 static inline void ip6mr_cache_free(struct mfc6_cache *c)
765 call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
768 /* Destroy an unresolved cache entry, killing queued skbs
769 and reporting error to netlink readers.
/*
 * Drops the table's unresolved-queue counter and drains the skbs queued
 * on @c. A queued skb whose IPv6 version field is 0 is treated as a
 * pending netlink request: it is rewritten in place into an NLMSG_ERROR
 * carrying -ETIMEDOUT and unicast back to the requesting portid.
 */
772 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
774 struct net *net = read_pnet(&mrt->net);
777 atomic_dec(&mrt->cache_resolve_queue_len);
779 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
780 if (ipv6_hdr(skb)->version == 0) {
781 struct nlmsghdr *nlh = skb_pull(skb,
782 sizeof(struct ipv6hdr));
783 nlh->nlmsg_type = NLMSG_ERROR;
784 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
785 skb_trim(skb, nlh->nlmsg_len);
786 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
787 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
796 /* Timer process for all the unresolved queue. */
/*
 * Walk mrt->mfc_unres_queue once. Entries whose deadline is still in the
 * future contribute their remaining time to the next timer interval
 * (starting from a 10 second ceiling); the other entries are announced
 * with RTM_DELROUTE and destroyed. The timer is re-armed if any entry
 * remains. The callers visible here (ipmr_expire_process() and
 * ip6mr_cache_unresolved()) invoke it with mfc_unres_lock held.
 */
798 static void ipmr_do_expire_process(struct mr_table *mrt)
800 unsigned long now = jiffies;
801 unsigned long expires = 10 * HZ;
802 struct mr_mfc *c, *next;
804 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
805 if (time_after(c->mfc_un.unres.expires, now)) {
807 unsigned long interval = c->mfc_un.unres.expires - now;
808 if (interval < expires)
814 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
815 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
818 if (!list_empty(&mrt->mfc_unres_queue))
819 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
/*
 * Timer callback for mrt->ipmr_expire_timer. It never spins on
 * mfc_unres_lock: if the trylock fails the timer is simply re-armed one
 * jiffy later. With the lock held, a non-empty unresolved queue is
 * processed by ipmr_do_expire_process().
 */
822 static void ipmr_expire_process(struct timer_list *t)
824 struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
826 if (!spin_trylock(&mfc_unres_lock)) {
827 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
831 if (!list_empty(&mrt->mfc_unres_queue))
832 ipmr_do_expire_process(mrt);
834 spin_unlock(&mfc_unres_lock);
837 /* Fill oifs list. It is called under locked mrt_lock. */
/*
 * Rebuild the output-interface state of @cache from the ttls[] array.
 * All thresholds are first reset to 255 (interface not used) and the
 * [minvif, maxvif) window is emptied. Every existing vif whose requested
 * threshold is in 1..254 is then copied in and the window is widened to
 * include it. Finally the last-use timestamp is refreshed.
 */
839 static void ip6mr_update_thresholds(struct mr_table *mrt,
840 struct mr_mfc *cache,
845 cache->mfc_un.res.minvif = MAXMIFS;
846 cache->mfc_un.res.maxvif = 0;
847 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
849 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
850 if (VIF_EXISTS(mrt, vifi) &&
851 ttls[vifi] && ttls[vifi] < 255) {
852 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
853 if (cache->mfc_un.res.minvif > vifi)
854 cache->mfc_un.res.minvif = vifi;
855 if (cache->mfc_un.res.maxvif <= vifi)
856 cache->mfc_un.res.maxvif = vifi + 1;
859 cache->mfc_un.res.lastuse = jiffies;
/*
 * Add multicast interface vifc->mif6c_mifi to @mrt.
 *
 * Visible flow:
 *  - the slot must not already hold a vif;
 *  - depending on vifc->mif6c_flags the device is either a freshly created
 *    PIM register pseudo-device (only when the table has no register vif
 *    yet; CONFIG_IPV6_PIMSM_V2) or the existing device with index
 *    vifc->mif6c_pifi (-EADDRNOTAVAIL is returned on that path, presumably
 *    when the index does not resolve -- confirm); either way the device is
 *    switched to allmulti;
 *  - the inet6 mc_forwarding counter is incremented and a netconf
 *    notification is sent;
 *  - the vif structure is initialised; VIFF_STATIC is set when the request
 *    did not come from the mroute socket (@mrtsock == 0);
 *  - under mrt_lock the device pointer is published with
 *    rcu_assign_pointer(), the register vif number and maxvif are updated
 *    with WRITE_ONCE() for lockless readers;
 *  - FIB_EVENT_VIF_ADD is sent to the notifier chain.
 *
 * NOTE(review): `v` is computed from vifc->mif6c_mifi before any range
 * check visible in this function -- presumably the caller validates the
 * index against MAXMIFS; confirm.
 */
862 static int mif6_add(struct net *net, struct mr_table *mrt,
863 struct mif6ctl *vifc, int mrtsock)
865 int vifi = vifc->mif6c_mifi;
866 struct vif_device *v = &mrt->vif_table[vifi];
867 struct net_device *dev;
868 struct inet6_dev *in6_dev;
872 if (VIF_EXISTS(mrt, vifi))
875 switch (vifc->mif6c_flags) {
876 #ifdef CONFIG_IPV6_PIMSM_V2
879 * Special Purpose VIF in PIM
880 * All the packets will be sent to the daemon
882 if (mrt->mroute_reg_vif_num >= 0)
884 dev = ip6mr_reg_vif(net, mrt);
887 err = dev_set_allmulti(dev, 1);
889 unregister_netdevice(dev);
896 dev = dev_get_by_index(net, vifc->mif6c_pifi);
898 return -EADDRNOTAVAIL;
899 err = dev_set_allmulti(dev, 1);
909 in6_dev = __in6_dev_get(dev);
911 atomic_inc(&in6_dev->cnf.mc_forwarding);
912 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
913 NETCONFA_MC_FORWARDING,
914 dev->ifindex, &in6_dev->cnf);
917 /* Fill in the VIF structures */
918 vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
919 vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
922 /* And finish update writing critical data */
923 spin_lock(&mrt_lock);
924 rcu_assign_pointer(v->dev, dev);
925 netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
926 #ifdef CONFIG_IPV6_PIMSM_V2
927 if (v->flags & MIFF_REGISTER)
928 WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
930 if (vifi + 1 > mrt->maxvif)
931 WRITE_ONCE(mrt->maxvif, vifi + 1);
932 spin_unlock(&mrt_lock);
933 call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
934 v, dev, vifi, mrt->id);
/* Look up the (origin, group) cache entry in @mrt through mr_mfc_find(). */
938 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
939 const struct in6_addr *origin,
940 const struct in6_addr *mcastgrp)
942 struct mfc6_cache_cmp_arg arg = {
943 .mf6c_origin = *origin,
944 .mf6c_mcastgrp = *mcastgrp,
947 return mr_mfc_find(mrt, &arg);
950 /* Look for a (*,G) entry */
/*
 * Wildcard-origin lookup for @mcastgrp on interface @mifi. When the group
 * itself is the unspecified address the lookup is delegated to
 * mr_mfc_find_any_parent(); otherwise mr_mfc_find_any() is used with a key
 * whose origin is in6addr_any.
 */
951 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
952 struct in6_addr *mcastgrp,
955 struct mfc6_cache_cmp_arg arg = {
956 .mf6c_origin = in6addr_any,
957 .mf6c_mcastgrp = *mcastgrp,
960 if (ipv6_addr_any(mcastgrp))
961 return mr_mfc_find_any_parent(mrt, mifi);
962 return mr_mfc_find_any(mrt, mifi, &arg);
965 /* Look for a (S,G,iif) entry if parent != -1 */
/* Look up the (origin, group) entry, constrained by @parent, through mr_mfc_find_parent(). */
966 static struct mfc6_cache *
967 ip6mr_cache_find_parent(struct mr_table *mrt,
968 const struct in6_addr *origin,
969 const struct in6_addr *mcastgrp,
972 struct mfc6_cache_cmp_arg arg = {
973 .mf6c_origin = *origin,
974 .mf6c_mcastgrp = *mcastgrp,
977 return mr_mfc_find_parent(mrt, &arg, parent);
980 /* Allocate a multicast cache entry */
/*
 * Allocate a zeroed resolved cache entry with GFP_KERNEL. last_assert is
 * back-dated by more than MFC_ASSERT_THRESH, minvif starts at MAXMIFS (no
 * output interface yet), the free hook is ip6mr_cache_free_rcu() and the
 * reference count starts at 1.
 */
981 static struct mfc6_cache *ip6mr_cache_alloc(void)
983 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
986 c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
987 c->_c.mfc_un.res.minvif = MAXMIFS;
988 c->_c.free = ip6mr_cache_free_rcu;
989 refcount_set(&c->_c.mfc_un.res.refcount, 1);
/*
 * Allocate a zeroed unresolved cache entry with GFP_ATOMIC, initialise its
 * queue of pending skbs and give it a 10 second expiry deadline.
 */
993 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
995 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
998 skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
999 c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
1004 * A cache entry has gone into a resolved state from queued
/*
 * Replay everything queued on the unresolved entry @uc against the newly
 * resolved entry @c. A queued skb with IPv6 version 0 is a pending netlink
 * request: it is answered with the route filled in by mr_fill_mroute(),
 * or turned into an NLMSG_ERROR with -EMSGSIZE when filling fails, and
 * unicast back to the requester. The remaining visible call forwards a
 * queued skb through ip6_mr_forward().
 */
1007 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1008 struct mfc6_cache *uc, struct mfc6_cache *c)
1010 struct sk_buff *skb;
1013 * Play the pending entries through our router
1016 while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1017 if (ipv6_hdr(skb)->version == 0) {
1018 struct nlmsghdr *nlh = skb_pull(skb,
1019 sizeof(struct ipv6hdr));
1021 if (mr_fill_mroute(mrt, skb, &c->_c,
1022 nlmsg_data(nlh)) > 0) {
1023 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1025 nlh->nlmsg_type = NLMSG_ERROR;
1026 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1027 skb_trim(skb, nlh->nlmsg_len);
1028 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1030 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1033 ip6_mr_forward(net, mrt, skb->dev, skb, c);
1040 * Bounce a cache query up to pim6sd and netlink.
1042 * Called under rcu_read_lock()
/*
 * Build an mrt6msg describing @pkt and queue it on the table's mroute
 * socket; the message is also passed to mrt6msg_netlink_event().
 *
 * Two message layouts are visible:
 *  - MRT6MSG_WHOLEPKT / MRT6MSG_WRMIFWHOLE (CONFIG_IPV6_PIMSM_V2): the
 *    whole packet is copied with extra headroom and an mrt6msg header is
 *    pushed in front of it; im6_mif is @mifi for WRMIFWHOLE and the
 *    register vif number otherwise;
 *  - all other types: a fresh skb holding a copy of the IPv6 header
 *    followed by the mrt6msg, with the packet's dst cloned onto it.
 * In both layouts im6_src/im6_dst are taken from the packet's IPv6 header
 * and the skb is marked CHECKSUM_UNNECESSARY.
 *
 * A warning is rate-limit logged on the path following
 * sock_queue_rcv_skb() (presumably when queuing fails -- confirm).
 */
1045 static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
1046 mifi_t mifi, int assert)
1048 struct sock *mroute6_sk;
1049 struct sk_buff *skb;
1050 struct mrt6msg *msg;
1053 #ifdef CONFIG_IPV6_PIMSM_V2
1054 if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE)
1055 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1059 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1064 /* I suppose that internal messages
1065 * do not require checksums */
1067 skb->ip_summed = CHECKSUM_UNNECESSARY;
1069 #ifdef CONFIG_IPV6_PIMSM_V2
1070 if (assert == MRT6MSG_WHOLEPKT || assert == MRT6MSG_WRMIFWHOLE) {
1071 /* Ugly, but we have no choice with this interface.
1072 Duplicate old header, fix length etc.
1073 And all this only to mangle msg->im6_msgtype and
1074 to set msg->im6_mbz to "mbz" :-)
1076 __skb_pull(skb, skb_network_offset(pkt));
1078 skb_push(skb, sizeof(*msg));
1079 skb_reset_transport_header(skb);
1080 msg = (struct mrt6msg *)skb_transport_header(skb);
1082 msg->im6_msgtype = assert;
1083 if (assert == MRT6MSG_WRMIFWHOLE)
1084 msg->im6_mif = mifi;
1086 msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num);
1088 msg->im6_src = ipv6_hdr(pkt)->saddr;
1089 msg->im6_dst = ipv6_hdr(pkt)->daddr;
1091 skb->ip_summed = CHECKSUM_UNNECESSARY;
1096 * Copy the IP header
1099 skb_put(skb, sizeof(struct ipv6hdr));
1100 skb_reset_network_header(skb);
1101 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1106 skb_put(skb, sizeof(*msg));
1107 skb_reset_transport_header(skb);
1108 msg = (struct mrt6msg *)skb_transport_header(skb);
1111 msg->im6_msgtype = assert;
1112 msg->im6_mif = mifi;
1114 msg->im6_src = ipv6_hdr(pkt)->saddr;
1115 msg->im6_dst = ipv6_hdr(pkt)->daddr;
1117 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1118 skb->ip_summed = CHECKSUM_UNNECESSARY;
1121 mroute6_sk = rcu_dereference(mrt->mroute_sk);
1127 mrt6msg_netlink_event(mrt, skb);
1129 /* Deliver to user space multicast routing algorithms */
1130 ret = sock_queue_rcv_skb(mroute6_sk, skb);
1133 net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1140 /* Queue a packet for resolution. It gets locked cache entry! */
/*
 * Park @skb on the unresolved entry for its (source, destination) pair
 * while the routing daemon resolves the route. Runs under mfc_unres_lock
 * taken with BHs disabled.
 *
 * Visible flow:
 *  - search mrt->mfc_unres_queue for an entry with the same group and
 *    origin as the packet;
 *  - otherwise allocate a new unresolved entry, fill in origin and group
 *    (parent -1) and report the packet to the daemon with
 *    MRT6MSG_NOCACHE; the entry is freed again on the report-failure path;
 *  - on success the new entry is counted, linked on the queue and
 *    announced with RTM_NEWROUTE, and ipmr_do_expire_process() is run;
 *  - the pending-skb queue is checked against a length limit (qlen > 3)
 *    before the skb is appended; the receiving ifindex is recorded in
 *    skb->skb_iif (presumably only when @dev is set -- confirm).
 */
1141 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1142 struct sk_buff *skb, struct net_device *dev)
1144 struct mfc6_cache *c;
1148 spin_lock_bh(&mfc_unres_lock);
1149 list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1150 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1151 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1159 * Create a new entry if allowable
1162 c = ip6mr_cache_alloc_unres();
1164 spin_unlock_bh(&mfc_unres_lock);
1170 /* Fill in the new cache entry */
1171 c->_c.mfc_parent = -1;
1172 c->mf6c_origin = ipv6_hdr(skb)->saddr;
1173 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1176 * Reflect first query at pim6sd
1178 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1180 /* If the report failed throw the cache entry
1183 spin_unlock_bh(&mfc_unres_lock);
1185 ip6mr_cache_free(c);
1190 atomic_inc(&mrt->cache_resolve_queue_len);
1191 list_add(&c->_c.list, &mrt->mfc_unres_queue);
1192 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1194 ipmr_do_expire_process(mrt);
1197 /* See if we can append the packet */
1198 if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1204 skb->skb_iif = dev->ifindex;
1206 skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1210 spin_unlock_bh(&mfc_unres_lock);
1215 * MFC6 cache manipulation by user space
/*
 * Delete the resolved cache entry described by @mfc (origin, group,
 * parent). The entry is removed from the hash table and from the RCU
 * list, FIB_EVENT_ENTRY_DEL and RTM_DELROUTE are emitted, and the
 * reference is dropped with mr_cache_put().
 */
1218 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1221 struct mfc6_cache *c;
1223 /* The entries are added/deleted only under RTNL */
1225 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1226 &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1230 rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1231 list_del_rcu(&c->_c.list);
1233 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1234 FIB_EVENT_ENTRY_DEL, c, mrt->id);
1235 mr6_netlink_event(mrt, c, RTM_DELROUTE);
1236 mr_cache_put(&c->_c);
/*
 * netdevice notifier callback. NETDEV_UNREGISTER is the only event
 * compared against; for it, every table of the device's netns is scanned
 * and each vif bound to the device is removed with mif6_delete() (with
 * notify == 1 and no unregister list).
 */
1240 static int ip6mr_device_event(struct notifier_block *this,
1241 unsigned long event, void *ptr)
1243 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1244 struct net *net = dev_net(dev);
1245 struct mr_table *mrt;
1246 struct vif_device *v;
1249 if (event != NETDEV_UNREGISTER)
1252 ip6mr_for_each_table(mrt, net) {
1253 v = &mrt->vif_table[0];
1254 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1255 if (rcu_access_pointer(v->dev) == dev)
1256 mif6_delete(mrt, ct, 1, NULL);
/* fib notifier sequence: the per-netns ipmr_seq counter plus the rules sequence. */
1263 static unsigned int ip6mr_seq_read(struct net *net)
1267 return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
/*
 * fib notifier dump: delegate to the generic mr_dump(), supplying the
 * IP6MR rule dumper and table iterator.
 */
1270 static int ip6mr_dump(struct net *net, struct notifier_block *nb,
1271 struct netlink_ext_ack *extack)
1273 return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1274 ip6mr_mr_table_iter, extack);
/* netdevice notifier block; registered in ip6_mr_init(), removed in ip6_mr_cleanup(). */
1277 static struct notifier_block ip6_mr_notifier = {
1278 .notifier_call = ip6mr_device_event
/* fib notifier ops template registered per netns by ip6mr_notifier_init(). */
1281 static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1282 .family = RTNL_FAMILY_IP6MR,
1283 .fib_seq_read = ip6mr_seq_read,
1284 .fib_dump = ip6mr_dump,
1285 .owner = THIS_MODULE,
/*
 * Per-netns notifier setup: reset the sequence counter, register the fib
 * notifier ops and remember them in net->ipv6.ip6mr_notifier_ops. A
 * registration failure is returned as PTR_ERR(ops).
 */
1288 static int __net_init ip6mr_notifier_init(struct net *net)
1290 struct fib_notifier_ops *ops;
1292 net->ipv6.ipmr_seq = 0;
1294 ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1296 return PTR_ERR(ops);
1298 net->ipv6.ip6mr_notifier_ops = ops;
/* Per-netns notifier teardown: unregister the fib notifier ops and clear the pointer. */
1303 static void __net_exit ip6mr_notifier_exit(struct net *net)
1305 fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1306 net->ipv6.ip6mr_notifier_ops = NULL;
1309 /* Setup for IP multicast routing */
/*
 * Per-netns initialisation: notifier ops first, then rules/tables, then
 * (with CONFIG_PROC_FS) the "ip6_mr_vif" and "ip6_mr_cache" proc entries.
 * The trailing lines are the unwind path in reverse order: remove the vif
 * proc entry, tear down the rules, then the notifier.
 */
1310 static int __net_init ip6mr_net_init(struct net *net)
1314 err = ip6mr_notifier_init(net);
1318 err = ip6mr_rules_init(net);
1320 goto ip6mr_rules_fail;
1322 #ifdef CONFIG_PROC_FS
1324 if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1325 sizeof(struct mr_vif_iter)))
1327 if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1328 sizeof(struct mr_mfc_iter)))
1329 goto proc_cache_fail;
1334 #ifdef CONFIG_PROC_FS
1336 remove_proc_entry("ip6_mr_vif", net->proc_net);
1339 ip6mr_rules_exit(net);
1343 ip6mr_notifier_exit(net);
/*
 * Per-netns exit: remove both proc entries (CONFIG_PROC_FS) and the
 * notifier ops. Tables and rules are not released here but in
 * ip6mr_net_exit_batch().
 */
1347 static void __net_exit ip6mr_net_exit(struct net *net)
1349 #ifdef CONFIG_PROC_FS
1350 remove_proc_entry("ip6_mr_cache", net->proc_net);
1351 remove_proc_entry("ip6_mr_vif", net->proc_net);
1353 ip6mr_notifier_exit(net);
/* Batched netns exit: run ip6mr_rules_exit() for every namespace on @net_list. */
1356 static void __net_exit ip6mr_net_exit_batch(struct list_head *net_list)
1361 list_for_each_entry(net, net_list, exit_list)
1362 ip6mr_rules_exit(net);
/* Per-network-namespace hooks; registered from ip6_mr_init(). */
1366 static struct pernet_operations ip6mr_net_ops = {
1367 .init = ip6mr_net_init,
1368 .exit = ip6mr_net_exit,
1369 .exit_batch = ip6mr_net_exit_batch,
/*
 * Subsystem initialisation. In order: create the "ip6_mrt_cache" slab
 * cache, register the pernet operations, register the netdevice notifier,
 * add the PIM protocol handler (CONFIG_IPV6_PIMSM_V2) and register the
 * RTM_GETROUTE netlink handlers. The trailing lines unwind those steps in
 * reverse order on failure.
 */
1372 int __init ip6_mr_init(void)
1376 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1377 sizeof(struct mfc6_cache),
1378 0, SLAB_HWCACHE_ALIGN,
1383 err = register_pernet_subsys(&ip6mr_net_ops);
1385 goto reg_pernet_fail;
1387 err = register_netdevice_notifier(&ip6_mr_notifier);
1389 goto reg_notif_fail;
1390 #ifdef CONFIG_IPV6_PIMSM_V2
1391 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1392 pr_err("%s: can't add PIM protocol\n", __func__);
1394 goto add_proto_fail;
1397 err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1398 ip6mr_rtm_getroute, ip6mr_rtm_dumproute, 0);
1402 #ifdef CONFIG_IPV6_PIMSM_V2
1403 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1405 unregister_netdevice_notifier(&ip6_mr_notifier);
1408 unregister_pernet_subsys(&ip6mr_net_ops);
1410 kmem_cache_destroy(mrt_cachep);
/*
 * Subsystem teardown, mirroring ip6_mr_init() in reverse: netlink handler,
 * PIM protocol handler (CONFIG_IPV6_PIMSM_V2), netdevice notifier, pernet
 * operations and finally the slab cache.
 */
1414 void ip6_mr_cleanup(void)
1416 rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1417 #ifdef CONFIG_IPV6_PIMSM_V2
1418 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1420 unregister_netdevice_notifier(&ip6_mr_notifier);
1421 unregister_pernet_subsys(&ip6mr_net_ops);
1422 kmem_cache_destroy(mrt_cachep);
/*
 * Add or update the resolved cache entry described by @mfc.
 *
 * Visible flow:
 *  - a parent index of MAXMIFS or more is rejected;
 *  - a ttls[] array is prepared: 255 everywhere, then adjusted for every
 *    interface set in mfc->mf6cc_ifset;
 *  - if an entry for (origin, group, parent) already exists it is updated
 *    under mrt_lock (parent and thresholds, plus the MFC_STATIC flag) and
 *    announced with FIB_EVENT_ENTRY_REPLACE and RTM_NEWROUTE;
 *  - otherwise the group must be either unspecified or a multicast
 *    address; a new entry is allocated, filled in, inserted into the hash
 *    table (freed again if insertion fails) and linked on
 *    mrt->mfc_cache_list;
 *  - under mfc_unres_lock the unresolved queue is searched for a matching
 *    entry, which is unlinked and uncounted; the expiry timer is stopped
 *    when the queue becomes empty;
 *  - packets parked on that unresolved entry are replayed through
 *    ip6mr_cache_resolve() and the unresolved entry is freed;
 *  - FIB_EVENT_ENTRY_ADD and RTM_NEWROUTE are emitted for the new entry.
 */
1425 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1426 struct mf6cctl *mfc, int mrtsock, int parent)
1428 unsigned char ttls[MAXMIFS];
1429 struct mfc6_cache *uc, *c;
1434 if (mfc->mf6cc_parent >= MAXMIFS)
1437 memset(ttls, 255, MAXMIFS);
1438 for (i = 0; i < MAXMIFS; i++) {
1439 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1443 /* The entries are added/deleted only under RTNL */
1445 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1446 &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1449 spin_lock(&mrt_lock);
1450 c->_c.mfc_parent = mfc->mf6cc_parent;
1451 ip6mr_update_thresholds(mrt, &c->_c, ttls);
1453 c->_c.mfc_flags |= MFC_STATIC;
1454 spin_unlock(&mrt_lock);
1455 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1457 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1461 if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1462 !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1465 c = ip6mr_cache_alloc();
1469 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1470 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1471 c->_c.mfc_parent = mfc->mf6cc_parent;
1472 ip6mr_update_thresholds(mrt, &c->_c, ttls);
1474 c->_c.mfc_flags |= MFC_STATIC;
1476 err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1479 pr_err("ip6mr: rhtable insert error %d\n", err);
1480 ip6mr_cache_free(c);
1483 list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1485 /* Check to see if we resolved a queued list. If so we
1486 * need to send on the frames and tidy up.
1489 spin_lock_bh(&mfc_unres_lock);
1490 list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1491 uc = (struct mfc6_cache *)_uc;
1492 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1493 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1494 list_del(&_uc->list);
1495 atomic_dec(&mrt->cache_resolve_queue_len);
1500 if (list_empty(&mrt->mfc_unres_queue))
1501 del_timer(&mrt->ipmr_expire_timer);
1502 spin_unlock_bh(&mfc_unres_lock);
1505 ip6mr_cache_resolve(net, mrt, uc, c);
1506 ip6mr_cache_free(uc);
1508 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1510 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1515 * Close the multicast socket, and clear the vif tables etc
/*
 * Flush parts of table @mrt as selected by the @flags bitmask.
 *
 * MRT6_FLUSH_MIFS / MRT6_FLUSH_MIFS_STATIC: walk the MIF table and call
 * mif6_delete() on entries; the visible condition matches static MIFs when
 * MRT6_FLUSH_MIFS_STATIC is clear and non-static MIFs when MRT6_FLUSH_MIFS
 * is clear (NOTE(review): presumably those are skipped - the statement
 * under the condition is not visible here). Devices collected on the local
 * list are then unregistered in one batch.
 *
 * MRT6_FLUSH_MFC / MRT6_FLUSH_MFC_STATIC: the same static/non-static
 * selection applied to resolved cache entries, which are removed from
 * mrt->mfc_hash and the RCU list and announced with FIB_EVENT_ENTRY_DEL and
 * RTM_DELROUTE.
 *
 * MRT6_FLUSH_MFC additionally drains the unresolved queue under
 * mfc_unres_lock when cache_resolve_queue_len is non-zero.
 */
1518 static void mroute_clean_tables(struct mr_table *mrt, int flags)
1520 struct mr_mfc *c, *tmp;
1524 /* Shut down all active vif entries */
1525 if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
1526 for (i = 0; i < mrt->maxvif; i++) {
1527 if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
1528 !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
1529 (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
1531 mif6_delete(mrt, i, 0, &list);
1533 unregister_netdevice_many(&list);
1536 /* Wipe the cache */
1537 if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
1538 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1539 if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
1540 (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
1542 rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1543 list_del_rcu(&c->list);
1544 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1545 FIB_EVENT_ENTRY_DEL,
1546 (struct mfc6_cache *)c, mrt->id);
1547 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
/* Unresolved entries are only dropped for the non-static MFC flush */
1552 if (flags & MRT6_FLUSH_MFC) {
1553 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1554 spin_lock_bh(&mfc_unres_lock);
1555 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1557 mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1559 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1561 spin_unlock_bh(&mfc_unres_lock);
/*
 * Make @sk the multicast routing control socket of table @mrt.
 *
 * With mrt_lock held, mrt->mroute_sk is checked first (NOTE(review): the
 * branch taken when a socket is already registered is not visible here;
 * presumably an error is returned). On the visible assignment path the
 * socket pointer is published with rcu_assign_pointer(), SOCK_RCU_FREE is
 * set on the socket and devconf_all->mc_forwarding is incremented. A
 * NETCONFA_MC_FORWARDING netconf notification for NETCONFA_IFINDEX_ALL is
 * sent after the lock is dropped.
 */
1566 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1569 struct net *net = sock_net(sk);
1572 spin_lock(&mrt_lock);
1573 if (rtnl_dereference(mrt->mroute_sk)) {
1576 rcu_assign_pointer(mrt->mroute_sk, sk);
1577 sock_set_flag(sk, SOCK_RCU_FREE);
1578 atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
1580 spin_unlock(&mrt_lock);
1583 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1584 NETCONFA_MC_FORWARDING,
1585 NETCONFA_IFINDEX_ALL,
1586 net->ipv6.devconf_all);
/*
 * Detach @sk from the table for which it is the multicast routing control
 * socket.
 *
 * The socket type/protocol (raw ICMPv6) is checked first, as is whether
 * devconf_all exists with a non-zero mc_forwarding count. Every table in
 * the netns is then examined; for the one whose mroute_sk is @sk the
 * pointer is cleared and mc_forwarding decremented under mrt_lock, a
 * NETCONFA_MC_FORWARDING netconf notification is sent, and the table's
 * non-static MIFs and MFC entries are flushed
 * (MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC).
 *
 * NOTE(review): return values and any RTNL locking are not visible in this
 * excerpt.
 */
1592 int ip6mr_sk_done(struct sock *sk)
1594 struct net *net = sock_net(sk);
1595 struct ipv6_devconf *devconf;
1596 struct mr_table *mrt;
1599 if (sk->sk_type != SOCK_RAW ||
1600 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1603 devconf = net->ipv6.devconf_all;
1604 if (!devconf || !atomic_read(&devconf->mc_forwarding))
1608 ip6mr_for_each_table(mrt, net) {
1609 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1610 spin_lock(&mrt_lock);
1611 RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1612 /* Note that mroute_sk had SOCK_RCU_FREE set,
1613 * so the RCU grace period before sk freeing
1614 * is guaranteed by sk_destruct()
1616 atomic_dec(&devconf->mc_forwarding);
1617 spin_unlock(&mrt_lock);
1618 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1619 NETCONFA_MC_FORWARDING,
1620 NETCONFA_IFINDEX_ALL,
1621 net->ipv6.devconf_all);
1623 mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
/*
 * Report whether the multicast routing table selected for @skb has a
 * control socket registered.
 *
 * The table is chosen by ip6mr_fib_lookup() with a flow built from the
 * skb's incoming ifindex (LOOPBACK_IFINDEX when skb_iif is 0), the ifindex
 * of skb->dev as output interface and the skb mark. The result is the
 * truth value of mrt->mroute_sk. NOTE(review): the value returned when the
 * lookup fails is not visible in this excerpt.
 */
1633 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1635 struct mr_table *mrt;
1636 struct flowi6 fl6 = {
1637 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
1638 .flowi6_oif = skb->dev->ifindex,
1639 .flowi6_mark = skb->mark,
1642 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1645 return rcu_access_pointer(mrt->mroute_sk);
1647 EXPORT_SYMBOL(mroute6_is_socket);
1650 * Socket options and virtual interface manipulation. The whole
1651 * virtual interface system is a complete heap, but unfortunately
1652 * that's how BSD mrouted happens to think. Maybe one day with a proper
1653 * MOSPF/PIM router set up we can clean this up.
/*
 * setsockopt() handler for the MRT6_* multicast routing options.
 *
 * The socket is checked to be a raw ICMPv6 socket. The table operated on
 * is the one recorded in raw6_sk(sk)->ip6mr_table, or RT6_TABLE_DFLT when
 * that field is 0. For every option other than MRT6_INIT the caller is
 * checked to be either the table's mroute socket or to hold CAP_NET_ADMIN
 * in the netns' user namespace.
 *
 * Operations visible below: control socket init/teardown
 * (ip6mr_sk_init()/ip6mr_sk_done()), MIF add/delete, MFC add/delete
 * (including the *_PROXY variants), table flush, PIM assert and PIM mode
 * control, and, with CONFIG_IPV6_MROUTE_MULTIPLE_TABLES, selecting the
 * table used by this socket. The final return is -ENOPROTOOPT for
 * unhandled options.
 *
 * NOTE(review): most case labels, the locking calls and the error returns
 * are not visible in this excerpt; check them against the full function.
 */
1656 int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
1657 unsigned int optlen)
1659 int ret, parent = 0;
1663 struct net *net = sock_net(sk);
1664 struct mr_table *mrt;
1666 if (sk->sk_type != SOCK_RAW ||
1667 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1670 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1674 if (optname != MRT6_INIT) {
1675 if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1676 !ns_capable(net->user_ns, CAP_NET_ADMIN))
1682 if (optlen < sizeof(int))
1685 return ip6mr_sk_init(mrt, sk);
1688 return ip6mr_sk_done(sk);
1691 if (optlen < sizeof(vif))
1693 if (copy_from_sockptr(&vif, optval, sizeof(vif)))
1695 if (vif.mif6c_mifi >= MAXMIFS)
1698 ret = mif6_add(net, mrt, &vif,
1699 sk == rtnl_dereference(mrt->mroute_sk));
1704 if (optlen < sizeof(mifi_t))
1706 if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
1709 ret = mif6_delete(mrt, mifi, 0, NULL);
1714 * Manipulate the forwarding caches. These live
1715 * in a sort of kernel/user symbiosis.
1721 case MRT6_ADD_MFC_PROXY:
1722 case MRT6_DEL_MFC_PROXY:
1723 if (optlen < sizeof(mfc))
1725 if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
1728 parent = mfc.mf6cc_parent;
1730 if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1731 ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1733 ret = ip6mr_mfc_add(net, mrt, &mfc,
1735 rtnl_dereference(mrt->mroute_sk),
1744 if (optlen != sizeof(flags))
1746 if (copy_from_sockptr(&flags, optval, sizeof(flags)))
1749 mroute_clean_tables(mrt, flags);
1755 * Control PIM assert (to activate pim will activate assert)
1761 if (optlen != sizeof(v))
1763 if (copy_from_sockptr(&v, optval, sizeof(v)))
1765 mrt->mroute_do_assert = v;
1769 #ifdef CONFIG_IPV6_PIMSM_V2
1775 if (optlen != sizeof(v))
1777 if (copy_from_sockptr(&v, optval, sizeof(v)))
1780 do_wrmifwhole = (v == MRT6MSG_WRMIFWHOLE);
1784 if (v != mrt->mroute_do_pim) {
1785 mrt->mroute_do_pim = v;
1786 mrt->mroute_do_assert = v;
1787 mrt->mroute_do_wrvifwhole = do_wrmifwhole;
1794 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1799 if (optlen != sizeof(u32))
1801 if (copy_from_sockptr(&v, optval, sizeof(v)))
1803 /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1804 if (v != RT_TABLE_DEFAULT && v >= 100000000)
1806 if (sk == rcu_access_pointer(mrt->mroute_sk))
1811 mrt = ip6mr_new_table(net, v);
1815 raw6_sk(sk)->ip6mr_table = v;
1821 * Spurious command, or MRT6_VERSION which you cannot
1825 return -ENOPROTOOPT;
1830 * Getsock opt support for the multicast routing system.
/*
 * getsockopt() handler for the multicast routing options.
 *
 * The socket is checked to be a raw ICMPv6 socket and the table is taken
 * from raw6_sk(sk)->ip6mr_table (RT6_TABLE_DFLT when 0). The values visible
 * below are mrt->mroute_do_pim (CONFIG_IPV6_PIMSM_V2 only) and
 * mrt->mroute_do_assert; unhandled options return -ENOPROTOOPT. The
 * caller's length is read from optlen, limited to at most sizeof(int),
 * written back to optlen, and that many bytes of the value are copied to
 * optval.
 *
 * NOTE(review): the case labels and error returns are not visible in this
 * excerpt.
 */
1833 int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
1838 struct net *net = sock_net(sk);
1839 struct mr_table *mrt;
1841 if (sk->sk_type != SOCK_RAW ||
1842 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1845 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1853 #ifdef CONFIG_IPV6_PIMSM_V2
1855 val = mrt->mroute_do_pim;
1859 val = mrt->mroute_do_assert;
1862 return -ENOPROTOOPT;
1865 if (copy_from_sockptr(&olr, optlen, sizeof(int)))
1868 olr = min_t(int, olr, sizeof(int));
1872 if (copy_to_sockptr(optlen, &olr, sizeof(int)))
1874 if (copy_to_sockptr(optval, &val, olr))
1880 * The IP multicast ioctl support routines.
/*
 * ioctl backend for the multicast routing statistics requests. @arg points
 * at a request structure whose type depends on @cmd; results are written
 * into that structure in place.
 *
 * SIOCGETMIFCNT_IN6: @arg is a struct sioc_mif_req6. The MIF index is
 * bounds-checked against mrt->maxvif and, for an existing MIF, its packet
 * and byte counters are copied into the request.
 * SIOCGETSGCNT_IN6: @arg is a struct sioc_sg_req6. The (src, grp) cache
 * entry is looked up and its pkt, bytes and wrong_if counters are copied
 * into the request.
 *
 * -EADDRNOTAVAIL and -ENOIOCTLCMD are among the visible returns.
 * NOTE(review): presumably for a missing MIF/cache entry and an unknown
 * @cmd respectively; the surrounding control flow and locking are not
 * visible in this excerpt.
 */
1882 int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
1884 struct sioc_sg_req6 *sr;
1885 struct sioc_mif_req6 *vr;
1886 struct vif_device *vif;
1887 struct mfc6_cache *c;
1888 struct net *net = sock_net(sk);
1889 struct mr_table *mrt;
1891 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1896 case SIOCGETMIFCNT_IN6:
1897 vr = (struct sioc_mif_req6 *)arg;
1898 if (vr->mifi >= mrt->maxvif)
/* Sanitise the already bounds-checked index against speculation */
1900 vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif);
1902 vif = &mrt->vif_table[vr->mifi];
1903 if (VIF_EXISTS(mrt, vr->mifi)) {
1904 vr->icount = READ_ONCE(vif->pkt_in);
1905 vr->ocount = READ_ONCE(vif->pkt_out);
1906 vr->ibytes = READ_ONCE(vif->bytes_in);
1907 vr->obytes = READ_ONCE(vif->bytes_out);
1912 return -EADDRNOTAVAIL;
1913 case SIOCGETSGCNT_IN6:
1914 sr = (struct sioc_sg_req6 *)arg;
1917 c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
1918 &sr->grp.sin6_addr);
1920 sr->pktcnt = c->_c.mfc_un.res.pkt;
1921 sr->bytecnt = c->_c.mfc_un.res.bytes;
1922 sr->wrong_if = c->_c.mfc_un.res.wrong_if;
1927 return -EADDRNOTAVAIL;
1929 return -ENOIOCTLCMD;
1933 #ifdef CONFIG_COMPAT
/*
 * Compat layout of the SIOCGETSGCNT_IN6 request as used by
 * ip6mr_compat_ioctl(): source and group addresses followed by the three
 * counters stored as compat_ulong_t.
 */
1934 struct compat_sioc_sg_req6 {
1935 struct sockaddr_in6 src;
1936 struct sockaddr_in6 grp;
1937 compat_ulong_t pktcnt;
1938 compat_ulong_t bytecnt;
1939 compat_ulong_t wrong_if;
/*
 * Compat layout of the SIOCGETMIFCNT_IN6 request as used by
 * ip6mr_compat_ioctl(): per-MIF packet and byte counters stored as
 * compat_ulong_t. NOTE(review): the MIF index member (accessed as
 * vr.mifi by the handler) is not visible in this excerpt.
 */
1942 struct compat_sioc_mif_req6 {
1944 compat_ulong_t icount;
1945 compat_ulong_t ocount;
1946 compat_ulong_t ibytes;
1947 compat_ulong_t obytes;
/*
 * Compat variant of the multicast statistics ioctls. Unlike ip6mr_ioctl()
 * the request is copied from and back to user space here, using the
 * compat_sioc_*_req6 layouts.
 *
 * SIOCGETMIFCNT_IN6: the MIF index is bounds-checked against mrt->maxvif
 * and, for an existing MIF, its packet and byte counters are returned.
 * SIOCGETSGCNT_IN6: the (src, grp) cache entry is looked up and its pkt,
 * bytes and wrong_if counters are returned.
 *
 * -EADDRNOTAVAIL and -ENOIOCTLCMD are among the visible returns.
 * NOTE(review): the remaining error returns and locking are not visible in
 * this excerpt.
 */
1950 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1952 struct compat_sioc_sg_req6 sr;
1953 struct compat_sioc_mif_req6 vr;
1954 struct vif_device *vif;
1955 struct mfc6_cache *c;
1956 struct net *net = sock_net(sk);
1957 struct mr_table *mrt;
1959 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1964 case SIOCGETMIFCNT_IN6:
1965 if (copy_from_user(&vr, arg, sizeof(vr)))
1967 if (vr.mifi >= mrt->maxvif)
/* Sanitise the already bounds-checked index against speculation */
1969 vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1971 vif = &mrt->vif_table[vr.mifi];
1972 if (VIF_EXISTS(mrt, vr.mifi)) {
1973 vr.icount = READ_ONCE(vif->pkt_in);
1974 vr.ocount = READ_ONCE(vif->pkt_out);
1975 vr.ibytes = READ_ONCE(vif->bytes_in);
1976 vr.obytes = READ_ONCE(vif->bytes_out);
1979 if (copy_to_user(arg, &vr, sizeof(vr)))
1984 return -EADDRNOTAVAIL;
1985 case SIOCGETSGCNT_IN6:
1986 if (copy_from_user(&sr, arg, sizeof(sr)))
1990 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1992 sr.pktcnt = c->_c.mfc_un.res.pkt;
1993 sr.bytecnt = c->_c.mfc_un.res.bytes;
1994 sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1997 if (copy_to_user(arg, &sr, sizeof(sr)))
2002 return -EADDRNOTAVAIL;
2004 return -ENOIOCTLCMD;
/*
 * Completion callback handed to NF_HOOK by ip6mr_forward2(): account the
 * packet in the IPSTATS_MIB_OUTFORWDATAGRAMS and IPSTATS_MIB_OUTOCTETS
 * counters of the idev behind the skb's dst, then pass it to dst_output()
 * and return that call's result.
 */
2009 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
2011 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
2012 IPSTATS_MIB_OUTFORWDATAGRAMS);
2013 IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
2014 IPSTATS_MIB_OUTOCTETS, skb->len);
2015 return dst_output(net, sk, skb);
2019 * Processing handlers for ip6mr_forward
/*
 * Transmit @skb on MIF @vifi of table @mrt.
 *
 * For a register MIF (CONFIG_IPV6_PIMSM_V2, MIFF_REGISTER) the MIF output
 * counters and the device tx_bytes/tx_packets statistics are bumped and the
 * packet is passed to ip6mr_cache_report() as MRT6MSG_WHOLEPKT. On the
 * regular path a route towards the packet's destination address is looked
 * up with vif->link as output interface and attached to the skb, the MIF
 * output counters are bumped, skb_cow() makes room for the IPv6 header plus
 * link-layer headroom, the skb is flagged IP6SKB_FORWARDED and it is run
 * through the NF_INET_FORWARD hook with ip6mr_forward2_finish() as
 * continuation.
 *
 * NOTE(review): error paths, and the header modification that follows
 * skb_cow(), are not visible in this excerpt.
 */
2022 static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
2023 struct sk_buff *skb, int vifi)
2025 struct vif_device *vif = &mrt->vif_table[vifi];
2026 struct net_device *vif_dev;
2027 struct ipv6hdr *ipv6h;
2028 struct dst_entry *dst;
2031 vif_dev = vif_dev_read(vif);
2035 #ifdef CONFIG_IPV6_PIMSM_V2
2036 if (vif->flags & MIFF_REGISTER) {
2037 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2038 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2039 DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
2040 DEV_STATS_INC(vif_dev, tx_packets);
2041 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2046 ipv6h = ipv6_hdr(skb);
2048 fl6 = (struct flowi6) {
2049 .flowi6_oif = vif->link,
2050 .daddr = ipv6h->daddr,
2053 dst = ip6_route_output(net, NULL, &fl6);
2060 skb_dst_set(skb, dst);
2063 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2064 * not only before forwarding, but after forwarding on all output
2065 * interfaces. It is clear, if mrouter runs a multicasting
2066 * program, it should receive packets not depending to what interface
2067 * program is joined.
2068 * If we will not make it, the program will have to join on all
2069 * interfaces. On the other hand, multihoming host (or router, but
2070 * not mrouter) cannot join to more than one interface - it will
2071 * result in receiving multiple packets.
2074 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
2075 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
2077 /* We are about to write */
2078 /* XXX: extension headers? */
2079 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
2082 ipv6h = ipv6_hdr(skb);
2085 IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2087 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2088 net, NULL, skb, skb->dev, vif_dev,
2089 ip6mr_forward2_finish);
/*
 * Scan the MIF table of @mrt from index maxvif - 1 downwards for the entry
 * whose device pointer equals @dev.
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers test the result with ">= 0" / "< 0", so presumably the matching
 * index is returned and a negative value means "not found".
 */
2096 /* Called with rcu_read_lock() */
2097 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2101 /* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */
2102 for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
2103 if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
/*
 * Forward @skb, received on @dev, according to resolved cache entry @c.
 *
 * The entry's packet and byte counters and its lastuse stamp are updated
 * first. For an entry with unspecified origin the (*,*) proxy entry of the
 * parent MIF is consulted via mr_mfc_find_any_parent(). If @dev is not the
 * device of the entry's parent MIF, wrong_if is incremented and, depending
 * on mroute_do_assert, mroute_do_pim or the per-MIF threshold, and the
 * last_assert / MFC_ASSERT_THRESH check, an MRT6MSG_WRONGMIF report is
 * generated (plus MRT6MSG_WRMIFWHOLE when mroute_do_wrvifwhole is set).
 * On the forwarding path the parent MIF's input counters are bumped and
 * ip6mr_forward2() is invoked for MIFs in [minvif, maxvif) whose threshold
 * is below the packet's hop limit; for entries with unspecified origin the
 * incoming MIF is excluded. A clone is sent on all but the last selected
 * MIF, which gets the original skb.
 *
 * NOTE(review): the gotos/labels, the time comparison around last_assert
 * and the skb freeing are not visible in this excerpt.
 */
2109 /* Called under rcu_read_lock() */
2110 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2111 struct net_device *dev, struct sk_buff *skb,
2112 struct mfc6_cache *c)
2116 int true_vifi = ip6mr_find_vif(mrt, dev);
2118 vif = c->_c.mfc_parent;
2119 c->_c.mfc_un.res.pkt++;
2120 c->_c.mfc_un.res.bytes += skb->len;
2121 c->_c.mfc_un.res.lastuse = jiffies;
2123 if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2124 struct mfc6_cache *cache_proxy;
2126 /* For an (*,G) entry, we only check that the incoming
2127 * interface is part of the static tree.
2129 cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2131 cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
2136 * Wrong interface: drop packet and (maybe) send PIM assert.
2138 if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
2139 c->_c.mfc_un.res.wrong_if++;
2141 if (true_vifi >= 0 && mrt->mroute_do_assert &&
2142 /* pimsm uses asserts, when switching from RPT to SPT,
2143 so that we cannot check that packet arrived on an oif.
2144 It is bad, but otherwise we would need to move pretty
2145 large chunk of pimd to kernel. Ough... --ANK
2147 (mrt->mroute_do_pim ||
2148 c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2150 c->_c.mfc_un.res.last_assert +
2151 MFC_ASSERT_THRESH)) {
2152 c->_c.mfc_un.res.last_assert = jiffies;
2153 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2154 if (mrt->mroute_do_wrvifwhole)
2155 ip6mr_cache_report(mrt, skb, true_vifi,
2156 MRT6MSG_WRMIFWHOLE);
2162 WRITE_ONCE(mrt->vif_table[vif].pkt_in,
2163 mrt->vif_table[vif].pkt_in + 1);
2164 WRITE_ONCE(mrt->vif_table[vif].bytes_in,
2165 mrt->vif_table[vif].bytes_in + skb->len);
2170 if (ipv6_addr_any(&c->mf6c_origin) &&
2171 ipv6_addr_any(&c->mf6c_mcastgrp)) {
2172 if (true_vifi >= 0 &&
2173 true_vifi != c->_c.mfc_parent &&
2174 ipv6_hdr(skb)->hop_limit >
2175 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2176 /* It's an (*,*) entry and the packet is not coming from
2177 * the upstream: forward the packet to the upstream
2180 psend = c->_c.mfc_parent;
2185 for (ct = c->_c.mfc_un.res.maxvif - 1;
2186 ct >= c->_c.mfc_un.res.minvif; ct--) {
2187 /* For (*,G) entry, don't forward to the incoming interface */
2188 if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2189 ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2191 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2193 ip6mr_forward2(net, mrt, skb2, psend);
2200 ip6mr_forward2(net, mrt, skb, psend);
2210 * Multicast packets for forwarding arrive here
/*
 * Receive-path entry point for multicast packets that may need forwarding.
 *
 * When skb->dev is an L3 master device the actual ingress device is looked
 * up by the interface index stored in the skb control block. The table is
 * selected by ip6mr_fib_lookup() from a flow carrying the ingress ifindex
 * and the skb mark. The entry for the packet's source and destination is
 * looked up first, with ip6mr_cache_find_any() (keyed by the destination
 * and the ingress MIF) as fallback. Without a usable entry the packet is
 * handed to ip6mr_cache_unresolved(); otherwise ip6_mr_forward() is called.
 *
 * NOTE(review): error returns, skb freeing and the conditions between the
 * lookups are not visible in this excerpt.
 */
2213 int ip6_mr_input(struct sk_buff *skb)
2215 struct mfc6_cache *cache;
2216 struct net *net = dev_net(skb->dev);
2217 struct mr_table *mrt;
2218 struct flowi6 fl6 = {
2219 .flowi6_iif = skb->dev->ifindex,
2220 .flowi6_mark = skb->mark,
2223 struct net_device *dev;
2225 /* skb->dev passed in is the master dev for vrfs.
2226 * Get the proper interface that does have a vif associated with it.
2229 if (netif_is_l3_master(skb->dev)) {
2230 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2237 err = ip6mr_fib_lookup(net, &fl6, &mrt);
2243 cache = ip6mr_cache_find(mrt,
2244 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2246 int vif = ip6mr_find_vif(mrt, dev);
2249 cache = ip6mr_cache_find_any(mrt,
2250 &ipv6_hdr(skb)->daddr,
2255 * No usable cache entry
2260 vif = ip6mr_find_vif(mrt, dev);
2262 int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
2270 ip6_mr_forward(net, mrt, dev, skb, cache);
/*
 * Provide multicast forwarding information for the route attached to @skb
 * in an rtnetlink reply described by @rtm.
 *
 * The cache entry is searched in the RT6_TABLE_DFLT table using the
 * route's source and destination addresses, with ip6mr_cache_find_any()
 * via the MIF of skb->dev as fallback. When an entry is available its data
 * is emitted through mr_fill_mroute(). On the path without an entry, a
 * small skb holding only an IPv6 header (flow label bytes and payload_len
 * zeroed, nexthdr IPPROTO_NONE, addresses taken from the route) is built
 * and passed to ip6mr_cache_unresolved().
 *
 * NOTE(review): the remaining parameters, the locking and the return
 * statements are not visible in this excerpt.
 */
2275 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2279 struct mr_table *mrt;
2280 struct mfc6_cache *cache;
2281 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2283 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2288 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2289 if (!cache && skb->dev) {
2290 int vif = ip6mr_find_vif(mrt, skb->dev);
2293 cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2298 struct sk_buff *skb2;
2299 struct ipv6hdr *iph;
2300 struct net_device *dev;
2304 if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2309 /* really correct? */
2310 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2316 NETLINK_CB(skb2).portid = portid;
2317 skb_reset_transport_header(skb2);
2319 skb_put(skb2, sizeof(struct ipv6hdr));
2320 skb_reset_network_header(skb2);
2322 iph = ipv6_hdr(skb2);
2325 iph->flow_lbl[0] = 0;
2326 iph->flow_lbl[1] = 0;
2327 iph->flow_lbl[2] = 0;
2328 iph->payload_len = 0;
2329 iph->nexthdr = IPPROTO_NONE;
2331 iph->saddr = rt->rt6i_src.addr;
2332 iph->daddr = rt->rt6i_dst.addr;
2334 err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
2340 err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
/*
 * Build one rtnetlink multicast route message of type @cmd for cache entry
 * @c in @skb.
 *
 * The rtmsg header is filled for RTNL_FAMILY_IP6MR with source and
 * destination prefix lengths of 128, the table id, type RTN_MULTICAST and
 * scope RT_SCOPE_UNIVERSE; rtm_protocol is RTPROT_STATIC for MFC_STATIC
 * entries and RTPROT_MROUTED otherwise. RTA_TABLE, RTA_SRC and RTA_DST
 * attributes are added, then mr_fill_mroute() is called for the rest.
 * -ENOENT from mr_fill_mroute() is tolerated so that an unresolved entry
 * does not abort a dump; other failures take the nla_put_failure path,
 * where the partially built message is cancelled.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
2345 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2346 u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2349 struct nlmsghdr *nlh;
2353 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2357 rtm = nlmsg_data(nlh);
2358 rtm->rtm_family = RTNL_FAMILY_IP6MR;
2359 rtm->rtm_dst_len = 128;
2360 rtm->rtm_src_len = 128;
2362 rtm->rtm_table = mrt->id;
2363 if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2364 goto nla_put_failure;
2365 rtm->rtm_type = RTN_MULTICAST;
2366 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2367 if (c->_c.mfc_flags & MFC_STATIC)
2368 rtm->rtm_protocol = RTPROT_STATIC;
2370 rtm->rtm_protocol = RTPROT_MROUTED;
2373 if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2374 nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2375 goto nla_put_failure;
2376 err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2377 /* do not break the dump if cache is unresolved */
2378 if (err < 0 && err != -ENOENT)
2379 goto nla_put_failure;
2381 nlmsg_end(skb, nlh);
2385 nlmsg_cancel(skb, nlh);
/*
 * Adapter taking a generic struct mr_mfc: casts @c to struct mfc6_cache and
 * forwards to ip6mr_fill_mroute(). It is the fill callback passed to
 * mr_table_dump() and mr_rtm_dumproute() by ip6mr_rtm_dumproute().
 */
2389 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2390 u32 portid, u32 seq, struct mr_mfc *c,
2393 return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
/*
 * Compute the netlink message size to allocate for one MFC entry: the
 * aligned rtmsg header plus RTA_TABLE, RTA_SRC and RTA_DST, and in addition
 * RTA_IIF, an RTA_MULTIPATH attribute with @maxvif rtnexthop slots and a
 * 64-bit aligned struct rta_mfc_stats.
 * NOTE(review): how @unresolved selects the second group of terms is not
 * visible in this excerpt.
 */
2397 static int mr6_msgsize(bool unresolved, int maxvif)
2400 NLMSG_ALIGN(sizeof(struct rtmsg))
2401 + nla_total_size(4) /* RTA_TABLE */
2402 + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */
2403 + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */
2408 + nla_total_size(4) /* RTA_IIF */
2409 + nla_total_size(0) /* RTA_MULTIPATH */
2410 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2412 + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
/*
 * Send an rtnetlink notification about cache entry @mfc to the
 * RTNLGRP_IPV6_MROUTE group.
 *
 * The skb is sized by mr6_msgsize(), treating an entry whose parent index
 * is >= MAXMIFS as unresolved, filled by ip6mr_fill_mroute() with portid
 * and sequence 0, and delivered with rtnl_notify(). rtnl_set_sk_err()
 * reports an error to the group instead (NOTE(review): presumably on
 * allocation or fill failure - those branches are not visible here).
 */
2418 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2421 struct net *net = read_pnet(&mrt->net);
2422 struct sk_buff *skb;
2425 skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2430 err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2434 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2440 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
/*
 * Compute the netlink message size for a cache report carrying
 * @payloadlen bytes of packet data: the aligned rtgenmsg header plus the
 * message type (1 byte), MIF id (4 bytes), source and destination address
 * attributes and the packet payload attribute.
 */
2443 static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2446 NLMSG_ALIGN(sizeof(struct rtgenmsg))
2447 + nla_total_size(1) /* IP6MRA_CREPORT_MSGTYPE */
2448 + nla_total_size(4) /* IP6MRA_CREPORT_MIF_ID */
2449 /* IP6MRA_CREPORT_SRC_ADDR */
2450 + nla_total_size(sizeof(struct in6_addr))
2451 /* IP6MRA_CREPORT_DST_ADDR */
2452 + nla_total_size(sizeof(struct in6_addr))
2453 /* IP6MRA_CREPORT_PKT */
2454 + nla_total_size(payloadlen)
/*
 * Mirror a cache report packet @pkt to the RTNLGRP_IPV6_MROUTE_R netlink
 * group as an RTM_NEWCACHEREPORT message.
 *
 * The struct mrt6msg at the transport header of @pkt supplies the message
 * type and MIF id (and the address attributes); everything in @pkt beyond
 * sizeof(struct mrt6msg) is copied into the IP6MRA_CREPORT_PKT attribute.
 * On the failure path the partial message is cancelled and -ENOBUFS is
 * reported to the group via rtnl_set_sk_err().
 *
 * NOTE(review): the labels, the skb freeing and the return statements are
 * not visible in this excerpt.
 */
2460 static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
2462 struct net *net = read_pnet(&mrt->net);
2463 struct nlmsghdr *nlh;
2464 struct rtgenmsg *rtgenm;
2465 struct mrt6msg *msg;
2466 struct sk_buff *skb;
2470 payloadlen = pkt->len - sizeof(struct mrt6msg);
2471 msg = (struct mrt6msg *)skb_transport_header(pkt);
2473 skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2477 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2478 sizeof(struct rtgenmsg), 0);
2481 rtgenm = nlmsg_data(nlh);
2482 rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2483 if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2484 nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2485 nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2487 nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2489 goto nla_put_failure;
2491 nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2492 if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2493 nla_data(nla), payloadlen))
2494 goto nla_put_failure;
2496 nlmsg_end(skb, nlh);
2498 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2502 nlmsg_cancel(skb, nlh);
2505 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
/*
 * Attribute policy used when parsing RTM_GETROUTE requests in
 * ip6mr_rtm_valid_getroute_req(): RTA_SRC and RTA_DST must be exactly one
 * struct in6_addr long, RTA_TABLE is a u32.
 */
2508 static const struct nla_policy ip6mr_getroute_policy[RTA_MAX + 1] = {
2509 [RTA_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2510 [RTA_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
2511 [RTA_TABLE] = { .type = NLA_U32 },
/*
 * Validate an RTM_GETROUTE request for IPv6 multicast routes and parse its
 * attributes into tb using ip6mr_getroute_policy.
 *
 * Header checks: rtm_src_len and rtm_dst_len must each be 0 or 128, and
 * rtm_tos, rtm_table, rtm_protocol, rtm_scope, rtm_type and rtm_flags must
 * all be 0. RTA_SRC / RTA_DST are only accepted together with a non-zero
 * rtm_src_len / rtm_dst_len. Each violation sets an extack message.
 *
 * NOTE(review): the return values are not visible in this excerpt.
 */
2514 static int ip6mr_rtm_valid_getroute_req(struct sk_buff *skb,
2515 const struct nlmsghdr *nlh,
2517 struct netlink_ext_ack *extack)
2522 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, ip6mr_getroute_policy,
2527 rtm = nlmsg_data(nlh);
2528 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
2529 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
2530 rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
2531 rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
2532 NL_SET_ERR_MSG_MOD(extack,
2533 "Invalid values in header for multicast route get request");
2537 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2538 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2539 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
/*
 * RTM_GETROUTE handler for RTNL_FAMILY_IP6MR: look up a single (src, grp)
 * cache entry and unicast it back to the requester.
 *
 * The request is validated by ip6mr_rtm_valid_getroute_req(). Source and
 * group start out as the all-zero address and are read from RTA_SRC /
 * RTA_DST (NOTE(review): presumably only when the attribute is present -
 * the guards are not visible here). The table is the one named by
 * RTA_TABLE, or RT_TABLE_DEFAULT when the attribute is absent or 0. A
 * missing table or cache entry sets an extack message. The reply is built
 * by ip6mr_fill_mroute() as RTM_NEWROUTE, echoing the request's portid and
 * sequence number, and sent with rtnl_unicast().
 *
 * NOTE(review): error returns and skb freeing on failure are not visible
 * in this excerpt.
 */
2546 static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2547 struct netlink_ext_ack *extack)
2549 struct net *net = sock_net(in_skb->sk);
2550 struct in6_addr src = {}, grp = {};
2551 struct nlattr *tb[RTA_MAX + 1];
2552 struct mfc6_cache *cache;
2553 struct mr_table *mrt;
2554 struct sk_buff *skb;
2558 err = ip6mr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
2563 src = nla_get_in6_addr(tb[RTA_SRC]);
2565 grp = nla_get_in6_addr(tb[RTA_DST]);
2566 tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
2568 mrt = ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
2570 NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
2574 /* entries are added/deleted only under RTNL */
2576 cache = ip6mr_cache_find(mrt, &src, &grp);
2579 NL_SET_ERR_MSG_MOD(extack, "MR cache entry not found");
2583 skb = nlmsg_new(mr6_msgsize(false, mrt->maxvif), GFP_KERNEL);
2587 err = ip6mr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
2588 nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0);
2594 return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
/*
 * RTM_GETROUTE dump handler for RTNL_FAMILY_IP6MR.
 *
 * With strict checking enabled the request is validated by
 * ip_valid_fib_dump_req(). When a table id filter is set, only that table
 * is dumped through mr_table_dump(), returning skb->len when anything was
 * written and the dump's error code otherwise; a missing table is handled
 * specially when the request family is not RTNL_FAMILY_IP6MR and otherwise
 * sets the "MR table does not exist" extack message (NOTE(review): the
 * return values of those branches are not visible here). Without a table
 * filter all tables are walked via mr_rtm_dumproute() using
 * ip6mr_mr_table_iter.
 */
2597 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2599 const struct nlmsghdr *nlh = cb->nlh;
2600 struct fib_dump_filter filter = {};
2603 if (cb->strict_check) {
2604 err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2610 if (filter.table_id) {
2611 struct mr_table *mrt;
2613 mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2615 if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
2618 NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2621 err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2622 &mfc_unres_lock, &filter);
2623 return skb->len ? : err;
2626 return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2627 _ip6mr_fill_mroute, &mfc_unres_lock, &filter);