2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c.
5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6 * LSIIT Laboratory, Strasbourg, France
7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
9 * Copyright (C)2007,2008 USAGI/WIDE Project
10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
19 #include <asm/system.h>
20 #include <asm/uaccess.h>
21 #include <linux/types.h>
22 #include <linux/sched.h>
23 #include <linux/errno.h>
24 #include <linux/timer.h>
26 #include <linux/kernel.h>
27 #include <linux/fcntl.h>
28 #include <linux/stat.h>
29 #include <linux/socket.h>
30 #include <linux/inet.h>
31 #include <linux/netdevice.h>
32 #include <linux/inetdevice.h>
33 #include <linux/proc_fs.h>
34 #include <linux/seq_file.h>
35 #include <linux/init.h>
36 #include <linux/slab.h>
37 #include <linux/compat.h>
38 #include <net/protocol.h>
39 #include <linux/skbuff.h>
42 #include <linux/notifier.h>
43 #include <linux/if_arp.h>
44 #include <net/checksum.h>
45 #include <net/netlink.h>
46 #include <net/fib_rules.h>
49 #include <net/ip6_route.h>
50 #include <linux/mroute6.h>
51 #include <linux/pim.h>
52 #include <net/addrconf.h>
53 #include <linux/netfilter_ipv6.h>
54 #include <linux/export.h>
55 #include <net/ip6_checksum.h>
/*
 * Visible members of the per-table multicast routing state that the rest of
 * this file reaches through a "struct mr6_table *mrt". The struct header and
 * several members are not part of this listing.
 */
58 struct list_head list;
/* Socket that ip6mr_cache_report() queues its messages to. */
63 struct sock *mroute6_sk;
/* Timer re-armed by ipmr_do_expire_process() for the unresolved queue. */
64 struct timer_list ipmr_expire_timer;
65 struct list_head mfc6_unres_queue;
66 struct list_head mfc6_cache_array[MFC6_LINES];
67 struct mif_device vif6_table[MAXMIFS];
/* Counter incremented/decremented as entries join or leave mfc6_unres_queue. */
69 atomic_t cache_resolve_queue_len;
72 #ifdef CONFIG_IPV6_PIMSM_V2
/* Index of the register vif in vif6_table; ip6mr_new_table() sets it to -1. */
73 int mroute_reg_vif_num;
/*
 * Single visible members of two small structures whose headers are not part
 * of this listing. NOTE(review): presumably "common" belongs to struct
 * ip6mr_rule (see .rule_size in ip6mr_rules_ops_template) and "mrt" to
 * struct ip6mr_result (the fib lookup result) - confirm against the full file.
 */
78 struct fib_rule common;
82 struct mr6_table *mrt;
85 /* Big lock, protecting vif table, mrt cache and mroute socket state.
86 Note that the changes are semaphored via rtnl_lock.
/*
 * Reader/writer lock taken with read_lock() on the lookup/dump paths and
 * write_lock_bh() on the update paths of this file.
 */
89 static DEFINE_RWLOCK(mrt_lock);
92 * Multicast router control variables
/* True when slot _idx of _mrt's vif6_table has a device attached. */
95 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
97 /* Special spinlock for queue of unresolved entries */
98 static DEFINE_SPINLOCK(mfc_unres_lock);
100 /* We return to original Alan's scheme. Hash table of resolved
101 entries is changed only in process context and protected
102 with weak lock mrt_lock. Queue of unresolved entries is protected
103 with strong spinlock mfc_unres_lock.
105 In this case data path is free of exclusive locks at all.
/* Slab cache from which struct mfc6_cache entries are allocated and freed. */
108 static struct kmem_cache *mrt_cachep __read_mostly;
/* Forward declarations for helpers that are used before they are defined. */
110 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
111 static void ip6mr_free_table(struct mr6_table *mrt);
113 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
114 struct sk_buff *skb, struct mfc6_cache *cache);
115 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
116 mifi_t mifi, int assert);
117 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
118 struct mfc6_cache *c, struct rtmsg *rtm);
119 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
120 struct netlink_callback *cb);
121 static void mroute_clean_tables(struct mr6_table *mrt);
122 static void ipmr_expire_process(unsigned long arg);
124 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/*
 * Multiple-tables build: visit every table linked on net->ipv6.mr6_tables,
 * using the RCU list iterator.
 */
125 #define ip6mr_for_each_table(mrt, net) \
126 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
/*
 * Multiple-tables build: find the table for "id" by walking the
 * per-namespace table list. The comparison and return statements are not
 * part of this listing.
 */
128 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
130 struct mr6_table *mrt;
132 ip6mr_for_each_table(mrt, net) {
/*
 * Multiple-tables build: pick the table for a flow by running the IP6MR
 * fib rules of this namespace. The lookup result is collected in "res"
 * through arg.result; the error handling and the store into *mrt are not
 * part of this listing.
 */
139 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
140 struct mr6_table **mrt)
142 struct ip6mr_result res;
143 struct fib_lookup_arg arg = { .result = &res, };
146 err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
147 flowi6_to_flowi(flp6), 0, &arg);
/*
 * fib_rules ->action callback. Dispatches on rule->action (the unreachable,
 * prohibit and blackhole cases are visible; their return values are not part
 * of this listing) and otherwise resolves rule->table to a table with
 * ip6mr_get_table(). "res" points at the caller's lookup result.
 */
154 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
155 int flags, struct fib_lookup_arg *arg)
157 struct ip6mr_result *res = arg->result;
158 struct mr6_table *mrt;
160 switch (rule->action) {
163 case FR_ACT_UNREACHABLE:
165 case FR_ACT_PROHIBIT:
167 case FR_ACT_BLACKHOLE:
172 mrt = ip6mr_get_table(rule->fr_net, rule->table);
/* fib_rules ->match callback; its body is not part of this listing. */
179 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
/*
 * Netlink attribute policy installed as .policy of the rules ops template;
 * the initialiser entries are not part of this listing.
 */
184 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
/* fib_rules ->configure callback; its body is not part of this listing. */
188 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
189 struct fib_rule_hdr *frh, struct nlattr **tb)
/*
 * fib_rules ->compare callback; the rest of the parameter list and the body
 * are not part of this listing.
 */
194 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
/* fib_rules ->fill callback; its body is not part of this listing. */
200 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
201 struct fib_rule_hdr *frh)
/*
 * Template handed to fib_rules_register() by ip6mr_rules_init(): wires the
 * ip6mr_rule_* callbacks above into the generic fib rules framework for the
 * RTNL_FAMILY_IP6MR family.
 */
209 static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
210 .family = RTNL_FAMILY_IP6MR,
211 .rule_size = sizeof(struct ip6mr_rule),
212 .addr_size = sizeof(struct in6_addr),
213 .action = ip6mr_rule_action,
214 .match = ip6mr_rule_match,
215 .configure = ip6mr_rule_configure,
216 .compare = ip6mr_rule_compare,
217 .default_pref = fib_default_rule_pref,
218 .fill = ip6mr_rule_fill,
219 .nlgroup = RTNLGRP_IPV6_RULE,
220 .policy = ip6mr_rule_policy,
221 .owner = THIS_MODULE,
/*
 * Multiple-tables build, per-namespace setup: register the rules ops,
 * initialise the table list, create the default table (RT6_TABLE_DFLT) and
 * add a default rule with preference 0x7fff that points at it. On success
 * the ops are stored in net->ipv6.mr6_rules_ops; the visible error path
 * unregisters the ops again. Error checks and labels are not part of this
 * listing.
 */
224 static int __net_init ip6mr_rules_init(struct net *net)
226 struct fib_rules_ops *ops;
227 struct mr6_table *mrt;
230 ops = fib_rules_register(&ip6mr_rules_ops_template, net);
234 INIT_LIST_HEAD(&net->ipv6.mr6_tables);
236 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
242 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
246 net->ipv6.mr6_rules_ops = ops;
252 fib_rules_unregister(ops);
/*
 * Multiple-tables build, per-namespace teardown: unlink and free every table
 * on the namespace list (the _safe iterator is needed because entries are
 * removed while walking), then unregister the rules ops.
 */
256 static void __net_exit ip6mr_rules_exit(struct net *net)
258 struct mr6_table *mrt, *next;
260 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
261 list_del(&mrt->list);
262 ip6mr_free_table(mrt);
264 fib_rules_unregister(net->ipv6.mr6_rules_ops);
/*
 * Single-table variant: the loop body runs at most once, for
 * net->ipv6.mrt6, and not at all when that pointer is NULL.
 */
267 #define ip6mr_for_each_table(mrt, net) \
268 for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
/* Single-table variant: "id" is ignored, the namespace's only table is returned. */
270 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
272 return net->ipv6.mrt6;
/*
 * Single-table variant: no rule lookup, *mrt is always the namespace's only
 * table. The return statement is not part of this listing.
 */
275 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
276 struct mr6_table **mrt)
278 *mrt = net->ipv6.mrt6;
/*
 * Single-table variant of the per-namespace setup: create the default table.
 * Returns 0 on success and -ENOMEM when the table could not be created.
 */
282 static int __net_init ip6mr_rules_init(struct net *net)
284 net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
285 return net->ipv6.mrt6 ? 0 : -ENOMEM;
/* Single-table variant of the per-namespace teardown: free the only table. */
288 static void __net_exit ip6mr_rules_exit(struct net *net)
290 ip6mr_free_table(net->ipv6.mrt6);
/*
 * Create the table "id" for namespace "net".
 *
 * Visible steps: look for an existing table with ip6mr_get_table() first,
 * then allocate a zeroed table (GFP_KERNEL), record the owning namespace,
 * initialise every hash bucket and the unresolved queue as empty lists, set
 * up the expiry timer with ipmr_expire_process() as its handler, mark "no
 * register vif" (-1) when PIM-SM v2 is configured, and, in the
 * multiple-tables build, append the table to the namespace list.
 * The NULL checks and return statements are not part of this listing.
 */
294 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
296 struct mr6_table *mrt;
299 mrt = ip6mr_get_table(net, id);
303 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
307 write_pnet(&mrt->net, net);
309 /* Forwarding cache */
310 for (i = 0; i < MFC6_LINES; i++)
311 INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
313 INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
315 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
318 #ifdef CONFIG_IPV6_PIMSM_V2
319 mrt->mroute_reg_vif_num = -1;
321 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
322 list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
/*
 * Tear down a table: stop its expiry timer, then let mroute_clean_tables()
 * drop its vifs and cache entries. Any statements after that call are not
 * part of this listing.
 */
327 static void ip6mr_free_table(struct mr6_table *mrt)
329 del_timer(&mrt->ipmr_expire_timer);
330 mroute_clean_tables(mrt);
334 #ifdef CONFIG_PROC_FS
/*
 * Private seq_file state for the MFC cache dump. "cache" points at the list
 * currently being walked (one hash bucket or the unresolved queue), which is
 * how ipmr_mfc_seq_stop() knows which lock is held. The iterator code also
 * uses a bucket counter "ct" whose declaration is not part of this listing.
 */
336 struct ipmr_mfc_iter {
337 struct seq_net_private p;
338 struct mr6_table *mrt;
339 struct list_head *cache;
/*
 * Position the MFC iterator for a dump. The resolved hash buckets are
 * scanned first with mrt_lock read-held; it->ct and it->cache track the
 * bucket being scanned. If that scan finishes, mrt_lock is dropped and the
 * unresolved queue is scanned under mfc_unres_lock. The position test and
 * the return statements are not part of this listing.
 */
344 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
345 struct ipmr_mfc_iter *it, loff_t pos)
347 struct mr6_table *mrt = it->mrt;
348 struct mfc6_cache *mfc;
350 read_lock(&mrt_lock);
351 for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
352 it->cache = &mrt->mfc6_cache_array[it->ct];
353 list_for_each_entry(mfc, it->cache, list)
357 read_unlock(&mrt_lock);
359 spin_lock_bh(&mfc_unres_lock);
360 it->cache = &mrt->mfc6_unres_queue;
361 list_for_each_entry(mfc, it->cache, list)
364 spin_unlock_bh(&mfc_unres_lock);
371 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
/*
 * Private seq_file state for the vif dump. The iterator code also uses a
 * slot counter "ct" whose declaration is not part of this listing.
 */
374 struct ipmr_vif_iter {
375 struct seq_net_private p;
376 struct mr6_table *mrt;
/*
 * Walk vif6_table from slot 0 up to mrt->maxvif, skipping empty slots, and
 * return a pointer to a populated slot. iter->ct is left at the slot index.
 * The position countdown and the final return are not part of this listing.
 */
380 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
381 struct ipmr_vif_iter *iter,
384 struct mr6_table *mrt = iter->mrt;
386 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
387 if (!MIF_EXISTS(mrt, iter->ct))
390 return &mrt->vif6_table[iter->ct];
/*
 * seq_file ->start() for the vif dump. Always dumps the default table
 * (RT6_TABLE_DFLT); -ENOENT is returned on the visible error path. Takes
 * mrt_lock for reading; ip6mr_vif_seq_stop() releases it. A non-zero *pos
 * is translated to the (*pos - 1)-th populated vif.
 */
395 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
398 struct ipmr_vif_iter *iter = seq->private;
399 struct net *net = seq_file_net(seq);
400 struct mr6_table *mrt;
402 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
404 return ERR_PTR(-ENOENT);
408 read_lock(&mrt_lock);
409 return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
/*
 * seq_file ->next() for the vif dump. After the header token the first
 * populated vif is returned; otherwise iter->ct is advanced to the next
 * populated slot below mrt->maxvif. The end-of-table return is not part of
 * this listing.
 */
413 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
415 struct ipmr_vif_iter *iter = seq->private;
416 struct net *net = seq_file_net(seq);
417 struct mr6_table *mrt = iter->mrt;
420 if (v == SEQ_START_TOKEN)
421 return ip6mr_vif_seq_idx(net, iter, 0);
423 while (++iter->ct < mrt->maxvif) {
424 if (!MIF_EXISTS(mrt, iter->ct))
426 return &mrt->vif6_table[iter->ct];
/* seq_file ->stop() for the vif dump: drops the read lock taken in ->start(). */
431 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
434 read_unlock(&mrt_lock);
/*
 * seq_file ->show() for the vif dump: prints the column header for the start
 * token, otherwise one line per vif. The vif index is derived from the
 * entry's offset within mrt->vif6_table, and "none" is printed when the slot
 * has no device.
 */
437 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
439 struct ipmr_vif_iter *iter = seq->private;
440 struct mr6_table *mrt = iter->mrt;
442 if (v == SEQ_START_TOKEN) {
444 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
446 const struct mif_device *vif = v;
447 const char *name = vif->dev ? vif->dev->name : "none";
450 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
451 vif - mrt->vif6_table,
452 name, vif->bytes_in, vif->pkt_in,
453 vif->bytes_out, vif->pkt_out,
/* seq_file callbacks for the vif dump, opened through ip6mr_vif_open(). */
459 static const struct seq_operations ip6mr_vif_seq_ops = {
460 .start = ip6mr_vif_seq_start,
461 .next = ip6mr_vif_seq_next,
462 .stop = ip6mr_vif_seq_stop,
463 .show = ip6mr_vif_seq_show,
/*
 * ->open() for the vif proc file: sets up a namespace-aware seq_file whose
 * private area is a struct ipmr_vif_iter.
 */
466 static int ip6mr_vif_open(struct inode *inode, struct file *file)
468 return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
469 sizeof(struct ipmr_vif_iter));
/*
 * File operations registered for "ip6_mr_vif" in ip6mr_net_init(). The
 * read/llseek members are not part of this listing.
 */
472 static const struct file_operations ip6mr_vif_fops = {
473 .owner = THIS_MODULE,
474 .open = ip6mr_vif_open,
477 .release = seq_release_net,
/*
 * seq_file ->start() for the MFC cache dump. Always dumps the default table
 * (RT6_TABLE_DFLT); -ENOENT is returned on the visible error path. No lock
 * is taken here directly: ipmr_mfc_seq_idx() acquires whichever lock matches
 * the list it stops in.
 */
480 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
482 struct ipmr_mfc_iter *it = seq->private;
483 struct net *net = seq_file_net(seq);
484 struct mr6_table *mrt;
486 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
488 return ERR_PTR(-ENOENT);
491 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
/*
 * seq_file ->next() for the MFC cache dump.
 *
 * Order of traversal: remaining entries of the current list, then the next
 * non-empty hash bucket, then (after dropping mrt_lock and taking
 * mfc_unres_lock) the unresolved queue. The BUG_ON documents the invariant
 * that, while in the resolved cache, it->cache is the bucket with index
 * it->ct. Statements that end the walk are not part of this listing.
 */
495 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
497 struct mfc6_cache *mfc = v;
498 struct ipmr_mfc_iter *it = seq->private;
499 struct net *net = seq_file_net(seq);
500 struct mr6_table *mrt = it->mrt;
504 if (v == SEQ_START_TOKEN)
505 return ipmr_mfc_seq_idx(net, seq->private, 0);
507 if (mfc->list.next != it->cache)
508 return list_entry(mfc->list.next, struct mfc6_cache, list);
510 if (it->cache == &mrt->mfc6_unres_queue)
513 BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
515 while (++it->ct < MFC6_LINES) {
516 it->cache = &mrt->mfc6_cache_array[it->ct];
517 if (list_empty(it->cache))
519 return list_first_entry(it->cache, struct mfc6_cache, list);
522 /* exhausted cache_array, show unresolved */
523 read_unlock(&mrt_lock);
524 it->cache = &mrt->mfc6_unres_queue;
527 spin_lock_bh(&mfc_unres_lock);
528 if (!list_empty(it->cache))
529 return list_first_entry(it->cache, struct mfc6_cache, list);
532 spin_unlock_bh(&mfc_unres_lock);
538 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
540 struct ipmr_mfc_iter *it = seq->private;
541 struct mr6_table *mrt = it->mrt;
543 if (it->cache == &mrt->mfc6_unres_queue)
544 spin_unlock_bh(&mfc_unres_lock);
545 else if (it->cache == mrt->mfc6_cache_array)
546 read_unlock(&mrt_lock);
/*
 * seq_file ->show() for the MFC cache dump: prints the column header for the
 * start token, otherwise group and origin addresses followed by counters.
 * Resolved entries print their byte and wrong-interface counters and one
 * item per existing vif between minvif and maxvif whose ttl is below 255;
 * entries on the unresolved queue print three zero counters instead.
 */
549 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
553 if (v == SEQ_START_TOKEN) {
557 "Iif Pkts Bytes Wrong Oifs\n");
559 const struct mfc6_cache *mfc = v;
560 const struct ipmr_mfc_iter *it = seq->private;
561 struct mr6_table *mrt = it->mrt;
563 seq_printf(seq, "%pI6 %pI6 %-3hd",
564 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
567 if (it->cache != &mrt->mfc6_unres_queue) {
568 seq_printf(seq, " %8lu %8lu %8lu",
570 mfc->mfc_un.res.bytes,
571 mfc->mfc_un.res.wrong_if);
572 for (n = mfc->mfc_un.res.minvif;
573 n < mfc->mfc_un.res.maxvif; n++) {
574 if (MIF_EXISTS(mrt, n) &&
575 mfc->mfc_un.res.ttls[n] < 255)
578 n, mfc->mfc_un.res.ttls[n]);
581 /* unresolved mfc_caches don't contain
582 * pkt, bytes and wrong_if values
584 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
/* seq_file callbacks for the MFC cache dump, opened through ipmr_mfc_open(). */
591 static const struct seq_operations ipmr_mfc_seq_ops = {
592 .start = ipmr_mfc_seq_start,
593 .next = ipmr_mfc_seq_next,
594 .stop = ipmr_mfc_seq_stop,
595 .show = ipmr_mfc_seq_show,
/*
 * ->open() for the MFC cache proc file: sets up a namespace-aware seq_file
 * whose private area is a struct ipmr_mfc_iter.
 */
598 static int ipmr_mfc_open(struct inode *inode, struct file *file)
600 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
601 sizeof(struct ipmr_mfc_iter));
/*
 * File operations registered for "ip6_mr_cache" in ip6mr_net_init(). The
 * read/llseek members are not part of this listing.
 */
604 static const struct file_operations ip6mr_mfc_fops = {
605 .owner = THIS_MODULE,
606 .open = ipmr_mfc_open,
609 .release = seq_release_net,
613 #ifdef CONFIG_IPV6_PIMSM_V2
/*
 * Receive handler for PIM packets (PIM-SM v2 build).
 *
 * Visible flow:
 *  1. make sure the PIM register header plus an inner IPv6 header can be
 *     pulled from the skb;
 *  2. test the PIM type/flags and checksum;
 *  3. test that the encapsulated packet is addressed to a multicast group
 *     and that its payload length is non-zero and fits inside the skb;
 *  4. select the table for this flow and fetch the register vif's device
 *     under mrt_lock;
 *  5. strip the outer headers so the skb starts at the inner IPv6 header
 *     and hand it to skb_tunnel_rx() on the register device.
 * The drop/return statements between these steps are not part of this
 * listing.
 */
615 static int pim6_rcv(struct sk_buff *skb)
617 struct pimreghdr *pim;
618 struct ipv6hdr *encap;
619 struct net_device *reg_dev = NULL;
620 struct net *net = dev_net(skb->dev);
621 struct mr6_table *mrt;
622 struct flowi6 fl6 = {
623 .flowi6_iif = skb->dev->ifindex,
624 .flowi6_mark = skb->mark,
628 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
631 pim = (struct pimreghdr *)skb_transport_header(skb);
/*
 * The checksum term is true only when BOTH the header-only checksum and the
 * whole-packet checksum are non-zero, i.e. either form is accepted.
 */
632 if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
633 (pim->flags & PIM_NULL_REGISTER) ||
634 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
635 sizeof(*pim), IPPROTO_PIM,
636 csum_partial((void *)pim, sizeof(*pim), 0)) &&
637 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
640 /* check if the inner packet is destined to mcast group */
641 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
644 if (!ipv6_addr_is_multicast(&encap->daddr) ||
645 encap->payload_len == 0 ||
646 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
649 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
/* NOTE(review): the register vif index is sampled before mrt_lock is taken. */
651 reg_vif_num = mrt->mroute_reg_vif_num;
653 read_lock(&mrt_lock);
654 if (reg_vif_num >= 0)
655 reg_dev = mrt->vif6_table[reg_vif_num].dev;
658 read_unlock(&mrt_lock);
663 skb->mac_header = skb->network_header;
664 skb_pull(skb, (u8 *)encap - skb->data);
665 skb_reset_network_header(skb);
666 skb->protocol = htons(ETH_P_IPV6);
667 skb->ip_summed = CHECKSUM_NONE;
668 skb->pkt_type = PACKET_HOST;
670 skb_tunnel_rx(skb, reg_dev);
/*
 * Protocol descriptor registered for IPPROTO_PIM by ip6_mr_init(); its
 * initialiser is not part of this listing.
 */
681 static const struct inet6_protocol pim6_protocol = {
685 /* Service routines creating virtual interfaces: PIMREG */
/*
 * ->ndo_start_xmit of the PIM register pseudo-device. A packet "sent" on the
 * device is not transmitted: after the table lookup the device's tx
 * counters are bumped and, with mrt_lock read-held, the whole packet is
 * reported through ip6mr_cache_report() as an MRT6MSG_WHOLEPKT message for
 * the register vif. Error handling, freeing of the skb and the return value
 * are not part of this listing.
 */
687 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
688 struct net_device *dev)
690 struct net *net = dev_net(dev);
691 struct mr6_table *mrt;
692 struct flowi6 fl6 = {
693 .flowi6_oif = dev->ifindex,
694 .flowi6_iif = skb->skb_iif,
695 .flowi6_mark = skb->mark,
699 err = ip6mr_fib_lookup(net, &fl6, &mrt);
705 read_lock(&mrt_lock);
706 dev->stats.tx_bytes += skb->len;
707 dev->stats.tx_packets++;
708 ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
709 read_unlock(&mrt_lock);
/* Device operations of the PIM register pseudo-device (see reg_vif_setup()). */
714 static const struct net_device_ops reg_vif_netdev_ops = {
715 .ndo_start_xmit = reg_vif_xmit,
718 static void reg_vif_setup(struct net_device *dev)
720 dev->type = ARPHRD_PIMREG;
721 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
722 dev->flags = IFF_NOARP;
723 dev->netdev_ops = ®_vif_netdev_ops;
724 dev->destructor = free_netdev;
725 dev->features |= NETIF_F_NETNS_LOCAL;
/*
 * Create and register the PIM register pseudo-device for a table. The
 * device is named "pim6reg" for the default table and "pim6reg<id>" for any
 * other table, allocated with reg_vif_setup() as its setup callback and
 * bound to "net". The visible failure path unregisters the device again.
 * Bring-up of the device and the return statements are not part of this
 * listing.
 *
 * NOTE(review): the declaration of "name" is not visible here. "pim6reg"
 * plus a full 10-digit u32 id needs 18 bytes including the terminator, so
 * confirm the buffer size or that table ids are range-limited before this
 * sprintf() can run.
 */
728 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
730 struct net_device *dev;
733 if (mrt->id == RT6_TABLE_DFLT)
734 sprintf(name, "pim6reg");
736 sprintf(name, "pim6reg%u", mrt->id);
738 dev = alloc_netdev(0, name, reg_vif_setup);
742 dev_net_set(dev, net);
744 if (register_netdevice(dev)) {
757 /* allow the register to be completed before unregistering. */
761 unregister_netdevice(dev);
/*
 * Remove vif "vifi" from a table.
 *
 * Returns -EADDRNOTAVAIL when the index is out of range; a second
 * -EADDRNOTAVAIL return is visible inside the locked section, but its
 * condition is not part of this listing. With mrt_lock write-held the
 * register vif number is reset if this was the register vif, and maxvif is
 * shrunk to just above the highest remaining populated slot when the last
 * slot is removed. After the lock is dropped the device leaves all-multicast
 * mode, its per-device mc_forwarding counter is decremented, and a register
 * device is queued for unregistration on "head".
 */
770 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
772 struct mif_device *v;
773 struct net_device *dev;
774 struct inet6_dev *in6_dev;
776 if (vifi < 0 || vifi >= mrt->maxvif)
777 return -EADDRNOTAVAIL;
779 v = &mrt->vif6_table[vifi];
781 write_lock_bh(&mrt_lock);
786 write_unlock_bh(&mrt_lock);
787 return -EADDRNOTAVAIL;
790 #ifdef CONFIG_IPV6_PIMSM_V2
791 if (vifi == mrt->mroute_reg_vif_num)
792 mrt->mroute_reg_vif_num = -1;
795 if (vifi + 1 == mrt->maxvif) {
797 for (tmp = vifi - 1; tmp >= 0; tmp--) {
798 if (MIF_EXISTS(mrt, tmp))
801 mrt->maxvif = tmp + 1;
804 write_unlock_bh(&mrt_lock);
806 dev_set_allmulti(dev, -1);
808 in6_dev = __in6_dev_get(dev);
810 in6_dev->cnf.mc_forwarding--;
812 if (v->flags & MIFF_REGISTER)
813 unregister_netdevice_queue(dev, head);
/* Return a cache entry to the mrt_cachep slab cache. */
819 static inline void ip6mr_cache_free(struct mfc6_cache *c)
821 kmem_cache_free(mrt_cachep, c);
824 /* Destroy an unresolved cache entry, killing queued skbs
825 and reporting error to netlink readers.
/*
 * Dispose of an unresolved cache entry: drop it from the resolve-queue
 * count and drain its queue of pending skbs. An skb whose IPv6 version
 * field is 0 is treated as a queued netlink request: it is rewritten in
 * place into an NLMSG_ERROR reply carrying -ETIMEDOUT and unicast back to
 * the requester. Handling of ordinary skbs and the freeing of the entry are
 * not part of this listing.
 */
828 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
830 struct net *net = read_pnet(&mrt->net);
833 atomic_dec(&mrt->cache_resolve_queue_len);
835 while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
836 if (ipv6_hdr(skb)->version == 0) {
837 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
838 nlh->nlmsg_type = NLMSG_ERROR;
839 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
840 skb_trim(skb, nlh->nlmsg_len);
841 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
842 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
851 /* Timer process for all the unresolved queue. */
/*
 * Expire pass over the unresolved queue. Entries whose deadline is still in
 * the future are kept and their remaining time is compared with "expires"
 * (initially 10 seconds); the other entries are destroyed. If entries
 * remain, the table timer is re-armed "expires" jiffies from now. Callers in
 * this file invoke it with mfc_unres_lock held.
 */
853 static void ipmr_do_expire_process(struct mr6_table *mrt)
855 unsigned long now = jiffies;
856 unsigned long expires = 10 * HZ;
857 struct mfc6_cache *c, *next;
859 list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
860 if (time_after(c->mfc_un.unres.expires, now)) {
862 unsigned long interval = c->mfc_un.unres.expires - now;
863 if (interval < expires)
869 ip6mr_destroy_unres(mrt, c);
872 if (!list_empty(&mrt->mfc6_unres_queue))
873 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
/*
 * Timer handler for a table's unresolved queue; "arg" is the table pointer.
 * The lock is only tried, never waited for: when mfc_unres_lock is busy the
 * timer is re-armed for the next jiffy instead. Otherwise a non-empty queue
 * is processed by ipmr_do_expire_process().
 */
876 static void ipmr_expire_process(unsigned long arg)
878 struct mr6_table *mrt = (struct mr6_table *)arg;
880 if (!spin_trylock(&mfc_unres_lock)) {
881 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
885 if (!list_empty(&mrt->mfc6_unres_queue))
886 ipmr_do_expire_process(mrt);
888 spin_unlock(&mfc_unres_lock);
891 /* Fill oifs list. It is called under write locked mrt_lock. */
/*
 * Rebuild the output-interface data of a resolved cache entry from "ttls".
 * All per-vif ttls are first reset to 255 and the [minvif, maxvif) window is
 * emptied; then every existing vif whose requested ttl is neither 0 nor 255
 * gets that ttl and widens the window so that minvif is the lowest and
 * maxvif one past the highest such vif.
 */
893 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
898 cache->mfc_un.res.minvif = MAXMIFS;
899 cache->mfc_un.res.maxvif = 0;
900 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
902 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
903 if (MIF_EXISTS(mrt, vifi) &&
904 ttls[vifi] && ttls[vifi] < 255) {
905 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
906 if (cache->mfc_un.res.minvif > vifi)
907 cache->mfc_un.res.minvif = vifi;
908 if (cache->mfc_un.res.maxvif <= vifi)
909 cache->mfc_un.res.maxvif = vifi + 1;
/*
 * Add the vif described by "vifc" to a table.
 *
 * vifc->mif6c_mifi indexes vif6_table directly; no bound check is visible
 * in this function (ip6_mroute_setsockopt() rejects indices >= MAXMIFS
 * before calling it). The device is either a freshly created register
 * device (PIM-SM v2 build, only when the table has no register vif yet) or
 * an existing device looked up by interface index; -EADDRNOTAVAIL is
 * returned on the visible lookup failure path. The device is put into
 * all-multicast mode and its per-device mc_forwarding counter is
 * incremented. The slot is then filled in, and only the final publication
 * (register vif number, maxvif) happens with mrt_lock write-held.
 * The case labels, error checks and return statements are not part of this
 * listing.
 */
914 static int mif6_add(struct net *net, struct mr6_table *mrt,
915 struct mif6ctl *vifc, int mrtsock)
917 int vifi = vifc->mif6c_mifi;
918 struct mif_device *v = &mrt->vif6_table[vifi];
919 struct net_device *dev;
920 struct inet6_dev *in6_dev;
924 if (MIF_EXISTS(mrt, vifi))
927 switch (vifc->mif6c_flags) {
928 #ifdef CONFIG_IPV6_PIMSM_V2
931 * Special Purpose VIF in PIM
932 * All the packets will be sent to the daemon
934 if (mrt->mroute_reg_vif_num >= 0)
936 dev = ip6mr_reg_vif(net, mrt);
939 err = dev_set_allmulti(dev, 1);
941 unregister_netdevice(dev);
948 dev = dev_get_by_index(net, vifc->mif6c_pifi);
950 return -EADDRNOTAVAIL;
951 err = dev_set_allmulti(dev, 1);
961 in6_dev = __in6_dev_get(dev);
963 in6_dev->cnf.mc_forwarding++;
966 * Fill in the VIF structures
968 v->rate_limit = vifc->vifc_rate_limit;
969 v->flags = vifc->mif6c_flags;
971 v->flags |= VIFF_STATIC;
972 v->threshold = vifc->vifc_threshold;
977 v->link = dev->ifindex;
978 if (v->flags & MIFF_REGISTER)
979 v->link = dev->iflink;
981 /* And finish update writing critical data */
982 write_lock_bh(&mrt_lock);
984 #ifdef CONFIG_IPV6_PIMSM_V2
985 if (v->flags & MIFF_REGISTER)
986 mrt->mroute_reg_vif_num = vifi;
988 if (vifi + 1 > mrt->maxvif)
989 mrt->maxvif = vifi + 1;
990 write_unlock_bh(&mrt_lock);
/*
 * Look up the resolved cache entry for (origin, mcastgrp): hash the pair
 * with MFC6_HASH to pick a bucket, then compare both addresses of every
 * entry in that bucket. The return statements are not part of this listing.
 */
994 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
995 const struct in6_addr *origin,
996 const struct in6_addr *mcastgrp)
998 int line = MFC6_HASH(mcastgrp, origin);
999 struct mfc6_cache *c;
1001 list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1002 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1003 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1010 * Allocate a multicast cache entry
/*
 * Allocate a zeroed cache entry for a resolved route (GFP_KERNEL, so the
 * call may sleep). minvif starts at MAXMIFS, i.e. an empty output window.
 * The NULL check and return are not part of this listing.
 */
1012 static struct mfc6_cache *ip6mr_cache_alloc(void)
1014 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1017 c->mfc_un.res.minvif = MAXMIFS;
/*
 * Allocate a zeroed cache entry for an unresolved route. GFP_ATOMIC is used
 * because the caller holds mfc_unres_lock. The pending-skb queue is
 * initialised and the entry expires 10 seconds from now. The NULL check and
 * return are not part of this listing.
 */
1021 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1023 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1026 skb_queue_head_init(&c->mfc_un.unres.unresolved);
1027 c->mfc_un.unres.expires = jiffies + 10 * HZ;
1032 * A cache entry has gone into a resolved state from queued
/*
 * Replay everything queued on unresolved entry "uc" now that resolved entry
 * "c" exists. An skb whose IPv6 version field is 0 is a queued netlink
 * request: it is answered by filling in the route from "c", or with an
 * NLMSG_ERROR carrying -EMSGSIZE when the fill does not return a positive
 * value, and unicast back to the requester. Every other skb is forwarded
 * through ip6_mr_forward().
 */
1035 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1036 struct mfc6_cache *uc, struct mfc6_cache *c)
1038 struct sk_buff *skb;
1041 * Play the pending entries through our router
1044 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1045 if (ipv6_hdr(skb)->version == 0) {
1046 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1048 if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
1049 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1051 nlh->nlmsg_type = NLMSG_ERROR;
1052 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
1053 skb_trim(skb, nlh->nlmsg_len);
1054 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1056 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1058 ip6_mr_forward(net, mrt, skb, c);
1063 * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1064 * expects the following bizarre scheme.
1066 * Called under mrt_lock.
/*
 * Build a struct mrt6msg report about packet "pkt" and queue it on the
 * table's mroute6_sk socket.
 *
 * mifi:   vif index stored in the message for the non-WHOLEPKT case.
 * assert: message type; MRT6MSG_WHOLEPKT (PIM-SM v2 build) gets special
 *         treatment.
 *
 * For MRT6MSG_WHOLEPKT the packet is copied with extra headroom and the
 * message header is pushed in front of it, with im6_mif set to the table's
 * register vif. For every other type a fresh skb is allocated (GFP_ATOMIC)
 * holding a copy of the packet's IPv6 header followed by the message.
 * Source and destination addresses are always taken from "pkt". A warning
 * is printed (rate limited) on the visible queueing-failure path.
 * Allocation-failure handling, the no-socket path and the return statements
 * are not part of this listing.
 */
1069 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1070 mifi_t mifi, int assert)
1072 struct sk_buff *skb;
1073 struct mrt6msg *msg;
1076 #ifdef CONFIG_IPV6_PIMSM_V2
1077 if (assert == MRT6MSG_WHOLEPKT)
1078 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1082 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1087 /* I suppose that internal messages
1088 * do not require checksums */
1090 skb->ip_summed = CHECKSUM_UNNECESSARY;
1092 #ifdef CONFIG_IPV6_PIMSM_V2
1093 if (assert == MRT6MSG_WHOLEPKT) {
1094 /* Ugly, but we have no choice with this interface.
1095 Duplicate old header, fix length etc.
1096 And all this only to mangle msg->im6_msgtype and
1097 to set msg->im6_mbz to "mbz" :-)
1099 skb_push(skb, -skb_network_offset(pkt));
1101 skb_push(skb, sizeof(*msg));
1102 skb_reset_transport_header(skb);
1103 msg = (struct mrt6msg *)skb_transport_header(skb);
1105 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1106 msg->im6_mif = mrt->mroute_reg_vif_num;
1108 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
1109 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
1111 skb->ip_summed = CHECKSUM_UNNECESSARY;
1116 * Copy the IP header
1119 skb_put(skb, sizeof(struct ipv6hdr));
1120 skb_reset_network_header(skb);
1121 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1126 skb_put(skb, sizeof(*msg));
1127 skb_reset_transport_header(skb);
1128 msg = (struct mrt6msg *)skb_transport_header(skb);
1131 msg->im6_msgtype = assert;
1132 msg->im6_mif = mifi;
1134 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
1135 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
1137 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1138 skb->ip_summed = CHECKSUM_UNNECESSARY;
1141 if (mrt->mroute6_sk == NULL) {
1147 * Deliver to user space multicast routing algorithms
1149 ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1151 if (net_ratelimit())
1152 printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
1160 * Queue a packet for resolution. It gets locked cache entry!
/*
 * Queue "skb" until a route for its (source, destination) pair is
 * installed. Runs under mfc_unres_lock.
 *
 * If no unresolved entry matches the packet's addresses, a new one is
 * created, unless 10 entries are already pending or the allocation fails.
 * The new entry is reported to the routing socket as MRT6MSG_NOCACHE; the
 * visible failure path after that report frees the entry again. On success
 * the entry is counted, linked onto the unresolved queue, and the expire
 * pass is run, which also arms the table timer. The packet is then appended
 * to the entry's queue unless more than 3 packets are already waiting.
 * The function's return type line, the drop paths and the return statements
 * are not part of this listing.
 */
1164 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1168 struct mfc6_cache *c;
1170 spin_lock_bh(&mfc_unres_lock);
1171 list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1172 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1173 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1181 * Create a new entry if allowable
1184 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1185 (c = ip6mr_cache_alloc_unres()) == NULL) {
1186 spin_unlock_bh(&mfc_unres_lock);
1193 * Fill in the new cache entry
1195 c->mf6c_parent = -1;
1196 c->mf6c_origin = ipv6_hdr(skb)->saddr;
1197 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1200 * Reflect first query at pim6sd
1202 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1204 /* If the report failed throw the cache entry
1207 spin_unlock_bh(&mfc_unres_lock);
1209 ip6mr_cache_free(c);
1214 atomic_inc(&mrt->cache_resolve_queue_len);
1215 list_add(&c->list, &mrt->mfc6_unres_queue);
1217 ipmr_do_expire_process(mrt);
1221 * See if we can append the packet
1223 if (c->mfc_un.unres.unresolved.qlen > 3) {
1227 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1231 spin_unlock_bh(&mfc_unres_lock);
1236 * MFC6 cache manipulation by user space
/*
 * Delete the resolved cache entry matching the origin/group pair in "mfc".
 * The bucket is selected with MFC6_HASH; a matching entry is handled inside
 * a mrt_lock write section (the unlink statement itself is not part of this
 * listing) and then freed. The return statements are not visible either.
 */
1239 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
1242 struct mfc6_cache *c, *next;
1244 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1246 list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1247 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1248 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1249 write_lock_bh(&mrt_lock);
1251 write_unlock_bh(&mrt_lock);
1253 ip6mr_cache_free(c);
/*
 * Netdevice notifier. Only NETDEV_UNREGISTER is acted on: every table of the
 * device's namespace is scanned and vifs are removed with mif6_delete() (the
 * per-vif device comparison is not part of this listing). Register devices
 * collected on the local list are then unregistered in one batch.
 */
1260 static int ip6mr_device_event(struct notifier_block *this,
1261 unsigned long event, void *ptr)
1263 struct net_device *dev = ptr;
1264 struct net *net = dev_net(dev);
1265 struct mr6_table *mrt;
1266 struct mif_device *v;
1270 if (event != NETDEV_UNREGISTER)
1273 ip6mr_for_each_table(mrt, net) {
1274 v = &mrt->vif6_table[0];
1275 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1277 mif6_delete(mrt, ct, &list);
1280 unregister_netdevice_many(&list);
/* Notifier block registered by ip6_mr_init() and removed by ip6_mr_cleanup(). */
1285 static struct notifier_block ip6_mr_notifier = {
1286 .notifier_call = ip6mr_device_event
1290 * Setup for IP multicast routing
/*
 * Per-namespace init: set up the tables/rules, then (with CONFIG_PROC_FS)
 * create the "ip6_mr_vif" and "ip6_mr_cache" proc entries. The visible
 * unwind removes "ip6_mr_vif" when the second entry cannot be created and
 * undoes the rules setup. Labels and return statements are not part of this
 * listing.
 */
1293 static int __net_init ip6mr_net_init(struct net *net)
1297 err = ip6mr_rules_init(net);
1301 #ifdef CONFIG_PROC_FS
1303 if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1305 if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1306 goto proc_cache_fail;
1311 #ifdef CONFIG_PROC_FS
1313 proc_net_remove(net, "ip6_mr_vif");
1315 ip6mr_rules_exit(net);
/*
 * Per-namespace exit: remove both proc entries (when CONFIG_PROC_FS is set),
 * then tear down the tables/rules.
 */
1321 static void __net_exit ip6mr_net_exit(struct net *net)
1323 #ifdef CONFIG_PROC_FS
1324 proc_net_remove(net, "ip6_mr_cache");
1325 proc_net_remove(net, "ip6_mr_vif");
1327 ip6mr_rules_exit(net);
/* Per-namespace hooks registered by ip6_mr_init(). */
1330 static struct pernet_operations ip6mr_net_ops = {
1331 .init = ip6mr_net_init,
1332 .exit = ip6mr_net_exit,
/*
 * Subsystem init. In order: create the MFC slab cache, register the
 * per-namespace operations, register the netdevice notifier, add the PIM
 * protocol handler (PIM-SM v2 build) and register the RTM_GETROUTE dump
 * handler for RTNL_FAMILY_IP6MR. The visible unwind path undoes the
 * notifier, the per-namespace operations and the slab cache, in that order.
 * Error checks, labels and return statements are not part of this listing.
 */
1335 int __init ip6_mr_init(void)
1339 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1340 sizeof(struct mfc6_cache),
1341 0, SLAB_HWCACHE_ALIGN,
1346 err = register_pernet_subsys(&ip6mr_net_ops);
1348 goto reg_pernet_fail;
1350 err = register_netdevice_notifier(&ip6_mr_notifier);
1352 goto reg_notif_fail;
1353 #ifdef CONFIG_IPV6_PIMSM_V2
1354 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1355 printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
1357 goto add_proto_fail;
1360 rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1361 ip6mr_rtm_dumproute, NULL);
1363 #ifdef CONFIG_IPV6_PIMSM_V2
1365 unregister_netdevice_notifier(&ip6_mr_notifier);
1368 unregister_pernet_subsys(&ip6mr_net_ops);
1370 kmem_cache_destroy(mrt_cachep);
1374 void ip6_mr_cleanup(void)
1376 unregister_netdevice_notifier(&ip6_mr_notifier);
1377 unregister_pernet_subsys(&ip6mr_net_ops);
1378 kmem_cache_destroy(mrt_cachep);
/*
 * Add or update the resolved cache entry described by "mfc".
 *
 * A per-vif ttl array is built from mfc->mf6cc_ifset, starting from all-255.
 * If an entry for the origin/group pair already exists in its hash bucket,
 * its parent vif and thresholds are updated under mrt_lock (write side).
 * Otherwise the group address must be multicast; a new entry is allocated,
 * filled in and linked into the bucket under mrt_lock. Afterwards the
 * unresolved queue is searched under mfc_unres_lock for a matching entry,
 * which is unlinked and uncounted; the expiry timer is stopped when the
 * queue becomes empty. A found entry has its pending packets replayed
 * through ip6mr_cache_resolve() and is then freed.
 * The MFC_STATIC flag is set on both paths; its condition, the error returns
 * and the loop-exit statements are not part of this listing.
 */
1381 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1382 struct mf6cctl *mfc, int mrtsock)
1386 struct mfc6_cache *uc, *c;
1387 unsigned char ttls[MAXMIFS];
1390 if (mfc->mf6cc_parent >= MAXMIFS)
1393 memset(ttls, 255, MAXMIFS);
1394 for (i = 0; i < MAXMIFS; i++) {
1395 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1400 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1402 list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1403 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1404 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1411 write_lock_bh(&mrt_lock);
1412 c->mf6c_parent = mfc->mf6cc_parent;
1413 ip6mr_update_thresholds(mrt, c, ttls);
1415 c->mfc_flags |= MFC_STATIC;
1416 write_unlock_bh(&mrt_lock);
1420 if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1423 c = ip6mr_cache_alloc();
1427 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1428 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1429 c->mf6c_parent = mfc->mf6cc_parent;
1430 ip6mr_update_thresholds(mrt, c, ttls);
1432 c->mfc_flags |= MFC_STATIC;
1434 write_lock_bh(&mrt_lock);
1435 list_add(&c->list, &mrt->mfc6_cache_array[line]);
1436 write_unlock_bh(&mrt_lock);
1439 * Check to see if we resolved a queued list. If so we
1440 * need to send on the frames and tidy up.
1443 spin_lock_bh(&mfc_unres_lock);
1444 list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1445 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1446 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1447 list_del(&uc->list);
1448 atomic_dec(&mrt->cache_resolve_queue_len);
1453 if (list_empty(&mrt->mfc6_unres_queue))
1454 del_timer(&mrt->ipmr_expire_timer);
1455 spin_unlock_bh(&mfc_unres_lock);
1458 ip6mr_cache_resolve(net, mrt, uc, c);
1459 ip6mr_cache_free(uc);
1465 * Close the multicast socket, and clear the vif tables etc
/*
 * Flush the dynamic state of a table:
 *  - delete every vif that is not flagged VIFF_STATIC and unregister the
 *    collected register devices in one batch;
 *  - walk every hash bucket; entries flagged MFC_STATIC are tested for
 *    first, the others are handled inside a mrt_lock write section and
 *    freed (the skip and unlink statements are not part of this listing);
 *  - if unresolved entries are pending, destroy them under mfc_unres_lock.
 */
1468 static void mroute_clean_tables(struct mr6_table *mrt)
1472 struct mfc6_cache *c, *next;
1475 * Shut down all active vif entries
1477 for (i = 0; i < mrt->maxvif; i++) {
1478 if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1479 mif6_delete(mrt, i, &list);
1481 unregister_netdevice_many(&list);
1486 for (i = 0; i < MFC6_LINES; i++) {
1487 list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1488 if (c->mfc_flags & MFC_STATIC)
1490 write_lock_bh(&mrt_lock);
1492 write_unlock_bh(&mrt_lock);
1494 ip6mr_cache_free(c);
1498 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1499 spin_lock_bh(&mfc_unres_lock);
1500 list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1502 ip6mr_destroy_unres(mrt, c);
1504 spin_unlock_bh(&mfc_unres_lock);
/*
 * Make "sk" the routing socket of a table. Under mrt_lock (write side) the
 * socket is installed only if the table has none yet, and the namespace-wide
 * mc_forwarding counter is incremented. The else branch and the return
 * statements are not part of this listing.
 */
1508 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1511 struct net *net = sock_net(sk);
1514 write_lock_bh(&mrt_lock);
1515 if (likely(mrt->mroute6_sk == NULL)) {
1516 mrt->mroute6_sk = sk;
1517 net->ipv6.devconf_all->mc_forwarding++;
1521 write_unlock_bh(&mrt_lock);
/*
 * Detach "sk" from whichever table it is the routing socket of: clear the
 * table's socket pointer and decrement the namespace-wide mc_forwarding
 * counter under mrt_lock (write side), then flush the table's dynamic state
 * with mroute_clean_tables(). The return statements are not part of this
 * listing.
 */
1528 int ip6mr_sk_done(struct sock *sk)
1531 struct net *net = sock_net(sk);
1532 struct mr6_table *mrt;
1535 ip6mr_for_each_table(mrt, net) {
1536 if (sk == mrt->mroute6_sk) {
1537 write_lock_bh(&mrt_lock);
1538 mrt->mroute6_sk = NULL;
1539 net->ipv6.devconf_all->mc_forwarding--;
1540 write_unlock_bh(&mrt_lock);
1542 mroute_clean_tables(mrt);
/*
 * Return the routing socket of the table that "skb" maps to, using the
 * skb's input interface, output device and mark as the lookup key. The
 * return taken when the lookup fails is not part of this listing.
 */
1552 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1554 struct mr6_table *mrt;
1555 struct flowi6 fl6 = {
1556 .flowi6_iif = skb->skb_iif,
1557 .flowi6_oif = skb->dev->ifindex,
1558 .flowi6_mark = skb->mark,
1561 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1564 return mrt->mroute6_sk;
1568 * Socket options and virtual interface manipulation. The whole
1569 * virtual interface system is a complete heap, but unfortunately
1570 * that's how BSD mrouted happens to think. Maybe one day with a proper
1571 * MOSPF/PIM router set up we can clean this up.
/*
 * ip6_mroute_setsockopt - handle the MRT6_* socket options on @sk.
 *
 * @sk:      socket the option is set on
 * @optname: option selector
 * @optval:  user-space pointer to the option payload
 * @optlen:  payload length as supplied by the caller
 *
 * The table is picked from the ip6mr_table stored on the raw socket,
 * falling back to RT6_TABLE_DFLT when that field is zero.  Every option
 * other than MRT6_INIT requires either that @sk is the table's control
 * socket or that the caller has CAP_NET_ADMIN.  Unknown options end in
 * -ENOPROTOOPT.
 *
 * NOTE(review): the `case` labels, local declarations and most error
 * returns are on lines not shown in this excerpt; the per-option remarks
 * below are inferred from the helper each visible line calls.
 */
1574 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1580 struct net *net = sock_net(sk);
1581 struct mr6_table *mrt;
/*
 * NOTE(review): raw6_sk(sk) is used here, before the sk_type / protocol
 * check that is visible further down -- confirm that a non-raw socket
 * cannot reach this line.
 */
1583 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1587 if (optname != MRT6_INIT) {
1588 if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
/* Presumably MRT6_INIT: only a raw ICMPv6 socket with an int-sized payload may register. */
1594 if (sk->sk_type != SOCK_RAW ||
1595 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1597 if (optlen < sizeof(int))
1600 return ip6mr_sk_init(mrt, sk);
/* Presumably MRT6_DONE. */
1603 return ip6mr_sk_done(sk);
/* Add an interface: payload is copied in and its index bounded by MAXMIFS before mif6_add(). */
1606 if (optlen < sizeof(vif))
1608 if (copy_from_user(&vif, optval, sizeof(vif)))
1610 if (vif.mif6c_mifi >= MAXMIFS)
1613 ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
/* Delete an interface by index; NULL means no deferred-unregister list is supplied. */
1618 if (optlen < sizeof(mifi_t))
1620 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1623 ret = mif6_delete(mrt, mifi, NULL);
1628 * Manipulate the forwarding caches. These live
1629 * in a sort of kernel/user symbiosis.
/* One copied-in payload serves both cache deletion and cache addition. */
1633 if (optlen < sizeof(mfc))
1635 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1638 if (optname == MRT6_DEL_MFC)
1639 ret = ip6mr_mfc_delete(mrt, &mfc);
1641 ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
1646 * Control PIM assert (to activate pim will activate assert)
/* NOTE(review): no optlen check is visible before this get_user(); confirm one exists. */
1651 if (get_user(v, (int __user *)optval))
/* Stored as a strict 0/1 flag. */
1653 mrt->mroute_do_assert = !!v;
1657 #ifdef CONFIG_IPV6_PIMSM_V2
/* NOTE(review): no optlen check is visible before this get_user() either. */
1661 if (get_user(v, (int __user *)optval))
/* Changing the PIM setting also overwrites the assert setting with the same value. */
1666 if (v != mrt->mroute_do_pim) {
1667 mrt->mroute_do_pim = v;
1668 mrt->mroute_do_assert = v;
1675 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/* Table selection: exact u32 payload; there is a test for @sk already being a control socket. */
1680 if (optlen != sizeof(u32))
1682 if (get_user(v, (u32 __user *)optval))
1684 if (sk == mrt->mroute6_sk)
/* The table is created (or looked up) first; the id is stored on the socket afterwards. */
1689 if (!ip6mr_new_table(net, v))
1691 raw6_sk(sk)->ip6mr_table = v;
1697 * Spurious command, or MRT6_VERSION which you cannot
1701 return -ENOPROTOOPT;
1706 * Getsock opt support for the multicast routing system.
/*
 * ip6_mroute_getsockopt - read back an MRT6_* option value.
 *
 * The table is chosen exactly as in the setsockopt path (socket's
 * ip6mr_table, else RT6_TABLE_DFLT).  The visible cases load `val` from
 * mrt->mroute_do_pim (only with CONFIG_IPV6_PIMSM_V2) or
 * mrt->mroute_do_assert; anything else yields -ENOPROTOOPT.  The user's
 * length is fetched, clamped to sizeof(int), written back, and that many
 * bytes of `val` are copied out.
 *
 * NOTE(review): the rest of the parameter list, the `case` labels and the
 * error returns are on lines not shown in this excerpt.
 */
1709 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1714 struct net *net = sock_net(sk);
1715 struct mr6_table *mrt;
/* NOTE(review): raw6_sk(sk) is used with no socket-type check visible in this function. */
1717 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1725 #ifdef CONFIG_IPV6_PIMSM_V2
1727 val = mrt->mroute_do_pim;
1731 val = mrt->mroute_do_assert;
1734 return -ENOPROTOOPT;
1737 if (get_user(olr, optlen))
/*
 * Signed min: a negative user length stays negative here.
 * NOTE(review): a rejection of olr < 0 is expected before the copy below,
 * on a line not shown -- confirm.
 */
1740 olr = min_t(int, olr, sizeof(int));
1744 if (put_user(olr, optlen))
1746 if (copy_to_user(optval, &val, olr))
1752 * The IP multicast ioctl support routines.
/*
 * ip6mr_ioctl - counter queries for the IPv6 multicast router.
 *
 * SIOCGETMIFCNT_IN6: copy a sioc_mif_req6 from user space, reject indexes
 *   at or above mrt->maxvif, and, if the interface exists, fill in its
 *   packet and byte counters and copy the request back.
 * SIOCGETSGCNT_IN6:  copy a sioc_sg_req6 from user space, look up the
 *   (source, group) cache entry, fill in pkt, bytes and wrong_if and copy
 *   the request back.
 * When the interface does not exist the lock is dropped and
 * -EADDRNOTAVAIL is returned; the second case ends the same way, presumably
 * when no cache entry is found.  Other commands return -ENOIOCTLCMD.
 *
 * Counters are sampled under mrt_lock (read side); the lock is released
 * before copy_to_user().
 *
 * NOTE(review): the switch statement, the test on `c`, the table NULL
 * check and the -EFAULT / -EINVAL returns are on lines not shown here.
 */
1755 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1757 struct sioc_sg_req6 sr;
1758 struct sioc_mif_req6 vr;
1759 struct mif_device *vif;
1760 struct mfc6_cache *c;
1761 struct net *net = sock_net(sk);
1762 struct mr6_table *mrt;
1764 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1769 case SIOCGETMIFCNT_IN6:
1770 if (copy_from_user(&vr, arg, sizeof(vr)))
/* Bounds check happens before the lock is taken; existence is re-checked under it. */
1772 if (vr.mifi >= mrt->maxvif)
1774 read_lock(&mrt_lock);
1775 vif = &mrt->vif6_table[vr.mifi];
1776 if (MIF_EXISTS(mrt, vr.mifi)) {
1777 vr.icount = vif->pkt_in;
1778 vr.ocount = vif->pkt_out;
1779 vr.ibytes = vif->bytes_in;
1780 vr.obytes = vif->bytes_out;
1781 read_unlock(&mrt_lock);
1783 if (copy_to_user(arg, &vr, sizeof(vr)))
/* Interface slot is empty. */
1787 read_unlock(&mrt_lock);
1788 return -EADDRNOTAVAIL;
1789 case SIOCGETSGCNT_IN6:
1790 if (copy_from_user(&sr, arg, sizeof(sr)))
1793 read_lock(&mrt_lock);
1794 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1796 sr.pktcnt = c->mfc_un.res.pkt;
1797 sr.bytecnt = c->mfc_un.res.bytes;
1798 sr.wrong_if = c->mfc_un.res.wrong_if;
1799 read_unlock(&mrt_lock);
1801 if (copy_to_user(arg, &sr, sizeof(sr)))
/* Presumably the no-matching-entry path; the test on c is not shown. */
1805 read_unlock(&mrt_lock);
1806 return -EADDRNOTAVAIL;
1808 return -ENOIOCTLCMD;
1812 #ifdef CONFIG_COMPAT
/*
 * Request layout used by ip6mr_compat_ioctl() for SIOCGETSGCNT_IN6:
 * source and group addresses followed by three counters declared as
 * compat_ulong_t.
 */
1813 struct compat_sioc_sg_req6 {
1814 struct sockaddr_in6 src;
1815 struct sockaddr_in6 grp;
1816 compat_ulong_t pktcnt;
1817 compat_ulong_t bytecnt;
1818 compat_ulong_t wrong_if;
/*
 * Request layout used by ip6mr_compat_ioctl() for SIOCGETMIFCNT_IN6:
 * four counters declared as compat_ulong_t.
 * NOTE(review): the `mifi` member read by that handler is declared on a
 * line not shown in this excerpt.
 */
1821 struct compat_sioc_mif_req6 {
1823 compat_ulong_t icount;
1824 compat_ulong_t ocount;
1825 compat_ulong_t ibytes;
1826 compat_ulong_t obytes;
/*
 * ip6mr_compat_ioctl - compat-layout variant of ip6mr_ioctl().
 *
 * Same two commands and same visible control flow as ip6mr_ioctl(); the
 * only visible difference is that requests are exchanged with user space
 * as struct compat_sioc_mif_req6 / struct compat_sioc_sg_req6, so counter
 * values are stored into compat_ulong_t members.
 *
 * NOTE(review): the switch statement, the test on `c`, the table NULL
 * check and the -EFAULT / -EINVAL returns are on lines not shown here.
 */
1829 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1831 struct compat_sioc_sg_req6 sr;
1832 struct compat_sioc_mif_req6 vr;
1833 struct mif_device *vif;
1834 struct mfc6_cache *c;
1835 struct net *net = sock_net(sk);
1836 struct mr6_table *mrt;
1838 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1843 case SIOCGETMIFCNT_IN6:
1844 if (copy_from_user(&vr, arg, sizeof(vr)))
/* Bounds check first, existence check under the lock. */
1846 if (vr.mifi >= mrt->maxvif)
1848 read_lock(&mrt_lock);
1849 vif = &mrt->vif6_table[vr.mifi];
1850 if (MIF_EXISTS(mrt, vr.mifi)) {
1851 vr.icount = vif->pkt_in;
1852 vr.ocount = vif->pkt_out;
1853 vr.ibytes = vif->bytes_in;
1854 vr.obytes = vif->bytes_out;
1855 read_unlock(&mrt_lock);
1857 if (copy_to_user(arg, &vr, sizeof(vr)))
/* Interface slot is empty. */
1861 read_unlock(&mrt_lock);
1862 return -EADDRNOTAVAIL;
1863 case SIOCGETSGCNT_IN6:
1864 if (copy_from_user(&sr, arg, sizeof(sr)))
1867 read_lock(&mrt_lock);
1868 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1870 sr.pktcnt = c->mfc_un.res.pkt;
1871 sr.bytecnt = c->mfc_un.res.bytes;
1872 sr.wrong_if = c->mfc_un.res.wrong_if;
1873 read_unlock(&mrt_lock);
1875 if (copy_to_user(arg, &sr, sizeof(sr)))
/* Presumably the no-matching-entry path; the test on c is not shown. */
1879 read_unlock(&mrt_lock);
1880 return -EADDRNOTAVAIL;
1882 return -ENOIOCTLCMD;
/*
 * ip6mr_forward2_finish - continuation passed to NF_HOOK by ip6mr_forward2().
 *
 * Bumps IPSTATS_MIB_OUTFORWDATAGRAMS for the namespace and inet6 device
 * derived from the packet's dst entry, then hands the packet to
 * dst_output() and returns its result.
 */
1887 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1889 IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1890 IPSTATS_MIB_OUTFORWDATAGRAMS);
1891 return dst_output(skb);
1895 * Processing handlers for ip6mr_forward
/*
 * ip6mr_forward2 - transmit one copy of @skb on multicast interface @vifi.
 *
 * Visible steps: look up the mif_device for @vifi and test for a missing
 * device; with CONFIG_IPV6_PIMSM_V2, a register interface only accounts
 * the packet and reports it to user space as MRT6MSG_WHOLEPKT; otherwise
 * a route is resolved for the packet's destination address with the
 * interface's link as output interface, the dst is attached to the skb,
 * output bytes are accounted, headroom is made writable with skb_cow(),
 * the skb is flagged IP6SKB_FORWARDED and sent through the
 * NF_INET_FORWARD hook with ip6mr_forward2_finish() as continuation.
 *
 * NOTE(review): the declaration of `fl6`, the assignment of `dev`, the
 * hop-limit update, the pkt_out accounting and every error/drop path are
 * on lines not shown in this excerpt.
 */
1898 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1899 struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1901 struct ipv6hdr *ipv6h;
1902 struct mif_device *vif = &mrt->vif6_table[vifi];
1903 struct net_device *dev;
1904 struct dst_entry *dst;
1907 if (vif->dev == NULL)
1910 #ifdef CONFIG_IPV6_PIMSM_V2
/* Register interface: account the packet on the device and report the whole packet upward. */
1911 if (vif->flags & MIFF_REGISTER) {
1913 vif->bytes_out += skb->len;
1914 vif->dev->stats.tx_bytes += skb->len;
1915 vif->dev->stats.tx_packets++;
1916 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1921 ipv6h = ipv6_hdr(skb);
/* Route lookup key: only the output interface and destination address are set. */
1923 fl6 = (struct flowi6) {
1924 .flowi6_oif = vif->link,
1925 .daddr = ipv6h->daddr,
1928 dst = ip6_route_output(net, NULL, &fl6);
1933 skb_dst_set(skb, dst);
1936 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1937 * not only before forwarding, but after forwarding on all output
1938 * interfaces. It is clear, if mrouter runs a multicasting
1939 * program, it should receive packets not depending to what interface
1940 * program is joined.
1941 * If we will not make it, the program will have to join on all
1942 * interfaces. On the other hand, multihoming host (or router, but
1943 * not mrouter) cannot join to more than one interface - it will
1944 * result in receiving multiple packets.
1949 vif->bytes_out += skb->len;
1951 /* We are about to write */
1952 /* XXX: extension headers? */
1953 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
/* Header pointer is fetched again after skb_cow(), presumably because the data may have moved. */
1956 ipv6h = ipv6_hdr(skb);
1959 IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1961 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
1962 ip6mr_forward2_finish);
/*
 * ip6mr_find_vif - map a net_device to its index in mrt->vif6_table.
 *
 * Scans downward from maxvif - 1 to 0 and compares each slot's dev pointer
 * with @dev.  Callers in this file treat a negative result as "not found".
 *
 * NOTE(review): the declaration of `ct` and the return statement are on
 * lines not shown in this excerpt.
 */
1969 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
1973 for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
1974 if (mrt->vif6_table[ct].dev == dev)
/*
 * ip6_mr_forward - forward @skb according to resolved cache entry @cache.
 *
 * Visible steps:
 *  - take the expected input interface from cache->mf6c_parent and bump the
 *    entry's packet and byte counters;
 *  - if the packet did not arrive on that interface, count wrong_if, find
 *    the interface it really arrived on and, when asserts are enabled and
 *    the remaining conditions of the test hold, stamp last_assert with
 *    jiffies and report MRT6MSG_WRONGMIF to user space;
 *  - otherwise account the packet on the input interface and walk the
 *    entry's interfaces from maxvif - 1 down to minvif; each interface
 *    whose ttls[] threshold is below the packet's hop limit is a target.
 *    A clone is handed to ip6mr_forward2() for `psend` inside the loop
 *    and the original skb for `psend` after it.
 *
 * NOTE(review): the declarations of `vif`, `ct`, `psend` and `true_vifi`,
 * the first operand of the rate-limit comparison, the bookkeeping of
 * `psend`, the drop paths and the return value are on lines not shown.
 * No further comments are added below the multi-line remark inside the
 * assert condition, because its closing delimiter is also on a line not
 * shown.
 */
1980 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
1981 struct sk_buff *skb, struct mfc6_cache *cache)
1986 vif = cache->mf6c_parent;
1987 cache->mfc_un.res.pkt++;
1988 cache->mfc_un.res.bytes += skb->len;
1991 * Wrong interface: drop packet and (maybe) send PIM assert.
1993 if (mrt->vif6_table[vif].dev != skb->dev) {
1996 cache->mfc_un.res.wrong_if++;
1997 true_vifi = ip6mr_find_vif(mrt, skb->dev);
1999 if (true_vifi >= 0 && mrt->mroute_do_assert &&
2000 /* pimsm uses asserts, when switching from RPT to SPT,
2001 so that we cannot check that packet arrived on an oif.
2002 It is bad, but otherwise we would need to move pretty
2003 large chunk of pimd to kernel. Ough... --ANK
2005 (mrt->mroute_do_pim ||
2006 cache->mfc_un.res.ttls[true_vifi] < 255) &&
2008 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2009 cache->mfc_un.res.last_assert = jiffies;
2010 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2015 mrt->vif6_table[vif].pkt_in++;
2016 mrt->vif6_table[vif].bytes_in += skb->len;
2021 for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2022 if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2024 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2026 ip6mr_forward2(net, mrt, skb2, cache, psend);
2032 ip6mr_forward2(net, mrt, skb, cache, psend);
2043 * Multicast packets for forwarding arrive here
/*
 * ip6_mr_input - entry point for received multicast packets to be forwarded.
 *
 * The routing table is selected with ip6mr_fib_lookup() using the receiving
 * device's ifindex and the packet mark.  Under mrt_lock (read side) the
 * (source, destination) pair of the IPv6 header is looked up in the cache:
 *  - no entry: the receiving interface index is resolved and the packet is
 *    queued through ip6mr_cache_unresolved(); the lock is dropped on both
 *    visible exits of this branch;
 *  - entry found: the packet goes to ip6_mr_forward() and the lock is
 *    dropped afterwards.
 *
 * NOTE(review): the declarations of `err` and `vif`, the handling of a
 * failed table lookup, the test on `vif` and the return statements are on
 * lines not shown in this excerpt.
 */
2046 int ip6_mr_input(struct sk_buff *skb)
2048 struct mfc6_cache *cache;
2049 struct net *net = dev_net(skb->dev);
2050 struct mr6_table *mrt;
2051 struct flowi6 fl6 = {
2052 .flowi6_iif = skb->dev->ifindex,
2053 .flowi6_mark = skb->mark,
2057 err = ip6mr_fib_lookup(net, &fl6, &mrt);
2063 read_lock(&mrt_lock);
2064 cache = ip6mr_cache_find(mrt,
2065 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2068 * No usable cache entry
2070 if (cache == NULL) {
2073 vif = ip6mr_find_vif(mrt, skb->dev);
/* NOTE(review): this block-local `err` shadows the `err` assigned from the table lookup above. */
2075 int err = ip6mr_cache_unresolved(mrt, vif, skb);
2076 read_unlock(&mrt_lock);
2080 read_unlock(&mrt_lock);
2085 ip6_mr_forward(net, mrt, skb, cache);
2087 read_unlock(&mrt_lock);
/*
 * __ip6mr_fill_mroute - append the interface attributes of cache entry @c
 * to the netlink message being built in @skb.
 *
 * Emits RTA_IIF (4-byte ifindex of the parent interface) when that
 * interface exists, then an RTA_MULTIPATH attribute containing one
 * struct rtnexthop per existing interface in [minvif, maxvif) whose ttls[]
 * value is below 255.  The multipath header is reserved first with a zero
 * payload and its type and length are filled in after the loop.  Finally
 * rtm->rtm_type is set to RTN_MULTICAST.
 *
 * NOTE(review): the declaration of `ct`, the statement guarded by the
 * MAXMIFS test, the rtattr_failure label and the return values are on
 * lines not shown here; `b` (tail pointer saved on entry) is presumably
 * used there to undo a partial fill -- confirm.
 */
2093 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2094 struct mfc6_cache *c, struct rtmsg *rtm)
2097 struct rtnexthop *nhp;
2098 u8 *b = skb_tail_pointer(skb);
2099 struct rtattr *mp_head;
2101 /* If cache is unresolved, don't try to parse IIF and OIF */
2102 if (c->mf6c_parent >= MAXMIFS)
2105 if (MIF_EXISTS(mrt, c->mf6c_parent))
2106 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
/* Reserve the RTA_MULTIPATH header now; it is completed once the next hops are written. */
2108 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
2110 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2111 if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
/* Make sure the next-hop record fits before extending the skb. */
2112 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
2113 goto rtattr_failure;
2114 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
2115 nhp->rtnh_flags = 0;
2116 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2117 nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2118 nhp->rtnh_len = sizeof(*nhp);
2121 mp_head->rta_type = RTA_MULTIPATH;
/* Attribute length covers everything appended since the header was reserved. */
2122 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
2123 rtm->rtm_type = RTN_MULTICAST;
/*
 * ip6mr_get_route - fill multicast route information for the route attached
 * to @skb.
 *
 * The lookup always uses table RT6_TABLE_DFLT.  Under mrt_lock (read side)
 * the cache is searched for the route's (source, destination) pair.
 *  - No entry: a minimal skb holding only an IPv6 header (zero flow label,
 *    zero payload length, next header IPPROTO_NONE, addresses copied from
 *    the route) is allocated with GFP_ATOMIC and queued through
 *    ip6mr_cache_unresolved() on the interface found for `dev`.  Each
 *    visible early exit in this branch drops the lock first.
 *  - Entry found: MFC_NOTIFY is set when the caller may wait and asked for
 *    RTM_F_NOTIFY, and the attributes are produced by
 *    __ip6mr_fill_mroute().
 *
 * NOTE(review): the declarations of `err` and `vif`, the test on `cache`,
 * the nowait handling, the assignment of `dev`, the allocation-failure
 * test and all return statements are on lines not shown in this excerpt.
 */
2131 int ip6mr_get_route(struct net *net,
2132 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2135 struct mr6_table *mrt;
2136 struct mfc6_cache *cache;
2137 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2139 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2143 read_lock(&mrt_lock);
2144 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2147 struct sk_buff *skb2;
2148 struct ipv6hdr *iph;
2149 struct net_device *dev;
2153 read_unlock(&mrt_lock);
2158 if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2159 read_unlock(&mrt_lock);
2163 /* really correct? */
/* GFP_ATOMIC: the allocation happens while mrt_lock is held. */
2164 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2166 read_unlock(&mrt_lock);
2170 skb_reset_transport_header(skb2);
2172 skb_put(skb2, sizeof(struct ipv6hdr));
2173 skb_reset_network_header(skb2);
/* Build the placeholder header that identifies the (source, group) pair. */
2175 iph = ipv6_hdr(skb2);
2178 iph->flow_lbl[0] = 0;
2179 iph->flow_lbl[1] = 0;
2180 iph->flow_lbl[2] = 0;
2181 iph->payload_len = 0;
2182 iph->nexthdr = IPPROTO_NONE;
2184 ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
2185 ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
2187 err = ip6mr_cache_unresolved(mrt, vif, skb2);
2188 read_unlock(&mrt_lock);
/* NOTE(review): mfc_flags is modified here while only the read side of mrt_lock is held. */
2193 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2194 cache->mfc_flags |= MFC_NOTIFY;
2196 err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2197 read_unlock(&mrt_lock);
/*
 * ip6mr_fill_mroute - build one RTM_NEWROUTE dump message for cache entry @c.
 *
 * Starts a multipart (NLM_F_MULTI) netlink message addressed with @pid and
 * @seq, fills the rtmsg header (128-bit source and destination prefix
 * lengths, table id, universe scope, unspecified protocol), adds
 * RTA_TABLE, RTA_SRC and RTA_DST (16 bytes each for the addresses) and
 * lets __ip6mr_fill_mroute() append the interface attributes.  Success
 * returns the result of nlmsg_end(); a failure in __ip6mr_fill_mroute()
 * jumps to nla_put_failure, and the message is cancelled with
 * nlmsg_cancel().
 *
 * NOTE(review): the declaration of `rtm`, the NULL check on `nlh`, the
 * nla_put_failure label and the failure return value are on lines not
 * shown in this excerpt.
 */
2201 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2202 u32 pid, u32 seq, struct mfc6_cache *c)
2204 struct nlmsghdr *nlh;
2207 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2211 rtm = nlmsg_data(nlh);
/*
 * NOTE(review): the family reported is RTNL_FAMILY_IPMR although the
 * prefix lengths and addresses below are IPv6-sized; confirm whether
 * RTNL_FAMILY_IP6MR is intended.
 */
2212 rtm->rtm_family = RTNL_FAMILY_IPMR;
2213 rtm->rtm_dst_len = 128;
2214 rtm->rtm_src_len = 128;
2216 rtm->rtm_table = mrt->id;
2217 NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2218 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2219 rtm->rtm_protocol = RTPROT_UNSPEC;
2222 NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin);
2223 NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp);
2225 if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
2226 goto nla_put_failure;
2228 return nlmsg_end(skb, nlh);
2231 nlmsg_cancel(skb, nlh);
2235 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2237 struct net *net = sock_net(skb->sk);
2238 struct mr6_table *mrt;
2239 struct mfc6_cache *mfc;
2240 unsigned int t = 0, s_t;
2241 unsigned int h = 0, s_h;
2242 unsigned int e = 0, s_e;
2248 read_lock(&mrt_lock);
2249 ip6mr_for_each_table(mrt, net) {
2254 for (h = s_h; h < MFC6_LINES; h++) {
2255 list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2258 if (ip6mr_fill_mroute(mrt, skb,
2259 NETLINK_CB(cb->skb).pid,
2273 read_unlock(&mrt_lock);