/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *	(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 */
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif
struct mr_table {
	struct list_head	list;
#ifdef CONFIG_NET_NS
	struct net		*net;
#endif
	u32			id;
	struct sock		*mroute_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc_unres_queue;
	struct list_head	mfc_cache_array[MFC_LINES];
	struct vif_device	vif_table[MAXVIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	int			mroute_do_assert;
	int			mroute_do_pim;
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
	int			mroute_reg_vif_num;
#endif
};

struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};
/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */
static DEFINE_RWLOCK(mrt_lock);
/*
 *	Multicast router control variables
 */
#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
 * entries is changed only in process context and protected
 * with the weak lock mrt_lock. The queue of unresolved entries is
 * protected with the strong spinlock mfc_unres_lock.
 *
 * The data path is therefore entirely free of exclusive locks.
 */
static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local);
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			    struct mfc_cache *c, struct rtmsg *rtm);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
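/* Return the multicast routing table matching @id, or NULL if none exists. */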
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ipmr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}
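/* Resolve the mr_table responsible for @flp through the fib-rules
 * framework; on success *mrt points to the matched table.
 */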
static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	struct ipmr_result res;
	struct fib_lookup_arg arg = { .result = &res, };
	int err;

	err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}
static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ipmr_get_table(rule->fr_net, rule->table);
	if (mrt == NULL)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}
static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}
static struct fib_rules_ops ipmr_rules_ops_template = {
	.family		= FIB_RULES_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.policy		= ipmr_rule_policy,
	.owner		= THIS_MODULE,
};
static int __net_init ipmr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ipmr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv4.mr_tables);

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
	if (err < 0)
		goto err2;

	net->ipv4.mr_rules_ops = ops;
	return 0;

err2:
	kfree(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}
static void __net_exit ipmr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
		kfree(mrt);
	fib_rules_unregister(net->ipv4.mr_rules_ops);
}
#else
#define ipmr_for_each_table(mrt, net) \
	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	return net->ipv4.mrt;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	*mrt = net->ipv4.mrt;
	return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
	net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	return net->ipv4.mrt ? 0 : -ENOMEM;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	kfree(net->ipv4.mrt);
}
#endif
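/* Create the multicast routing table for @id, or return the existing one. */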
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;
	unsigned int i;

	mrt = ipmr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	write_pnet(&mrt->net, net);
	mrt->id = id;

	/* Forwarding cache */
	for (i = 0; i < MFC_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IP_PIMSM
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
	return mrt;
}
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#ifdef CONFIG_IP_PIMSM
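/* Transmit hook of the pimreg device: the whole packet is bounced up to
 * the user-space daemon as an IGMPMSG_WHOLEPKT upcall instead of being
 * sent on a wire.
 */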
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi fl = {
		.oif		= dev->ifindex,
		.iif		= skb->skb_iif,
		.mark		= skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}
static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	struct in_device *in_dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, reg_vif_setup);

	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif
/*
 *	vif_delete - Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi+1 == mrt->maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}
static inline void ipmr_cache_free(struct mfc_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}
/* Destroy an unresolved cache entry, killing queued skbs
 * and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}
/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
	struct mr_table *mrt = (struct mr_table *)arg;
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, *next;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (list_empty(&mrt->mfc_unres_queue))
		goto out;

	now = jiffies;
	expires = 10*HZ;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ipmr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}
/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
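/* Add the virtual interface described by @vifc to @mrt; @mrtsock is true
 * when the request comes from the daemon's mroute socket itself, in which
 * case the vif is not marked VIFF_STATIC.
 */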
static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && dev->ip_ptr == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > mrt->maxvif)
		mrt->maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
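/* Hash lookup of an (origin, group) entry in the resolved cache.
 * Called under mrt_lock.
 */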
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			return c;
	}
	return NULL;
}
/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);

	if (c)
		c->mfc_un.res.minvif = MAXVIFS;
	return c;
}
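/* The unresolved variant is allocated with GFP_ATOMIC: it is created from
 * the packet receive path, where sleeping is not allowed.
 */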
static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);

	if (c) {
		skb_queue_head_init(&c->mfc_un.unres.unresolved);
		c->mfc_un.unres.expires = jiffies + 10*HZ;
	}
	return c;
}
/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(net, mrt, skb, c, 0);
	}
}
/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		 * Duplicate old header, fix ihl, length etc.
		 * And all this only to mangle msg->im_msgtype and
		 * to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = mrt->mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */
		skb->network_header = skb->tail;
		skb_put(skb, ihl);
		skb_copy_to_linear_data(skb, pkt->data, ihl);
		ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
		msg = (struct igmpmsg *)skb_network_header(skb);
		msg->im_vif = vifi;
		skb_dst_set(skb, dst_clone(skb_dst(pkt)));

		/*
		 *	Add our header
		 */
		igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
		igmp->type =
		msg->im_msgtype = assert;
		igmp->code = 0;
		ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
		skb->transport_header = skb->network_header;
	}

	if (mrt->mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc_unres_queue);

		mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, *next;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}
static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock)
{
	bool found = false;
	int line;
	struct mfc_cache *uc, *c;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}
/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt)
{
	int i;
	LIST_HEAD(list);
	struct mfc_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!(mrt->vif_table[i].flags&VIFF_STATIC))
			vif_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
			if (c->mfc_flags&MFC_STATIC)
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			ipmr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	rtnl_lock();
	ipmr_for_each_table(mrt, net) {
		if (sk == mrt->mroute_sk) {
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = NULL;
			write_unlock_bh(&mrt_lock);

			mroute_clean_tables(mrt);
		}
	}
	rtnl_unlock();
}
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_INIT) {
		if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (mrt->mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != mrt->mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
		} else {
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(mrt, &mfc);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	case MRT_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		if (sk == mrt->mroute_sk)
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ipmr_new_table(net, v))
			ret = -ENOMEM;
		raw_sk(sk)->ipmr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = mrt->mroute_do_pim;
#endif
	else
		val = mrt->mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
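/* Notifier handler: tear down the vifs whose underlying device is going away. */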
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ipmr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				vif_delete(mrt, ct, 1, &list);
		}
	}
	unregister_netdevice_many(&list);
	return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};
/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	= 4;
	iph->tos	= old_iph->tos;
	iph->ttl	= old_iph->ttl;
	iph->frag_off	= 0;
	iph->daddr	= daddr;
	iph->saddr	= saddr;
	iph->protocol	= IPPROTO_IPIP;
	iph->ihl	= 5;
	iph->tot_len	= htons(skb->len);
	ip_select_ident(iph, skb_dst(skb), NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}
static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}
/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
			    struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		 * allow to send ICMP, so that packets will disappear
		 * to blackhole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR
	 */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but also after forwarding on all output
	 * interfaces. Clearly, if an mrouter runs a multicasting program, it
	 * should receive packets regardless of which interface the program
	 * has joined on. Otherwise the program would have to join on all
	 * interfaces. On the other hand, a multihomed host (or router, but
	 * not mrouter) cannot join on more than one interface - it would
	 * result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
}
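/* Map a device back to its vif index in @mrt, or -ENODEV if it is not a vif. */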
static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif-1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			return ct;
	}
	return -ENODEV;
}
/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb_rtable(skb)->fl.iif == 0) {
			/* It is our own packet, looped back.
			 * Very complicated situation...
			 *
			 * The best workaround until routing daemons will be
			 * fixed is not to redistribute a packet if it was
			 * sent through the wrong interface. It means that
			 * multicast applications WILL NOT work for
			 * (S,G), which have default multicast route pointing
			 * to the wrong oif. In any case, it is not a good
			 * idea to use multicasting applications on a router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(mrt, skb->dev);

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts when switching from RPT to SPT,
		     * so we cannot check that a packet arrived on an oif.
		     * It is bad, but otherwise we would need to move a
		     * pretty large chunk of pimd to the kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(net, mrt, skb2, cache,
							psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(net, mrt, skb2, cache, psend);
		} else {
			ipmr_queue_xmit(net, mrt, skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}
/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
	struct mr_table *mrt;
	int err;

	/* Packet is looped back after forward, it should not be
	 * forwarded a second time, but it can still be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations, such as
			 * Cisco IOS <= 11.2(8)) do not put the router alert
			 * option into IGMP packets destined to routable
			 * groups. It is very bad, because it means
			 * that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (mrt->mroute_sk) {
				nf_reset(skb);
				raw_rcv(mrt->mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(net, mrt, skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}
#ifdef CONFIG_IP_PIMSM
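/* Common PIM REGISTER processing: validate the encapsulated packet and,
 * if a register vif is configured, feed the inner packet back into the
 * stack through the pimreg device.
 */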
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	 * Check that:
	 * a. packet is really destined to a multicast group
	 * b. packet is not a NULL-REGISTER
	 * c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (mrt->mroute_reg_vif_num >= 0)
		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	skb->dev = reg_dev;

	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;

	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif
#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (!mrt->mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif
#ifdef CONFIG_IP_PIMSM_V2
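/* Handle PIMv2 REGISTER packets: check type, flags and checksum before
 * handing the packet to __pim_rcv().
 */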
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif
static int
ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c,
		 struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mfc_parent > MAXVIFS)
		return -ENOENT;

	if (VIF_EXISTS(mrt, c->mfc_parent))
		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
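/* Fill an rtnetlink route reply for the (rt_src, rt_dst) flow of @skb;
 * a missing cache entry is queued for resolution via a fake request header
 * unless @nowait is set.
 */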
int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr_table *mrt;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing: /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	struct mr_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif_table[iter->ct];
	}
	return NULL;
}
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	struct list_head *cache;
	int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr_table *mrt = it->mrt;
	struct mfc_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc_cache, list);

	if (it->cache == &mrt->mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);

	while (++it->ct < MFC_LINES) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc_cache, list);

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}
static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc_cache_array[it->ct])
		read_unlock(&mrt_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		const struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%08lX %08lX %-3hd",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
					   " %2d:%-3d",
					   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler	= pim_rcv,
	.netns_ok	= 1,
};
#endif
/*
 *	Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err;

	err = ipmr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	ipmr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	ipmr_rules_exit(net);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};
int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}