2 * IP multicast routing support for mrouted 3.6/3.8
4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 * Linux Consultancy and Custom Driver Development
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <linux/slab.h>
51 #include <net/net_namespace.h>
53 #include <net/protocol.h>
54 #include <linux/skbuff.h>
55 #include <net/route.h>
60 #include <linux/notifier.h>
61 #include <linux/if_arp.h>
62 #include <linux/netfilter_ipv4.h>
64 #include <net/checksum.h>
65 #include <net/netlink.h>
/* PIM-SM support is compiled in when either PIMv1 or PIMv2 is selected. */
67 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
68 #define CONFIG_IP_PIMSM 1
/* Multicast routing state (later members of struct mr_table): the single
 * user-space control socket, the expiry timer for unresolved entries, the
 * unresolved queue, the resolved (S,G) hash table, the vif table and the
 * count of queued unresolved entries.
 * NOTE(review): this listing is missing lines; the left-column numbers
 * are the original file's line numbers. */
72 struct sock *mroute_sk;
73 struct timer_list ipmr_expire_timer;
74 struct list_head mfc_unres_queue;
75 struct list_head mfc_cache_array[MFC_LINES];
76 struct vif_device vif_table[MAXVIFS];
78 atomic_t cache_resolve_queue_len;
/* Index of the PIM register vif, or -1 when none is configured. */
81 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
82 int mroute_reg_vif_num;
86 /* Big lock, protecting vif table, mrt cache and mroute socket state.
87 Note that the changes are semaphored via rtnl_lock.
90 static DEFINE_RWLOCK(mrt_lock);
93 * Multicast router control variables
/* A vif slot is in use iff it has a device bound to it. */
96 #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
98 /* Special spinlock for queue of unresolved entries */
99 static DEFINE_SPINLOCK(mfc_unres_lock);
101 /* We return to original Alan's scheme. Hash table of resolved
102 entries is changed only in process context and protected
103 with weak lock mrt_lock. Queue of unresolved entries is protected
104 with strong spinlock mfc_unres_lock.
106 In this case data path is free of exclusive locks at all.
/* Slab cache for struct mfc_cache entries. */
109 static struct kmem_cache *mrt_cachep __read_mostly;
/* Forward declarations for the data path and netlink fill helpers. */
111 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
112 struct sk_buff *skb, struct mfc_cache *cache,
114 static int ipmr_cache_report(struct mr_table *mrt,
115 struct sk_buff *pkt, vifi_t vifi, int assert);
116 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
117 struct mfc_cache *c, struct rtmsg *rtm);
119 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
/* Tear down the DVMRP tunnel device ("dvmrp%d") backing a tunnel vif.
 * Builds an ip_tunnel_parm describing the tunnel endpoints and hands it
 * to tunl0's SIOCDELTUNNEL ioctl.
 * NOTE(review): this listing is incomplete — declarations of ifr, the
 * set_fs() pair and closing braces are missing. */
121 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
123 struct net *net = dev_net(dev);
/* All dvmrp tunnels are managed through the generic "tunl0" device. */
127 dev = __dev_get_by_name(net, "tunl0");
129 const struct net_device_ops *ops = dev->netdev_ops;
131 struct ip_tunnel_parm p;
133 memset(&p, 0, sizeof(p));
134 p.iph.daddr = v->vifc_rmt_addr.s_addr;
135 p.iph.saddr = v->vifc_lcl_addr.s_addr;
/* IPIP encapsulation; the tunnel is named after the vif index. */
138 p.iph.protocol = IPPROTO_IPIP;
139 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
140 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
/* The ioctl expects a user pointer; get_fs()/set_fs(KERNEL_DS)
 * (dropped from this listing) lift the address-space check. */
142 if (ops->ndo_do_ioctl) {
143 mm_segment_t oldfs = get_fs();
146 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
/* Create the DVMRP tunnel device for a new tunnel vif via tunl0's
 * SIOCADDTUNNEL ioctl, then configure it: mark it IFF_MULTICAST,
 * disable rp_filter on it and bring it up.  Returns the device or
 * NULL on failure (error path unregisters the device again).
 * NOTE(review): this listing is incomplete — ifr/err declarations,
 * set_fs() calls, dev_open(), the failure labels and closing braces
 * are missing. */
153 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
155 struct net_device *dev;
157 dev = __dev_get_by_name(net, "tunl0");
160 const struct net_device_ops *ops = dev->netdev_ops;
163 struct ip_tunnel_parm p;
164 struct in_device *in_dev;
166 memset(&p, 0, sizeof(p));
167 p.iph.daddr = v->vifc_rmt_addr.s_addr;
168 p.iph.saddr = v->vifc_lcl_addr.s_addr;
171 p.iph.protocol = IPPROTO_IPIP;
172 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
173 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
175 if (ops->ndo_do_ioctl) {
176 mm_segment_t oldfs = get_fs();
179 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
/* On ioctl success, look up the freshly created dvmrp%d device. */
187 (dev = __dev_get_by_name(net, p.name)) != NULL) {
188 dev->flags |= IFF_MULTICAST;
190 in_dev = __in_dev_get_rtnl(dev);
/* Reverse-path filtering would drop decapsulated multicasts. */
194 ipv4_devconf_setall(in_dev);
195 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
205 /* allow the register to be completed before unregistering. */
209 unregister_netdevice(dev);
213 #ifdef CONFIG_IP_PIMSM
/* "Transmit" on the PIM register device: packets routed out the
 * register vif are never put on a wire — they are accounted and
 * reported to the user-space daemon as an IGMPMSG_WHOLEPKT upcall.
 * mrt_lock is taken to stabilise mroute_reg_vif_num.
 * NOTE(review): listing incomplete — kfree_skb/NETDEV_TX_OK return and
 * closing brace are missing. */
215 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
217 struct net *net = dev_net(dev);
218 struct mr_table *mrt = net->ipv4.mrt;
220 read_lock(&mrt_lock);
221 dev->stats.tx_bytes += skb->len;
222 dev->stats.tx_packets++;
223 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
224 read_unlock(&mrt_lock);
229 static const struct net_device_ops reg_vif_netdev_ops = {
230 .ndo_start_xmit = reg_vif_xmit,
233 static void reg_vif_setup(struct net_device *dev)
235 dev->type = ARPHRD_PIMREG;
236 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
237 dev->flags = IFF_NOARP;
238 dev->netdev_ops = ®_vif_netdev_ops,
239 dev->destructor = free_netdev;
240 dev->features |= NETIF_F_NETNS_LOCAL;
/* Allocate and register the per-namespace "pimreg" device used as the
 * PIM register vif; disables rp_filter on it and brings it up.
 * Returns the device or NULL on failure.
 * NOTE(review): listing incomplete — rtnl/rcu locking, dev_open(),
 * failure labels and closing braces are missing. */
243 static struct net_device *ipmr_reg_vif(struct net *net)
245 struct net_device *dev;
246 struct in_device *in_dev;
/* No private area is needed; reg_vif_setup() fills in the ops. */
248 dev = alloc_netdev(0, "pimreg", reg_vif_setup);
253 dev_net_set(dev, net);
255 if (register_netdevice(dev)) {
262 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
/* rp_filter would reject decapsulated register traffic. */
267 ipv4_devconf_setall(in_dev);
268 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
279 /* allow the register to be completed before unregistering. */
283 unregister_netdevice(dev);
290 * @notify: Set to 1, if the caller is a notifier_call
/* Delete vif @vifi from @mrt: detach its device under mrt_lock, shrink
 * maxvif if the highest slot was freed, undo the allmulti reference and
 * MC_FORWARDING count, and queue tunnel/register devices on @head for
 * unregistration (unless invoked from the netdev notifier itself).
 * Returns 0 or -EADDRNOTAVAIL when the vif does not exist.
 * NOTE(review): listing incomplete — the device detach, tmp declaration
 * and several braces are missing. */
293 static int vif_delete(struct mr_table *mrt, int vifi, int notify,
294 struct list_head *head)
296 struct vif_device *v;
297 struct net_device *dev;
298 struct in_device *in_dev;
300 if (vifi < 0 || vifi >= mrt->maxvif)
301 return -EADDRNOTAVAIL;
303 v = &mrt->vif_table[vifi];
305 write_lock_bh(&mrt_lock);
310 write_unlock_bh(&mrt_lock);
311 return -EADDRNOTAVAIL;
/* Forget the register vif index if that is what is being removed. */
314 #ifdef CONFIG_IP_PIMSM
315 if (vifi == mrt->mroute_reg_vif_num)
316 mrt->mroute_reg_vif_num = -1;
/* Freed the top slot: scan down for the new highest in-use vif. */
319 if (vifi+1 == mrt->maxvif) {
321 for (tmp=vifi-1; tmp>=0; tmp--) {
322 if (VIF_EXISTS(mrt, tmp))
328 write_unlock_bh(&mrt_lock);
/* Drop the allmulti reference vif_add() took on the device. */
330 dev_set_allmulti(dev, -1);
332 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
333 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
334 ip_rt_multicast_event(in_dev);
337 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
338 unregister_netdevice_queue(dev, head);
344 static inline void ipmr_cache_free(struct mfc_cache *c)
346 kmem_cache_free(mrt_cachep, c);
349 /* Destroy an unresolved cache entry, killing queued skbs
350 and reporting error to netlink readers.
353 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
/* NOTE(review): net is forced to NULL here — rtnl_unicast(skb, NULL, ...)
 * looks wrong; presumably this should come from the table's namespace
 * (mrt->net). Verify against the mr_table definition. */
355 struct net *net = NULL; //mrt->net; -- see review note above
359 atomic_dec(&mrt->cache_resolve_queue_len);
/* Drain every skb that was waiting on this entry. */
361 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
/* version == 0 marks a queued netlink request skb, not a real IP
 * packet: rewrite it into an NLMSG_ERROR reply for the requester. */
362 if (ip_hdr(skb)->version == 0) {
363 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
364 nlh->nlmsg_type = NLMSG_ERROR;
365 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
366 skb_trim(skb, nlh->nlmsg_len);
368 e->error = -ETIMEDOUT;
369 memset(&e->msg, 0, sizeof(e->msg));
371 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
/* NOTE(review): listing incomplete — the kfree_skb() for data skbs
 * and the ipmr_cache_free(c) tail are missing. */
380 /* Timer process for the unresolved queue. */
382 static void ipmr_expire_process(unsigned long arg)
384 struct mr_table *mrt = (struct mr_table *)arg;
386 unsigned long expires;
387 struct mfc_cache *c, *next;
/* Running in timer context: if the lock is contended, retry shortly
 * instead of spinning against process-context users. */
389 if (!spin_trylock(&mfc_unres_lock)) {
390 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
394 if (list_empty(&mrt->mfc_unres_queue))
/* Destroy expired entries; remember the soonest remaining expiry.
 * NOTE(review): listing incomplete — now/expires initialisation and
 * the list_del() before destroy are missing. */
400 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
401 if (time_after(c->mfc_un.unres.expires, now)) {
402 unsigned long interval = c->mfc_un.unres.expires - now;
403 if (interval < expires)
409 ipmr_destroy_unres(mrt, c);
/* Re-arm for the next pending expiry, if any entries remain. */
412 if (!list_empty(&mrt->mfc_unres_queue))
413 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
416 spin_unlock(&mfc_unres_lock);
419 /* Fill oifs list. It is called under write locked mrt_lock. */
421 static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
426 cache->mfc_un.res.minvif = MAXVIFS;
427 cache->mfc_un.res.maxvif = 0;
428 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
430 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
431 if (VIF_EXISTS(mrt, vifi) &&
432 ttls[vifi] && ttls[vifi] < 255) {
433 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
434 if (cache->mfc_un.res.minvif > vifi)
435 cache->mfc_un.res.minvif = vifi;
436 if (cache->mfc_un.res.maxvif <= vifi)
437 cache->mfc_un.res.maxvif = vifi + 1;
/* Add vif @vifi described by @vifc to @mrt: create/locate the backing
 * device according to vifc_flags (register vif, DVMRP tunnel, explicit
 * ifindex or local address), take an allmulti reference, bump
 * MC_FORWARDING, then publish the vif_device under write-locked
 * mrt_lock.  @mrtsock is true when the adding socket is the mroute
 * daemon (otherwise the vif is marked VIFF_STATIC).
 * NOTE(review): listing incomplete — error returns, dev_put() pairs,
 * several case labels and closing braces are missing. */
442 static int vif_add(struct net *net, struct mr_table *mrt,
443 struct vifctl *vifc, int mrtsock)
445 int vifi = vifc->vifc_vifi;
446 struct vif_device *v = &mrt->vif_table[vifi];
447 struct net_device *dev;
448 struct in_device *in_dev;
/* Refuse to overwrite an existing vif slot. */
452 if (VIF_EXISTS(mrt, vifi))
455 switch (vifc->vifc_flags) {
456 #ifdef CONFIG_IP_PIMSM
459 * Special Purpose VIF in PIM
460 * All the packets will be sent to the daemon
/* Only one register vif is allowed per table. */
462 if (mrt->mroute_reg_vif_num >= 0)
464 dev = ipmr_reg_vif(net);
467 err = dev_set_allmulti(dev, 1);
469 unregister_netdevice(dev);
476 dev = ipmr_new_tunnel(net, vifc);
479 err = dev_set_allmulti(dev, 1);
481 ipmr_del_tunnel(dev, vifc);
487 case VIFF_USE_IFINDEX:
/* Plain vif: resolve the device by ifindex or by local address. */
489 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
490 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
491 if (dev && dev->ip_ptr == NULL) {
493 return -EADDRNOTAVAIL;
496 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
499 return -EADDRNOTAVAIL;
500 err = dev_set_allmulti(dev, 1);
510 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
512 return -EADDRNOTAVAIL;
514 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
515 ip_rt_multicast_event(in_dev);
518 * Fill in the VIF structures
520 v->rate_limit = vifc->vifc_rate_limit;
521 v->local = vifc->vifc_lcl_addr.s_addr;
522 v->remote = vifc->vifc_rmt_addr.s_addr;
523 v->flags = vifc->vifc_flags;
/* Entries not added by the daemon survive daemon shutdown. */
525 v->flags |= VIFF_STATIC;
526 v->threshold = vifc->vifc_threshold;
/* Tunnels/register vifs report the underlying link, not themselves. */
531 v->link = dev->ifindex;
532 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
533 v->link = dev->iflink;
535 /* And finish update writing critical data */
536 write_lock_bh(&mrt_lock);
538 #ifdef CONFIG_IP_PIMSM
539 if (v->flags&VIFF_REGISTER)
540 mrt->mroute_reg_vif_num = vifi;
542 if (vifi+1 > mrt->maxvif)
543 mrt->maxvif = vifi+1;
544 write_unlock_bh(&mrt_lock);
548 static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
552 int line = MFC_HASH(mcastgrp, origin);
555 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
556 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
563 * Allocate a multicast cache entry
565 static struct mfc_cache *ipmr_cache_alloc(void)
567 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
570 c->mfc_un.res.minvif = MAXVIFS;
574 static struct mfc_cache *ipmr_cache_alloc_unres(void)
576 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
579 skb_queue_head_init(&c->mfc_un.unres.unresolved);
580 c->mfc_un.unres.expires = jiffies + 10*HZ;
585 * A cache entry has gone into a resolved state from queued
/* Replay every packet that was queued on unresolved entry @uc through
 * the now-resolved entry @c.  Queued netlink request skbs (IP version
 * field == 0) are answered via ipmr_fill_mroute() or turned into an
 * NLMSG_ERROR; real packets are forwarded. */
588 static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
589 struct mfc_cache *uc, struct mfc_cache *c)
595 * Play the pending entries through our router
598 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
599 if (ip_hdr(skb)->version == 0) {
600 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
602 if (ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
603 nlh->nlmsg_len = (skb_tail_pointer(skb) -
/* Fill failed: shrink the skb back into an error reply. */
606 nlh->nlmsg_type = NLMSG_ERROR;
607 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
608 skb_trim(skb, nlh->nlmsg_len);
610 e->error = -EMSGSIZE;
611 memset(&e->msg, 0, sizeof(e->msg));
614 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
616 ip_mr_forward(net, mrt, skb, c, 0);
621 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
622 * expects the following bizarre scheme.
624 * Called under mrt_lock.
/* Build an igmpmsg upcall skb from @pkt and queue it on the daemon's
 * socket.  For IGMPMSG_WHOLEPKT (PIM register) the whole packet is
 * copied behind a duplicated IP header; otherwise only the IP header
 * plus a fake IGMP header is sent.  Returns the sock_queue_rcv_skb()
 * result, or -EINVAL with no daemon socket.
 * NOTE(review): listing incomplete — skb NULL checks, msg->im_mbz/im_vif
 * assignments for the non-WHOLEPKT path, and the tail are missing. */
627 static int ipmr_cache_report(struct mr_table *mrt,
628 struct sk_buff *pkt, vifi_t vifi, int assert)
631 const int ihl = ip_hdrlen(pkt);
632 struct igmphdr *igmp;
636 #ifdef CONFIG_IP_PIMSM
637 if (assert == IGMPMSG_WHOLEPKT)
638 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr))
641 skb = alloc_skb(128, GFP_ATOMIC);
646 #ifdef CONFIG_IP_PIMSM
647 if (assert == IGMPMSG_WHOLEPKT) {
648 /* Ugly, but we have no choice with this interface.
649 Duplicate old header, fix ihl, length etc.
650 And all this only to mangle msg->im_msgtype and
651 to set msg->im_mbz to "mbz" :-)
653 skb_push(skb, sizeof(struct iphdr));
654 skb_reset_network_header(skb);
655 skb_reset_transport_header(skb);
656 msg = (struct igmpmsg *)skb_network_header(skb);
657 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
658 msg->im_msgtype = IGMPMSG_WHOLEPKT;
660 msg->im_vif = mrt->mroute_reg_vif_num;
661 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
662 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
663 sizeof(struct iphdr));
/* Non-WHOLEPKT: copy the original IP header and mark it as an
 * upcall by zeroing the protocol field. */
672 skb->network_header = skb->tail;
674 skb_copy_to_linear_data(skb, pkt->data, ihl);
675 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
676 msg = (struct igmpmsg *)skb_network_header(skb);
678 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
/* Append the fake IGMP header carrying the assert type. */
684 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
686 msg->im_msgtype = assert;
688 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
689 skb->transport_header = skb->network_header;
/* No daemon listening: drop the report. */
692 if (mrt->mroute_sk == NULL) {
700 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
703 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
711 * Queue a packet for resolution. It gets locked cache entry!
/* Queue @skb against an unresolved (S,G) entry, creating the entry and
 * reporting IGMPMSG_NOCACHE to the daemon on first sight.  At most 10
 * unresolved entries and 3 queued skbs per entry are allowed; excess
 * packets are dropped.  Returns 0 on queue, negative errno otherwise.
 * NOTE(review): listing incomplete — the found/exit paths, kfree_skb
 * calls and the error branch bodies are missing. */
715 ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
720 const struct iphdr *iph = ip_hdr(skb);
722 spin_lock_bh(&mfc_unres_lock);
/* Is there already a pending entry for this (S,G)? */
723 list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
724 if (c->mfc_mcastgrp == iph->daddr &&
725 c->mfc_origin == iph->saddr) {
733 * Create a new entry if allowable
736 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
737 (c = ipmr_cache_alloc_unres()) == NULL) {
738 spin_unlock_bh(&mfc_unres_lock);
745 * Fill in the new cache entry
748 c->mfc_origin = iph->saddr;
749 c->mfc_mcastgrp = iph->daddr;
752 * Reflect first query at mrouted.
754 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
756 /* If the report failed throw the cache entry
759 spin_unlock_bh(&mfc_unres_lock);
/* Entry accepted: account it and arm the expiry timer. */
766 atomic_inc(&mrt->cache_resolve_queue_len);
767 list_add(&c->list, &mrt->mfc_unres_queue);
769 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
773 * See if we can append the packet
775 if (c->mfc_un.unres.unresolved.qlen>3) {
779 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
783 spin_unlock_bh(&mfc_unres_lock);
788 * MFC cache manipulation by user space mroute daemon
/* Remove the resolved (S,G) entry matching @mfc from the hash table.
 * The unlink happens under write-locked mrt_lock; returns 0 on success.
 * NOTE(review): listing incomplete — list_del, ipmr_cache_free and the
 * -ENOENT tail are missing. */
791 static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
794 struct mfc_cache *c, *next;
796 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
798 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
799 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
800 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
801 write_lock_bh(&mrt_lock);
803 write_unlock_bh(&mrt_lock);
/* Add or update the resolved (S,G) route described by @mfc.  An
 * existing entry just gets new parent/thresholds; otherwise a fresh
 * entry is allocated and inserted, and any matching unresolved entry is
 * resolved (its queued packets replayed) via ipmr_cache_resolve().
 * @mrtsock false marks the route MFC_STATIC.
 * NOTE(review): listing incomplete — found-flag handling, the -ENOBUFS
 * path and list_del of the unresolved entry are missing. */
812 static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
813 struct mfcctl *mfc, int mrtsock)
817 struct mfc_cache *uc, *c;
/* Reject an out-of-range parent vif up front. */
819 if (mfc->mfcc_parent >= MAXVIFS)
822 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
824 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
825 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
826 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
/* Existing entry: update it in place under the write lock. */
833 write_lock_bh(&mrt_lock);
834 c->mfc_parent = mfc->mfcc_parent;
835 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
837 c->mfc_flags |= MFC_STATIC;
838 write_unlock_bh(&mrt_lock);
842 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
845 c = ipmr_cache_alloc();
849 c->mfc_origin = mfc->mfcc_origin.s_addr;
850 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
851 c->mfc_parent = mfc->mfcc_parent;
852 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
854 c->mfc_flags |= MFC_STATIC;
856 write_lock_bh(&mrt_lock);
857 list_add(&c->list, &mrt->mfc_cache_array[line]);
858 write_unlock_bh(&mrt_lock);
861 * Check to see if we resolved a queued list. If so we
862 * need to send on the frames and tidy up.
864 spin_lock_bh(&mfc_unres_lock);
865 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
866 if (uc->mfc_origin == c->mfc_origin &&
867 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
869 atomic_dec(&mrt->cache_resolve_queue_len);
/* Last unresolved entry gone: the expiry timer is idle. */
873 if (list_empty(&mrt->mfc_unres_queue))
874 del_timer(&mrt->ipmr_expire_timer);
875 spin_unlock_bh(&mfc_unres_lock);
878 ipmr_cache_resolve(net, mrt, uc, c);
885 * Close the multicast socket, and clear the vif tables etc
/* Flush all non-static vifs and MFC entries and destroy any pending
 * unresolved entries.  Called when the daemon socket goes away.
 * NOTE(review): listing incomplete — the list_del before destroy and
 * several braces are missing. */
888 static void mroute_clean_tables(struct mr_table *mrt)
892 struct mfc_cache *c, *next;
895 * Shut down all active vif entries
897 for (i = 0; i < mrt->maxvif; i++) {
898 if (!(mrt->vif_table[i].flags&VIFF_STATIC))
899 vif_delete(mrt, i, 0, &list);
901 unregister_netdevice_many(&list);
/* Wipe the resolved cache, keeping static routes. */
906 for (i = 0; i < MFC_LINES; i++) {
907 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
908 if (c->mfc_flags&MFC_STATIC)
910 write_lock_bh(&mrt_lock);
912 write_unlock_bh(&mrt_lock);
/* Kill anything still waiting for resolution. */
918 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
919 spin_lock_bh(&mfc_unres_lock);
920 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
922 ipmr_destroy_unres(mrt, c);
924 spin_unlock_bh(&mfc_unres_lock);
/* Destructor for the mroute control socket: if @sk is the registered
 * daemon socket, drop the global MC_FORWARDING count, detach the
 * socket under mrt_lock and flush the routing state.
 * NOTE(review): listing incomplete — the surrounding rtnl_lock/unlock
 * calls are missing. */
928 static void mrtsock_destruct(struct sock *sk)
930 struct net *net = sock_net(sk);
931 struct mr_table *mrt = net->ipv4.mrt;
934 if (sk == mrt->mroute_sk) {
935 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
937 write_lock_bh(&mrt_lock);
938 mrt->mroute_sk = NULL;
939 write_unlock_bh(&mrt_lock);
941 mroute_clean_tables(mrt);
947 * Socket options and virtual interface manipulation. The whole
948 * virtual interface system is a complete heap, but unfortunately
949 * that's how BSD mrouted happens to think. Maybe one day with a proper
950 * MOSPF/PIM router set up we can clean this up.
/* setsockopt() handler for the MRT_* options: MRT_INIT/MRT_DONE attach
 * and detach the daemon socket, MRT_{ADD,DEL}_VIF manage vifs,
 * MRT_{ADD,DEL}_MFC manage routes, MRT_ASSERT and MRT_PIM toggle
 * assert/PIM mode.  Non-owner callers need CAP_NET_ADMIN.
 * NOTE(review): listing incomplete — rtnl locking, the case labels,
 * several error returns and the vif/mfc variable declarations are
 * missing. */
953 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
958 struct net *net = sock_net(sk);
959 struct mr_table *mrt = net->ipv4.mrt;
961 if (optname != MRT_INIT) {
962 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
/* MRT_INIT is only valid on a raw IGMP socket. */
968 if (sk->sk_type != SOCK_RAW ||
969 inet_sk(sk)->inet_num != IPPROTO_IGMP)
971 if (optlen != sizeof(int))
/* Only one daemon socket may be registered at a time. */
975 if (mrt->mroute_sk) {
980 ret = ip_ra_control(sk, 1, mrtsock_destruct);
982 write_lock_bh(&mrt_lock);
984 write_unlock_bh(&mrt_lock);
986 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
/* MRT_DONE: only the registered daemon may tear down. */
991 if (sk != mrt->mroute_sk)
993 return ip_ra_control(sk, 0, NULL);
996 if (optlen != sizeof(vif))
998 if (copy_from_user(&vif, optval, sizeof(vif)))
1000 if (vif.vifc_vifi >= MAXVIFS)
1003 if (optname == MRT_ADD_VIF) {
1004 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
1006 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1012 * Manipulate the forwarding caches. These live
1013 * in a sort of kernel/user symbiosis.
1017 if (optlen != sizeof(mfc))
1019 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1022 if (optname == MRT_DEL_MFC)
1023 ret = ipmr_mfc_delete(mrt, &mfc)
1025 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
1029 * Control PIM assert.
1034 if (get_user(v,(int __user *)optval))
1036 mrt->mroute_do_assert = (v) ? 1 : 0;
1039 #ifdef CONFIG_IP_PIMSM
1044 if (get_user(v,(int __user *)optval))
/* Enabling PIM implies assert mode. */
1050 if (v != mrt->mroute_do_pim) {
1051 mrt->mroute_do_pim = v;
1052 mrt->mroute_do_assert = v;
1059 * Spurious command, or MRT_VERSION which you cannot
1063 return -ENOPROTOOPT;
1068 * Getsock opt support for the multicast routing system.
/* getsockopt() handler: reports MRT_VERSION, MRT_PIM (when PIM-SM is
 * built in) and MRT_ASSERT.  Anything else is -ENOPROTOOPT.
 * NOTE(review): listing incomplete — olr/val declarations, the olr<0
 * check and the VERSION value assignment are missing. */
1071 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1075 struct net *net = sock_net(sk);
1076 struct mr_table *mrt = net->ipv4.mrt;
1078 if (optname != MRT_VERSION &&
1079 #ifdef CONFIG_IP_PIMSM
1082 optname!=MRT_ASSERT)
1083 return -ENOPROTOOPT;
1085 if (get_user(olr, optlen))
/* Never copy more than the caller asked for (or than an int). */
1088 olr = min_t(unsigned int, olr, sizeof(int));
1092 if (put_user(olr, optlen))
1094 if (optname == MRT_VERSION)
1096 #ifdef CONFIG_IP_PIMSM
1097 else if (optname == MRT_PIM)
1098 val = mrt->mroute_do_pim;
1101 val = mrt->mroute_do_assert;
1102 if (copy_to_user(optval, &val, olr))
1108 * The IP multicast ioctl support routines.
/* ioctl() handler for SIOCGETVIFCNT / SIOCGETSGCNT: copy per-vif or
 * per-(S,G) packet and byte counters to user space under read-locked
 * mrt_lock.  Unknown commands return -ENOIOCTLCMD.
 * NOTE(review): listing incomplete — the switch/case labels, -EFAULT
 * returns and success returns are missing. */
1111 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1113 struct sioc_sg_req sr;
1114 struct sioc_vif_req vr;
1115 struct vif_device *vif;
1116 struct mfc_cache *c;
1117 struct net *net = sock_net(sk);
1118 struct mr_table *mrt = net->ipv4.mrt;
/* SIOCGETVIFCNT: per-vif counters. */
1122 if (copy_from_user(&vr, arg, sizeof(vr)))
1124 if (vr.vifi >= mrt->maxvif)
1126 read_lock(&mrt_lock);
1127 vif = &mrt->vif_table[vr.vifi];
1128 if (VIF_EXISTS(mrt, vr.vifi)) {
1129 vr.icount = vif->pkt_in;
1130 vr.ocount = vif->pkt_out;
1131 vr.ibytes = vif->bytes_in;
1132 vr.obytes = vif->bytes_out;
1133 read_unlock(&mrt_lock);
1135 if (copy_to_user(arg, &vr, sizeof(vr)))
1139 read_unlock(&mrt_lock);
1140 return -EADDRNOTAVAIL;
/* SIOCGETSGCNT: per-(S,G) counters. */
1142 if (copy_from_user(&sr, arg, sizeof(sr)))
1145 read_lock(&mrt_lock);
1146 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1148 sr.pktcnt = c->mfc_un.res.pkt;
1149 sr.bytecnt = c->mfc_un.res.bytes;
1150 sr.wrong_if = c->mfc_un.res.wrong_if;
1151 read_unlock(&mrt_lock);
1153 if (copy_to_user(arg, &sr, sizeof(sr)))
1157 read_unlock(&mrt_lock);
1158 return -EADDRNOTAVAIL;
1160 return -ENOIOCTLCMD;
/* Netdevice notifier: when a device unregisters, delete every vif that
 * was bound to it (notify=1 so vif_delete() does not try to unregister
 * the device again).
 * NOTE (review): listing incomplete — ct/LIST_HEAD declarations, the
 * device match inside the loop and the NOTIFY_DONE return are missing. */
1165 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1167 struct net_device *dev = ptr;
1168 struct net *net = dev_net(dev);
1169 struct mr_table *mrt = net->ipv4.mrt;
1170 struct vif_device *v;
1174 if (event != NETDEV_UNREGISTER)
1176 v = &mrt->vif_table[0];
1177 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1179 vif_delete(mrt, ct, 1, &list);
1181 unregister_netdevice_many(&list);
1186 static struct notifier_block ip_mr_notifier = {
1187 .notifier_call = ipmr_device_event,
1191 * Encapsulate a packet by attaching a valid IPIP header to it.
1192 * This avoids tunnel drivers and other mess and gives us the speed so
1193 * important for multicast video.
/* Prepend an outer IPIP header (saddr/daddr = tunnel endpoints) in
 * front of the current IP header and reset IPCB state for the new
 * header.  NOTE(review): listing incomplete — the iph declaration,
 * version/ihl/frag_off/saddr/daddr assignments, ip_send_check() and
 * closing brace are missing. */
1196 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1199 struct iphdr *old_iph = ip_hdr(skb);
/* Make room for the outer header; old header becomes the payload. */
1201 skb_push(skb, sizeof(struct iphdr));
1202 skb->transport_header = skb->network_header;
1203 skb_reset_network_header(skb);
/* Outer header inherits TOS and TTL from the inner packet. */
1207 iph->tos = old_iph->tos;
1208 iph->ttl = old_iph->ttl;
1212 iph->protocol = IPPROTO_IPIP;
1214 iph->tot_len = htons(skb->len);
1215 ip_select_ident(iph, skb_dst(skb), NULL);
1218 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1222 static inline int ipmr_forward_finish(struct sk_buff *skb)
1224 struct ip_options * opt = &(IPCB(skb)->opt);
1226 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1228 if (unlikely(opt->optlen))
1229 ip_forward_options(skb);
1231 return dst_output(skb);
1235 * Processing handlers for ipmr_forward
/* Transmit @skb out vif @vifi: register vifs turn into a WHOLEPKT
 * upcall; tunnel vifs get an IPIP header; normal vifs are routed to the
 * group address.  Oversized DF packets are dropped (no ICMP for
 * multicast), the TTL is decremented, and the packet finally goes
 * through the NF_INET_FORWARD hook to ipmr_forward_finish().
 * NOTE(review): listing incomplete — rt declaration, goto labels,
 * ip_rt_put/kfree_skb error paths and closing braces are missing. */
1238 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1239 struct sk_buff *skb, struct mfc_cache *c, int vifi)
1241 const struct iphdr *iph = ip_hdr(skb);
1242 struct vif_device *vif = &mrt->vif_table[vifi];
1243 struct net_device *dev;
1247 if (vif->dev == NULL)
/* Register vif: account and report to the daemon, never transmit. */
1250 #ifdef CONFIG_IP_PIMSM
1251 if (vif->flags & VIFF_REGISTER) {
1253 vif->bytes_out += skb->len;
1254 vif->dev->stats.tx_bytes += skb->len;
1255 vif->dev->stats.tx_packets++;
1256 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
/* Route either to the tunnel remote endpoint or to the group. */
1261 if (vif->flags&VIFF_TUNNEL) {
1262 struct flowi fl = { .oif = vif->link,
1264 { .daddr = vif->remote,
1265 .saddr = vif->local,
1266 .tos = RT_TOS(iph->tos) } },
1267 .proto = IPPROTO_IPIP };
1268 if (ip_route_output_key(net, &rt, &fl))
1270 encap = sizeof(struct iphdr);
1272 struct flowi fl = { .oif = vif->link,
1274 { .daddr = iph->daddr,
1275 .tos = RT_TOS(iph->tos) } },
1276 .proto = IPPROTO_IPIP };
1277 if (ip_route_output_key(net, &rt, &fl))
1281 dev = rt->u.dst.dev;
1283 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1284 /* Do not fragment multicasts. Alas, IPv4 does not
1285 allow to send ICMP, so that packets will disappear
1289 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
/* Ensure headroom for link-layer + encapsulation headers. */
1294 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1296 if (skb_cow(skb, encap)) {
1302 vif->bytes_out += skb->len;
1305 skb_dst_set(skb, &rt->u.dst);
1306 ip_decrease_ttl(ip_hdr(skb));
1308 /* FIXME: forward and output firewalls used to be called here.
1309 * What do we do with netfilter? -- RR */
1310 if (vif->flags & VIFF_TUNNEL) {
1311 ip_encap(skb, vif->local, vif->remote);
1312 /* FIXME: extra output firewall step used to be here. --RR */
1313 vif->dev->stats.tx_packets++;
1314 vif->dev->stats.tx_bytes += skb->len;
1317 IPCB(skb)->flags |= IPSKB_FORWARDED;
1320 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1321 * not only before forwarding, but after forwarding on all output
1322 * interfaces. It is clear, if mrouter runs a multicasting
1323 * program, it should receive packets not depending to what interface
1324 * program is joined.
1325 * If we will not make it, the program will have to join on all
1326 * interfaces. On the other hand, multihoming host (or router, but
1327 * not mrouter) cannot join to more than one interface - it will
1328 * result in receiving multiple packets.
1330 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1331 ipmr_forward_finish);
1339 static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1343 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1344 if (mrt->vif_table[ct].dev == dev)
1350 /* "local" means that we should preserve one skb (for local delivery) */
/* Forward @skb along resolved route @cache: verify it arrived on the
 * expected parent vif (else count wrong_if and maybe send a PIM
 * assert upcall), then clone it out every vif whose TTL threshold the
 * packet passes.  @local nonzero keeps the original skb for local
 * delivery.  NOTE(review): listing incomplete — psend/ct/vif
 * declarations, the dont_forward label and closing braces are
 * missing. */
1352 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1353 struct sk_buff *skb, struct mfc_cache *cache,
1359 vif = cache->mfc_parent;
1360 cache->mfc_un.res.pkt++;
1361 cache->mfc_un.res.bytes += skb->len;
1364 * Wrong interface: drop packet and (maybe) send PIM assert.
1366 if (mrt->vif_table[vif].dev != skb->dev) {
/* iif == 0 means a locally generated, looped-back packet. */
1369 if (skb_rtable(skb)->fl.iif == 0) {
1370 /* It is our own packet, looped back.
1371 Very complicated situation...
1373 The best workaround until routing daemons will be
1374 fixed is not to redistribute packet, if it was
1375 send through wrong interface. It means, that
1376 multicast applications WILL NOT work for
1377 (S,G), which have default multicast route pointing
1378 to wrong oif. In any case, it is not a good
1379 idea to use multicasting applications on router.
1384 cache->mfc_un.res.wrong_if++;
1385 true_vifi = ipmr_find_vif(mrt, skb->dev);
1387 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1388 /* pimsm uses asserts, when switching from RPT to SPT,
1389 so that we cannot check that packet arrived on an oif.
1390 It is bad, but otherwise we would need to move pretty
1391 large chunk of pimd to kernel. Ough... --ANK
1393 (mrt->mroute_do_pim ||
1394 cache->mfc_un.res.ttls[true_vifi] < 255) &&
/* Rate-limit asserts to one per MFC_ASSERT_THRESH. */
1396 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1397 cache->mfc_un.res.last_assert = jiffies;
1398 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
/* Input accounting on the parent vif. */
1403 mrt->vif_table[vif].pkt_in++;
1404 mrt->vif_table[vif].bytes_in += skb->len;
/* Clone out each eligible vif; the last transmit may consume the
 * original skb unless @local needs it preserved. */
1409 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1410 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1412 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1414 ipmr_queue_xmit(net, mrt, skb2, cache,
1422 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1424 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1426 ipmr_queue_xmit(net, mrt, skb, cache, psend);
1439 * Multicast packets for forwarding arrive here
/* Entry point from the IP receive path for multicast packets: deliver
 * IGMP to the daemon socket, look up the (S,G) route and forward it, or
 * queue the packet as unresolved.  @local packets are also delivered to
 * the local stack.  NOTE(review): listing incomplete — dont_forward
 * label, several kfree_skb calls and returns are missing. */
1442 int ip_mr_input(struct sk_buff *skb)
1444 struct mfc_cache *cache;
1445 struct net *net = dev_net(skb->dev);
1446 struct mr_table *mrt = net->ipv4.mrt;
1447 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1449 /* Packet is looped back after forward, it should not be
1450 forwarded second time, but still can be delivered locally.
1452 if (IPCB(skb)->flags&IPSKB_FORWARDED)
/* Router-alert packets go to the raw-socket chain. */
1456 if (IPCB(skb)->opt.router_alert) {
1457 if (ip_call_ra_chain(skb))
1459 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1460 /* IGMPv1 (and broken IGMPv2 implementations sort of
1461 Cisco IOS <= 11.2(8)) do not put router alert
1462 option to IGMP packets destined to routable
1463 groups. It is very bad, because it means
1464 that we can forward NO IGMP messages.
1466 read_lock(&mrt_lock);
1467 if (mrt->mroute_sk) {
1469 raw_rcv(mrt->mroute_sk, skb);
1470 read_unlock(&mrt_lock);
1473 read_unlock(&mrt_lock);
1477 read_lock(&mrt_lock);
1478 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1481 * No usable cache entry
1483 if (cache == NULL) {
/* Local delivery wants the original skb; forward a clone. */
1487 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1488 ip_local_deliver(skb);
1490 read_unlock(&mrt_lock);
/* Unknown (S,G): queue it and ask the daemon to resolve. */
1496 vif = ipmr_find_vif(mrt, skb->dev);
1498 int err = ipmr_cache_unresolved(mrt, vif, skb);
1499 read_unlock(&mrt_lock);
1503 read_unlock(&mrt_lock);
1508 ip_mr_forward(net, mrt, skb, cache, local);
1510 read_unlock(&mrt_lock);
1513 return ip_local_deliver(skb);
1519 return ip_local_deliver(skb);
1524 #ifdef CONFIG_IP_PIMSM
/* Common PIMv1/PIMv2 register handler: validate the encapsulated IP
 * packet behind @pimlen bytes of PIM header, strip the outer headers
 * and re-inject the inner packet through the register vif device.
 * Returns 0 on success (packet consumed via netif_rx), nonzero when
 * the caller should free the skb.  NOTE(review): listing incomplete —
 * the skb_dst_drop/netif_rx tail and return statements are missing. */
1525 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1527 struct net_device *reg_dev = NULL;
1528 struct iphdr *encap;
1529 struct net *net = dev_net(skb->dev);
1530 struct mr_table *mrt = net->ipv4.mrt;
1532 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
/* Check that:
1535 a. packet is really destined to a multicast group
1536 b. packet is not a NULL-REGISTER
1537 c. packet is not truncated
1539 if (!ipv4_is_multicast(encap->daddr) ||
1540 encap->tot_len == 0 ||
1541 ntohs(encap->tot_len) + pimlen > skb->len)
1544 read_lock(&mrt_lock);
1545 if (mrt->mroute_reg_vif_num >= 0)
1546 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1549 read_unlock(&mrt_lock);
1551 if (reg_dev == NULL)
/* Strip outer IP + PIM header; inner packet becomes the frame. */
1554 skb->mac_header = skb->network_header;
1555 skb_pull(skb, (u8*)encap - skb->data);
1556 skb_reset_network_header(skb);
1558 skb->protocol = htons(ETH_P_IP);
1560 skb->pkt_type = PACKET_HOST;
1562 reg_dev->stats.rx_bytes += skb->len;
1563 reg_dev->stats.rx_packets++;
1572 #ifdef CONFIG_IP_PIMSM_V1
1574 * Handle IGMP messages of PIMv1
/* PIMv1 register messages arrive as IGMP packets; accept them only
 * when PIM mode is enabled and the type/code match a V1 REGISTER,
 * then hand off to __pim_rcv().
 * NOTE(review): listing incomplete — drop label, kfree_skb and
 * returns are missing. */
1577 int pim_rcv_v1(struct sk_buff * skb)
1579 struct igmphdr *pim;
1580 struct net *net = dev_net(skb->dev);
1581 struct mr_table *mrt = net->ipv4.mrt;
/* Need the PIM header plus at least an inner IP header. */
1583 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1586 pim = igmp_hdr(skb);
1588 if (!mrt->mroute_do_pim ||
1589 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1592 if (__pim_rcv(skb, sizeof(*pim))) {
1600 #ifdef CONFIG_IP_PIMSM_V2
/* PIMv2 register handler: verify the packet is a non-NULL REGISTER
 * and that either the header-only or the full-packet checksum is valid
 * (older peers checksum the whole packet), then hand off to
 * __pim_rcv().  NOTE(review): listing incomplete — drop label,
 * kfree_skb and returns are missing. */
1601 static int pim_rcv(struct sk_buff * skb)
1603 struct pimreghdr *pim;
1605 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1608 pim = (struct pimreghdr *)skb_transport_header(skb);
1609 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1610 (pim->flags&PIM_NULL_REGISTER) ||
1611 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1612 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1615 if (__pim_rcv(skb, sizeof(*pim))) {
/* Fill an rtmsg reply for cache entry @c: RTA_IIF from the parent vif
 * and an RTA_MULTIPATH nexthop list with one entry per forwarding vif
 * (hop count carries the TTL threshold).  Returns 1 on success,
 * negative on tailroom failure (rtattr_failure trims the skb).
 * NOTE(review): listing incomplete — ct declaration, RTA_PUT error
 * handling and the rtattr_failure label body are missing. */
1624 ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c,
1628 struct rtnexthop *nhp;
1629 u8 *b = skb_tail_pointer(skb);
1630 struct rtattr *mp_head;
1632 /* If cache is unresolved, don't try to parse IIF and OIF */
1633 if (c->mfc_parent > MAXVIFS)
1636 if (VIF_EXISTS(mrt, c->mfc_parent))
1637 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
1639 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1641 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1642 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1643 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1644 goto rtattr_failure;
1645 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1646 nhp->rtnh_flags = 0;
/* TTL threshold is reported in the hops field. */
1647 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1648 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
1649 nhp->rtnh_len = sizeof(*nhp);
/* Patch the multipath attribute header now that its size is known. */
1652 mp_head->rta_type = RTA_MULTIPATH;
1653 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1654 rtm->rtm_type = RTN_MULTICAST;
1662 int ipmr_get_route(struct net *net,
1663 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1666 struct mr_table *mrt = net->ipv4.mrt;
1667 struct mfc_cache *cache;
1668 struct rtable *rt = skb_rtable(skb);
1670 read_lock(&mrt_lock);
1671 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
1673 if (cache == NULL) {
1674 struct sk_buff *skb2;
1676 struct net_device *dev;
1680 read_unlock(&mrt_lock);
1685 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
1686 read_unlock(&mrt_lock);
1689 skb2 = skb_clone(skb, GFP_ATOMIC);
1691 read_unlock(&mrt_lock);
1695 skb_push(skb2, sizeof(struct iphdr));
1696 skb_reset_network_header(skb2);
1698 iph->ihl = sizeof(struct iphdr) >> 2;
1699 iph->saddr = rt->rt_src;
1700 iph->daddr = rt->rt_dst;
1702 err = ipmr_cache_unresolved(mrt, vif, skb2);
1703 read_unlock(&mrt_lock);
1707 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1708 cache->mfc_flags |= MFC_NOTIFY;
1709 err = ipmr_fill_mroute(mrt, skb, cache, rtm);
1710 read_unlock(&mrt_lock);
1714 #ifdef CONFIG_PROC_FS
1716 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1718 struct ipmr_vif_iter {
1719 struct seq_net_private p;
1723 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1724 struct ipmr_vif_iter *iter,
1727 struct mr_table *mrt = net->ipv4.mrt;
1729 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
1730 if (!VIF_EXISTS(mrt, iter->ct))
1733 return &mrt->vif_table[iter->ct];
1738 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1739 __acquires(mrt_lock)
1741 struct net *net = seq_file_net(seq);
1743 read_lock(&mrt_lock);
1744 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1748 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1750 struct ipmr_vif_iter *iter = seq->private;
1751 struct net *net = seq_file_net(seq);
1752 struct mr_table *mrt = net->ipv4.mrt;
1755 if (v == SEQ_START_TOKEN)
1756 return ipmr_vif_seq_idx(net, iter, 0);
1758 while (++iter->ct < mrt->maxvif) {
1759 if (!VIF_EXISTS(mrt, iter->ct))
1761 return &mrt->vif_table[iter->ct];
1766 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1767 __releases(mrt_lock)
1769 read_unlock(&mrt_lock);
1772 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1774 struct net *net = seq_file_net(seq);
1775 struct mr_table *mrt = net->ipv4.mrt;
1777 if (v == SEQ_START_TOKEN) {
1779 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1781 const struct vif_device *vif = v;
1782 const char *name = vif->dev ? vif->dev->name : "none";
1785 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1786 vif - mrt->vif_table,
1787 name, vif->bytes_in, vif->pkt_in,
1788 vif->bytes_out, vif->pkt_out,
1789 vif->flags, vif->local, vif->remote);
1794 static const struct seq_operations ipmr_vif_seq_ops = {
1795 .start = ipmr_vif_seq_start,
1796 .next = ipmr_vif_seq_next,
1797 .stop = ipmr_vif_seq_stop,
1798 .show = ipmr_vif_seq_show,
1801 static int ipmr_vif_open(struct inode *inode, struct file *file)
1803 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1804 sizeof(struct ipmr_vif_iter));
1807 static const struct file_operations ipmr_vif_fops = {
1808 .owner = THIS_MODULE,
1809 .open = ipmr_vif_open,
1811 .llseek = seq_lseek,
1812 .release = seq_release_net,
1815 struct ipmr_mfc_iter {
1816 struct seq_net_private p;
1817 struct list_head *cache;
1822 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1823 struct ipmr_mfc_iter *it, loff_t pos)
1825 struct mr_table *mrt = net->ipv4.mrt;
1826 struct mfc_cache *mfc;
1828 read_lock(&mrt_lock);
1829 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
1830 it->cache = &mrt->mfc_cache_array[it->ct];
1831 list_for_each_entry(mfc, it->cache, list)
1835 read_unlock(&mrt_lock);
1837 spin_lock_bh(&mfc_unres_lock);
1838 it->cache = &mrt->mfc_unres_queue;
1839 list_for_each_entry(mfc, it->cache, list)
1842 spin_unlock_bh(&mfc_unres_lock);
1849 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1851 struct ipmr_mfc_iter *it = seq->private;
1852 struct net *net = seq_file_net(seq);
1856 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1860 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1862 struct mfc_cache *mfc = v;
1863 struct ipmr_mfc_iter *it = seq->private;
1864 struct net *net = seq_file_net(seq);
1865 struct mr_table *mrt = net->ipv4.mrt;
1869 if (v == SEQ_START_TOKEN)
1870 return ipmr_mfc_seq_idx(net, seq->private, 0);
1872 if (mfc->list.next != it->cache)
1873 return list_entry(mfc->list.next, struct mfc_cache, list);
1875 if (it->cache == &mrt->mfc_unres_queue)
1878 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
1880 while (++it->ct < MFC_LINES) {
1881 it->cache = &mrt->mfc_cache_array[it->ct];
1882 if (list_empty(it->cache))
1884 return list_first_entry(it->cache, struct mfc_cache, list);
1887 /* exhausted cache_array, show unresolved */
1888 read_unlock(&mrt_lock);
1889 it->cache = &mrt->mfc_unres_queue;
1892 spin_lock_bh(&mfc_unres_lock);
1893 if (!list_empty(it->cache))
1894 return list_first_entry(it->cache, struct mfc_cache, list);
1897 spin_unlock_bh(&mfc_unres_lock);
1903 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1905 struct ipmr_mfc_iter *it = seq->private;
1906 struct net *net = seq_file_net(seq);
1907 struct mr_table *mrt = net->ipv4.mrt;
1909 if (it->cache == &mrt->mfc_unres_queue)
1910 spin_unlock_bh(&mfc_unres_lock);
1911 else if (it->cache == &mrt->mfc_cache_array[it->ct])
1912 read_unlock(&mrt_lock);
1915 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1918 struct net *net = seq_file_net(seq);
1919 struct mr_table *mrt = net->ipv4.mrt;
1921 if (v == SEQ_START_TOKEN) {
1923 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1925 const struct mfc_cache *mfc = v;
1926 const struct ipmr_mfc_iter *it = seq->private;
1928 seq_printf(seq, "%08lX %08lX %-3hd",
1929 (unsigned long) mfc->mfc_mcastgrp,
1930 (unsigned long) mfc->mfc_origin,
1933 if (it->cache != &mrt->mfc_unres_queue) {
1934 seq_printf(seq, " %8lu %8lu %8lu",
1935 mfc->mfc_un.res.pkt,
1936 mfc->mfc_un.res.bytes,
1937 mfc->mfc_un.res.wrong_if);
1938 for (n = mfc->mfc_un.res.minvif;
1939 n < mfc->mfc_un.res.maxvif; n++ ) {
1940 if (VIF_EXISTS(mrt, n) &&
1941 mfc->mfc_un.res.ttls[n] < 255)
1944 n, mfc->mfc_un.res.ttls[n]);
1947 /* unresolved mfc_caches don't contain
1948 * pkt, bytes and wrong_if values
1950 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1952 seq_putc(seq, '\n');
1957 static const struct seq_operations ipmr_mfc_seq_ops = {
1958 .start = ipmr_mfc_seq_start,
1959 .next = ipmr_mfc_seq_next,
1960 .stop = ipmr_mfc_seq_stop,
1961 .show = ipmr_mfc_seq_show,
1964 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1966 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1967 sizeof(struct ipmr_mfc_iter));
1970 static const struct file_operations ipmr_mfc_fops = {
1971 .owner = THIS_MODULE,
1972 .open = ipmr_mfc_open,
1974 .llseek = seq_lseek,
1975 .release = seq_release_net,
1979 #ifdef CONFIG_IP_PIMSM_V2
1980 static const struct net_protocol pim_protocol = {
1988 * Setup for IP multicast routing
1990 static int __net_init ipmr_net_init(struct net *net)
1992 struct mr_table *mrt;
1996 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
2002 /* Forwarding cache */
2003 for (i = 0; i < MFC_LINES; i++)
2004 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
2006 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
2008 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
2009 (unsigned long)net);
2011 #ifdef CONFIG_IP_PIMSM
2012 mrt->mroute_reg_vif_num = -1;
2015 #ifdef CONFIG_PROC_FS
2017 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2019 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2020 goto proc_cache_fail;
2023 net->ipv4.mrt = mrt;
2026 #ifdef CONFIG_PROC_FS
2028 proc_net_remove(net, "ip_mr_vif");
2036 static void __net_exit ipmr_net_exit(struct net *net)
2038 #ifdef CONFIG_PROC_FS
2039 proc_net_remove(net, "ip_mr_cache");
2040 proc_net_remove(net, "ip_mr_vif");
2042 kfree(net->ipv4.mrt);
2045 static struct pernet_operations ipmr_net_ops = {
2046 .init = ipmr_net_init,
2047 .exit = ipmr_net_exit,
2050 int __init ip_mr_init(void)
2054 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2055 sizeof(struct mfc_cache),
2056 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2061 err = register_pernet_subsys(&ipmr_net_ops);
2063 goto reg_pernet_fail;
2065 err = register_netdevice_notifier(&ip_mr_notifier);
2067 goto reg_notif_fail;
2068 #ifdef CONFIG_IP_PIMSM_V2
2069 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2070 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2072 goto add_proto_fail;
2077 #ifdef CONFIG_IP_PIMSM_V2
2079 unregister_netdevice_notifier(&ip_mr_notifier);
2082 unregister_pernet_subsys(&ipmr_net_ops);
2084 kmem_cache_destroy(mrt_cachep);