ipv4: ipmr: convert struct mfc_cache to struct list_head
net/ipv4/ipmr.c
/*
 *      IP multicast routing support for mrouted 3.6/3.8
 *
 *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *        Linux Consultancy and Custom Driver Development
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Fixes:
 *      Michael Chastain        :       Incorrect size of copying.
 *      Alan Cox                :       Added the cache manager code
 *      Alan Cox                :       Fixed the clone/copy bug and device race.
 *      Mike McLagan            :       Routing by source
 *      Malcolm Beattie         :       Buffer handling fixes.
 *      Alexey Kuznetsov        :       Double buffer free and other fixes.
 *      SVR Anand               :       Fixed several multicast bugs and problems.
 *      Alexey Kuznetsov        :       Status, optimisations and more.
 *      Brad Parker             :       Better behaviour on mrouted upcall
 *                                      overflow.
 *      Carlos Picoto           :       PIMv1 Support
 *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
 *                                      Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM 1
#endif

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *      Multicast router control variables
 */

#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

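/* Slab cache from which all mfc_cache entries, resolved and unresolved,
   are allocated. */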
static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct net *net, struct sk_buff *skb,
                         struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct net *net,
                             struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct net *net, struct sk_buff *skb,
                            struct mfc_cache *c, struct rtmsg *rtm);

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
        struct net *net = dev_net(dev);

        dev_close(dev);

        dev = __dev_get_by_name(net, "tunl0");
        if (dev) {
                const struct net_device_ops *ops = dev->netdev_ops;
                struct ifreq ifr;
                struct ip_tunnel_parm p;

                memset(&p, 0, sizeof(p));
                p.iph.daddr = v->vifc_rmt_addr.s_addr;
                p.iph.saddr = v->vifc_lcl_addr.s_addr;
                p.iph.version = 4;
                p.iph.ihl = 5;
                p.iph.protocol = IPPROTO_IPIP;
                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
                ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

                if (ops->ndo_do_ioctl) {
                        mm_segment_t oldfs = get_fs();

                        set_fs(KERNEL_DS);
                        ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
                        set_fs(oldfs);
                }
        }
}

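/*
 * Create a DVMRP tunnel VIF: ask the generic "tunl0" device to create a
 * dvmrp%d IPIP tunnel via its SIOCADDTUNNEL ioctl (issued under KERNEL_DS,
 * since ndo_do_ioctl expects a user pointer), then bring the new device up
 * with reverse-path filtering disabled.
 */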
static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
        struct net_device  *dev;

        dev = __dev_get_by_name(net, "tunl0");

        if (dev) {
                const struct net_device_ops *ops = dev->netdev_ops;
                int err;
                struct ifreq ifr;
                struct ip_tunnel_parm p;
                struct in_device  *in_dev;

                memset(&p, 0, sizeof(p));
                p.iph.daddr = v->vifc_rmt_addr.s_addr;
                p.iph.saddr = v->vifc_lcl_addr.s_addr;
                p.iph.version = 4;
                p.iph.ihl = 5;
                p.iph.protocol = IPPROTO_IPIP;
                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
                ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

                if (ops->ndo_do_ioctl) {
                        mm_segment_t oldfs = get_fs();

                        set_fs(KERNEL_DS);
                        err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
                        set_fs(oldfs);
                } else
                        err = -EOPNOTSUPP;

                dev = NULL;

                if (err == 0 &&
                    (dev = __dev_get_by_name(net, p.name)) != NULL) {
                        dev->flags |= IFF_MULTICAST;

                        in_dev = __in_dev_get_rtnl(dev);
                        if (in_dev == NULL)
                                goto failure;

                        ipv4_devconf_setall(in_dev);
                        IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

                        if (dev_open(dev))
                                goto failure;
                        dev_hold(dev);
                }
        }
        return dev;

failure:
        /* allow the register to be completed before unregistering. */
        rtnl_unlock();
        rtnl_lock();

        unregister_netdevice(dev);
        return NULL;
}

#ifdef CONFIG_IP_PIMSM

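/*
 * Packets "sent" on the PIM register VIF never hit the wire: each one is
 * accounted, reported whole to the userspace daemon as an IGMPMSG_WHOLEPKT
 * upcall, and then dropped.
 */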
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct net *net = dev_net(dev);

        read_lock(&mrt_lock);
        dev->stats.tx_bytes += skb->len;
        dev->stats.tx_packets++;
        ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
                          IGMPMSG_WHOLEPKT);
        read_unlock(&mrt_lock);
        kfree_skb(skb);
        return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
        .ndo_start_xmit = reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
        dev->type               = ARPHRD_PIMREG;
        dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
        dev->flags              = IFF_NOARP;
        dev->netdev_ops         = &reg_vif_netdev_ops;
        dev->destructor         = free_netdev;
        dev->features           |= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net)
{
        struct net_device *dev;
        struct in_device *in_dev;

        dev = alloc_netdev(0, "pimreg", reg_vif_setup);

        if (dev == NULL)
                return NULL;

        dev_net_set(dev, net);

        if (register_netdevice(dev)) {
                free_netdev(dev);
                return NULL;
        }
        dev->iflink = 0;

        rcu_read_lock();
        if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
                rcu_read_unlock();
                goto failure;
        }

        ipv4_devconf_setall(in_dev);
        IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
        rcu_read_unlock();

        if (dev_open(dev))
                goto failure;

        dev_hold(dev);

        return dev;

failure:
        /* allow the register to be completed before unregistering. */
        rtnl_unlock();
        rtnl_lock();

        unregister_netdevice(dev);
        return NULL;
}
#endif

/*
 *      Delete a VIF entry
 *      @notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(struct net *net, int vifi, int notify,
                      struct list_head *head)
{
        struct vif_device *v;
        struct net_device *dev;
        struct in_device *in_dev;

        if (vifi < 0 || vifi >= net->ipv4.maxvif)
                return -EADDRNOTAVAIL;

        v = &net->ipv4.vif_table[vifi];

        write_lock_bh(&mrt_lock);
        dev = v->dev;
        v->dev = NULL;

        if (!dev) {
                write_unlock_bh(&mrt_lock);
                return -EADDRNOTAVAIL;
        }

#ifdef CONFIG_IP_PIMSM
        if (vifi == net->ipv4.mroute_reg_vif_num)
                net->ipv4.mroute_reg_vif_num = -1;
#endif

        if (vifi+1 == net->ipv4.maxvif) {
                int tmp;
                for (tmp=vifi-1; tmp>=0; tmp--) {
                        if (VIF_EXISTS(net, tmp))
                                break;
                }
                net->ipv4.maxvif = tmp+1;
        }

        write_unlock_bh(&mrt_lock);

        dev_set_allmulti(dev, -1);

        if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
                IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
                ip_rt_multicast_event(in_dev);
        }

        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
                unregister_netdevice_queue(dev, head);

        dev_put(dev);
        return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
        kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct net *net, struct mfc_cache *c)
{
        struct sk_buff *skb;
        struct nlmsgerr *e;

        atomic_dec(&net->ipv4.cache_resolve_queue_len);

        while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
                        nlh->nlmsg_type = NLMSG_ERROR;
                        nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                        skb_trim(skb, nlh->nlmsg_len);
                        e = NLMSG_DATA(nlh);
                        e->error = -ETIMEDOUT;
                        memset(&e->msg, 0, sizeof(e->msg));

                        rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
                } else
                        kfree_skb(skb);
        }

        ipmr_cache_free(c);
}


/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
        struct net *net = (struct net *)arg;
        unsigned long now;
        unsigned long expires;
        struct mfc_cache *c, *next;

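        /* If the lock is contended, re-arm the timer and retry shortly
           (HZ/10) instead of spinning in timer context. */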
        if (!spin_trylock(&mfc_unres_lock)) {
                mod_timer(&net->ipv4.ipmr_expire_timer, jiffies+HZ/10);
                return;
        }

        if (list_empty(&net->ipv4.mfc_unres_queue))
                goto out;

        now = jiffies;
        expires = 10*HZ;

        list_for_each_entry_safe(c, next, &net->ipv4.mfc_unres_queue, list) {
                if (time_after(c->mfc_un.unres.expires, now)) {
                        unsigned long interval = c->mfc_un.unres.expires - now;
                        if (interval < expires)
                                expires = interval;
                        continue;
                }

                list_del(&c->list);
                ipmr_destroy_unres(net, c);
        }

        if (!list_empty(&net->ipv4.mfc_unres_queue))
                mod_timer(&net->ipv4.ipmr_expire_timer, jiffies + expires);

out:
        spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct net *net, struct mfc_cache *cache,
                                   unsigned char *ttls)
{
        int vifi;

        cache->mfc_un.res.minvif = MAXVIFS;
        cache->mfc_un.res.maxvif = 0;
        memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

        for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
                if (VIF_EXISTS(net, vifi) &&
                    ttls[vifi] && ttls[vifi] < 255) {
                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
                        if (cache->mfc_un.res.minvif > vifi)
                                cache->mfc_un.res.minvif = vifi;
                        if (cache->mfc_un.res.maxvif <= vifi)
                                cache->mfc_un.res.maxvif = vifi + 1;
                }
        }
}

static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
{
        int vifi = vifc->vifc_vifi;
        struct vif_device *v = &net->ipv4.vif_table[vifi];
        struct net_device *dev;
        struct in_device *in_dev;
        int err;

        /* Is vif busy ? */
        if (VIF_EXISTS(net, vifi))
                return -EADDRINUSE;

        switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
        case VIFF_REGISTER:
                /*
                 * Special Purpose VIF in PIM
                 * All the packets will be sent to the daemon
                 */
                if (net->ipv4.mroute_reg_vif_num >= 0)
                        return -EADDRINUSE;
                dev = ipmr_reg_vif(net);
                if (!dev)
                        return -ENOBUFS;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        unregister_netdevice(dev);
                        dev_put(dev);
                        return err;
                }
                break;
#endif
        case VIFF_TUNNEL:
                dev = ipmr_new_tunnel(net, vifc);
                if (!dev)
                        return -ENOBUFS;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        ipmr_del_tunnel(dev, vifc);
                        dev_put(dev);
                        return err;
                }
                break;

        case VIFF_USE_IFINDEX:
        case 0:
                if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
                        dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
                        if (dev && dev->ip_ptr == NULL) {
                                dev_put(dev);
                                return -EADDRNOTAVAIL;
                        }
                } else
                        dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

                if (!dev)
                        return -EADDRNOTAVAIL;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        dev_put(dev);
                        return err;
                }
                break;
        default:
                return -EINVAL;
        }

        if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
                dev_put(dev);
                return -EADDRNOTAVAIL;
        }
        IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
        ip_rt_multicast_event(in_dev);

        /*
         *      Fill in the VIF structures
         */
        v->rate_limit = vifc->vifc_rate_limit;
        v->local = vifc->vifc_lcl_addr.s_addr;
        v->remote = vifc->vifc_rmt_addr.s_addr;
        v->flags = vifc->vifc_flags;
        if (!mrtsock)
                v->flags |= VIFF_STATIC;
        v->threshold = vifc->vifc_threshold;
        v->bytes_in = 0;
        v->bytes_out = 0;
        v->pkt_in = 0;
        v->pkt_out = 0;
        v->link = dev->ifindex;
        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
                v->link = dev->iflink;

        /* And finish update writing critical data */
        write_lock_bh(&mrt_lock);
        v->dev = dev;
#ifdef CONFIG_IP_PIMSM
        if (v->flags&VIFF_REGISTER)
                net->ipv4.mroute_reg_vif_num = vifi;
#endif
        if (vifi+1 > net->ipv4.maxvif)
                net->ipv4.maxvif = vifi+1;
        write_unlock_bh(&mrt_lock);
        return 0;
}

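/*
 * Resolved-cache lookup: hash on (group, origin) and walk the bucket's
 * list.  Callers must hold mrt_lock.
 */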
static struct mfc_cache *ipmr_cache_find(struct net *net,
                                         __be32 origin,
                                         __be32 mcastgrp)
{
        int line = MFC_HASH(mcastgrp, origin);
        struct mfc_cache *c;

        list_for_each_entry(c, &net->ipv4.mfc_cache_array[line], list) {
                if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
                        return c;
        }
        return NULL;
}

/*
 *      Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
        if (c == NULL)
                return NULL;
        c->mfc_un.res.minvif = MAXVIFS;
        return c;
}

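/*
 * Unresolved entries are created in packet reception context, hence
 * GFP_ATOMIC; they expire 10 seconds after creation unless resolved.
 */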
static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
        if (c == NULL)
                return NULL;
        skb_queue_head_init(&c->mfc_un.unres.unresolved);
        c->mfc_un.unres.expires = jiffies + 10*HZ;
        return c;
}

/*
 *      A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct net *net, struct mfc_cache *uc,
                               struct mfc_cache *c)
{
        struct sk_buff *skb;
        struct nlmsgerr *e;

        /*
         *      Play the pending entries through our router
         */

        while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

                        if (ipmr_fill_mroute(net, skb, c, NLMSG_DATA(nlh)) > 0) {
                                nlh->nlmsg_len = (skb_tail_pointer(skb) -
                                                  (u8 *)nlh);
                        } else {
                                nlh->nlmsg_type = NLMSG_ERROR;
                                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                                skb_trim(skb, nlh->nlmsg_len);
                                e = NLMSG_DATA(nlh);
                                e->error = -EMSGSIZE;
                                memset(&e->msg, 0, sizeof(e->msg));
                        }

                        rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
                } else
                        ip_mr_forward(net, skb, c, 0);
        }
}

/*
 *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *      expects the following bizarre scheme.
 *
 *      Called under mrt_lock.
 */

static int ipmr_cache_report(struct net *net,
                             struct sk_buff *pkt, vifi_t vifi, int assert)
{
        struct sk_buff *skb;
        const int ihl = ip_hdrlen(pkt);
        struct igmphdr *igmp;
        struct igmpmsg *msg;
        int ret;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT)
                skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
        else
#endif
                skb = alloc_skb(128, GFP_ATOMIC);

        if (!skb)
                return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT) {
                /* Ugly, but we have no choice with this interface.
                   Duplicate old header, fix ihl, length etc.
                   And all this only to mangle msg->im_msgtype and
                   to set msg->im_mbz to "mbz" :-)
                 */
                skb_push(skb, sizeof(struct iphdr));
                skb_reset_network_header(skb);
                skb_reset_transport_header(skb);
                msg = (struct igmpmsg *)skb_network_header(skb);
                memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
                msg->im_msgtype = IGMPMSG_WHOLEPKT;
                msg->im_mbz = 0;
                msg->im_vif = net->ipv4.mroute_reg_vif_num;
                ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
                ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
                                             sizeof(struct iphdr));
        } else
#endif
        {

        /*
         *      Copy the IP header
         */

        skb->network_header = skb->tail;
        skb_put(skb, ihl);
        skb_copy_to_linear_data(skb, pkt->data, ihl);
        ip_hdr(skb)->protocol = 0;                      /* Flag to the kernel this is a route add */
        msg = (struct igmpmsg *)skb_network_header(skb);
        msg->im_vif = vifi;
        skb_dst_set(skb, dst_clone(skb_dst(pkt)));

        /*
         *      Add our header
         */

        igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
        igmp->type      =
        msg->im_msgtype = assert;
        igmp->code      =       0;
        ip_hdr(skb)->tot_len = htons(skb->len);                 /* Fix the length */
        skb->transport_header = skb->network_header;
        }

        if (net->ipv4.mroute_sk == NULL) {
                kfree_skb(skb);
                return -EINVAL;
        }

        /*
         *      Deliver to mrouted
         */
        ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
        if (ret < 0) {
                if (net_ratelimit())
                        printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
                kfree_skb(skb);
        }

        return ret;
}

/*
 *      Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
{
        bool found = false;
        int err;
        struct mfc_cache *c;
        const struct iphdr *iph = ip_hdr(skb);

        spin_lock_bh(&mfc_unres_lock);
        list_for_each_entry(c, &net->ipv4.mfc_unres_queue, list) {
                if (c->mfc_mcastgrp == iph->daddr &&
                    c->mfc_origin == iph->saddr) {
                        found = true;
                        break;
                }
        }

        if (!found) {
                /*
                 *      Create a new entry if allowable
                 */

                if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
                    (c = ipmr_cache_alloc_unres()) == NULL) {
                        spin_unlock_bh(&mfc_unres_lock);

                        kfree_skb(skb);
                        return -ENOBUFS;
                }

                /*
                 *      Fill in the new cache entry
                 */
                c->mfc_parent   = -1;
                c->mfc_origin   = iph->saddr;
                c->mfc_mcastgrp = iph->daddr;

                /*
                 *      Reflect first query at mrouted.
                 */
                err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
                if (err < 0) {
                        /* If the report failed throw the cache entry
                           out - Brad Parker
                         */
                        spin_unlock_bh(&mfc_unres_lock);

                        ipmr_cache_free(c);
                        kfree_skb(skb);
                        return err;
                }

                atomic_inc(&net->ipv4.cache_resolve_queue_len);
                list_add(&c->list, &net->ipv4.mfc_unres_queue);

                mod_timer(&net->ipv4.ipmr_expire_timer, c->mfc_un.unres.expires);
        }

        /*
         *      See if we can append the packet
         */
        if (c->mfc_un.unres.unresolved.qlen>3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
                skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
                err = 0;
        }

        spin_unlock_bh(&mfc_unres_lock);
        return err;
}

/*
 *      MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
{
        int line;
        struct mfc_cache *c, *next;

        line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

        list_for_each_entry_safe(c, next, &net->ipv4.mfc_cache_array[line], list) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
                        write_lock_bh(&mrt_lock);
                        list_del(&c->list);
                        write_unlock_bh(&mrt_lock);

                        ipmr_cache_free(c);
                        return 0;
                }
        }
        return -ENOENT;
}

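/*
 * Add or update a resolved (S,G) entry on behalf of the daemon.  If the
 * entry was pending in the unresolved queue, replay its queued skbs
 * through the router once the route is in place.
 */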
static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
{
        bool found = false;
        int line;
        struct mfc_cache *uc, *c;

        if (mfc->mfcc_parent >= MAXVIFS)
                return -ENFILE;

        line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

        list_for_each_entry(c, &net->ipv4.mfc_cache_array[line], list) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
                        found = true;
                        break;
                }
        }

        if (found) {
                write_lock_bh(&mrt_lock);
                c->mfc_parent = mfc->mfcc_parent;
                ipmr_update_thresholds(net, c, mfc->mfcc_ttls);
                if (!mrtsock)
                        c->mfc_flags |= MFC_STATIC;
                write_unlock_bh(&mrt_lock);
                return 0;
        }

        if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
                return -EINVAL;

        c = ipmr_cache_alloc();
        if (c == NULL)
                return -ENOMEM;

        c->mfc_origin = mfc->mfcc_origin.s_addr;
        c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
        c->mfc_parent = mfc->mfcc_parent;
        ipmr_update_thresholds(net, c, mfc->mfcc_ttls);
        if (!mrtsock)
                c->mfc_flags |= MFC_STATIC;

        write_lock_bh(&mrt_lock);
        list_add(&c->list, &net->ipv4.mfc_cache_array[line]);
        write_unlock_bh(&mrt_lock);

        /*
         *      Check to see if we resolved a queued list. If so we
         *      need to send on the frames and tidy up.
         */
        spin_lock_bh(&mfc_unres_lock);
        list_for_each_entry(uc, &net->ipv4.mfc_unres_queue, list) {
                if (uc->mfc_origin == c->mfc_origin &&
                    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
                        list_del(&uc->list);
                        atomic_dec(&net->ipv4.cache_resolve_queue_len);
                        found = true;
                        break;
                }
        }
        if (list_empty(&net->ipv4.mfc_unres_queue))
                del_timer(&net->ipv4.ipmr_expire_timer);
        spin_unlock_bh(&mfc_unres_lock);

        if (found) {
                ipmr_cache_resolve(net, uc, c);
                ipmr_cache_free(uc);
        }
        return 0;
}

/*
 *      Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct net *net)
{
        int i;
        LIST_HEAD(list);
        struct mfc_cache *c, *next;

        /*
         *      Shut down all active vif entries
         */
        for (i = 0; i < net->ipv4.maxvif; i++) {
                if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
                        vif_delete(net, i, 0, &list);
        }
        unregister_netdevice_many(&list);

        /*
         *      Wipe the cache
         */
        for (i = 0; i < MFC_LINES; i++) {
                list_for_each_entry_safe(c, next, &net->ipv4.mfc_cache_array[i], list) {
                        if (c->mfc_flags&MFC_STATIC)
                                continue;
                        write_lock_bh(&mrt_lock);
                        list_del(&c->list);
                        write_unlock_bh(&mrt_lock);

                        ipmr_cache_free(c);
                }
        }

        if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
                spin_lock_bh(&mfc_unres_lock);
                list_for_each_entry_safe(c, next, &net->ipv4.mfc_unres_queue, list) {
                        list_del(&c->list);
                        ipmr_destroy_unres(net, c);
                }
                spin_unlock_bh(&mfc_unres_lock);
        }
}

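/* Called when the mrouted socket goes away: stop multicast forwarding and
   flush all VIF and MFC state that the daemon set up. */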
static void mrtsock_destruct(struct sock *sk)
{
        struct net *net = sock_net(sk);

        rtnl_lock();
        if (sk == net->ipv4.mroute_sk) {
                IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

                write_lock_bh(&mrt_lock);
                net->ipv4.mroute_sk = NULL;
                write_unlock_bh(&mrt_lock);

                mroute_clean_tables(net);
        }
        rtnl_unlock();
}

/*
 *      Socket options and virtual interface manipulation. The whole
 *      virtual interface system is a complete heap, but unfortunately
 *      that's how BSD mrouted happens to think. Maybe one day with a proper
 *      MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
        int ret;
        struct vifctl vif;
        struct mfcctl mfc;
        struct net *net = sock_net(sk);

        if (optname != MRT_INIT) {
                if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
                        return -EACCES;
        }

        switch (optname) {
        case MRT_INIT:
                if (sk->sk_type != SOCK_RAW ||
                    inet_sk(sk)->inet_num != IPPROTO_IGMP)
                        return -EOPNOTSUPP;
                if (optlen != sizeof(int))
                        return -ENOPROTOOPT;

                rtnl_lock();
                if (net->ipv4.mroute_sk) {
                        rtnl_unlock();
                        return -EADDRINUSE;
                }

                ret = ip_ra_control(sk, 1, mrtsock_destruct);
                if (ret == 0) {
                        write_lock_bh(&mrt_lock);
                        net->ipv4.mroute_sk = sk;
                        write_unlock_bh(&mrt_lock);

                        IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
                }
                rtnl_unlock();
                return ret;
        case MRT_DONE:
                if (sk != net->ipv4.mroute_sk)
                        return -EACCES;
                return ip_ra_control(sk, 0, NULL);
        case MRT_ADD_VIF:
        case MRT_DEL_VIF:
                if (optlen != sizeof(vif))
                        return -EINVAL;
                if (copy_from_user(&vif, optval, sizeof(vif)))
                        return -EFAULT;
                if (vif.vifc_vifi >= MAXVIFS)
                        return -ENFILE;
                rtnl_lock();
                if (optname == MRT_ADD_VIF) {
                        ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
                } else {
                        ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
                }
                rtnl_unlock();
                return ret;

                /*
                 *      Manipulate the forwarding caches. These live
                 *      in a sort of kernel/user symbiosis.
                 */
        case MRT_ADD_MFC:
        case MRT_DEL_MFC:
                if (optlen != sizeof(mfc))
                        return -EINVAL;
                if (copy_from_user(&mfc, optval, sizeof(mfc)))
                        return -EFAULT;
                rtnl_lock();
                if (optname == MRT_DEL_MFC)
                        ret = ipmr_mfc_delete(net, &mfc);
                else
                        ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
                rtnl_unlock();
                return ret;
                /*
                 *      Control PIM assert.
                 */
        case MRT_ASSERT:
        {
                int v;
                if (get_user(v, (int __user *)optval))
                        return -EFAULT;
                net->ipv4.mroute_do_assert = (v) ? 1 : 0;
                return 0;
        }
#ifdef CONFIG_IP_PIMSM
        case MRT_PIM:
        {
                int v;

                if (get_user(v, (int __user *)optval))
                        return -EFAULT;
                v = (v) ? 1 : 0;

                rtnl_lock();
                ret = 0;
                if (v != net->ipv4.mroute_do_pim) {
                        net->ipv4.mroute_do_pim = v;
                        net->ipv4.mroute_do_assert = v;
                }
                rtnl_unlock();
                return ret;
        }
#endif
        /*
         *      Spurious command, or MRT_VERSION which you cannot
         *      set.
         */
        default:
                return -ENOPROTOOPT;
        }
}

/*
 *      Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
        int olr;
        int val;
        struct net *net = sock_net(sk);

        if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
            optname != MRT_PIM &&
#endif
            optname != MRT_ASSERT)
                return -ENOPROTOOPT;

        if (get_user(olr, optlen))
                return -EFAULT;

        if (olr < 0)
                return -EINVAL;
        olr = min_t(unsigned int, olr, sizeof(int));

        if (put_user(olr, optlen))
                return -EFAULT;
        if (optname == MRT_VERSION)
                val = 0x0305;
#ifdef CONFIG_IP_PIMSM
        else if (optname == MRT_PIM)
                val = net->ipv4.mroute_do_pim;
#endif
        else
                val = net->ipv4.mroute_do_assert;
        if (copy_to_user(optval, &val, olr))
                return -EFAULT;
        return 0;
}

/*
 *      The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
        struct sioc_sg_req sr;
        struct sioc_vif_req vr;
        struct vif_device *vif;
        struct mfc_cache *c;
        struct net *net = sock_net(sk);

        switch (cmd) {
        case SIOCGETVIFCNT:
                if (copy_from_user(&vr, arg, sizeof(vr)))
                        return -EFAULT;
                if (vr.vifi >= net->ipv4.maxvif)
                        return -EINVAL;
                read_lock(&mrt_lock);
                vif = &net->ipv4.vif_table[vr.vifi];
                if (VIF_EXISTS(net, vr.vifi)) {
                        vr.icount = vif->pkt_in;
                        vr.ocount = vif->pkt_out;
                        vr.ibytes = vif->bytes_in;
                        vr.obytes = vif->bytes_out;
                        read_unlock(&mrt_lock);

                        if (copy_to_user(arg, &vr, sizeof(vr)))
                                return -EFAULT;
                        return 0;
                }
                read_unlock(&mrt_lock);
                return -EADDRNOTAVAIL;
        case SIOCGETSGCNT:
                if (copy_from_user(&sr, arg, sizeof(sr)))
                        return -EFAULT;

                read_lock(&mrt_lock);
                c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
                if (c) {
                        sr.pktcnt = c->mfc_un.res.pkt;
                        sr.bytecnt = c->mfc_un.res.bytes;
                        sr.wrong_if = c->mfc_un.res.wrong_if;
                        read_unlock(&mrt_lock);

                        if (copy_to_user(arg, &sr, sizeof(sr)))
                                return -EFAULT;
                        return 0;
                }
                read_unlock(&mrt_lock);
                return -EADDRNOTAVAIL;
        default:
                return -ENOIOCTLCMD;
        }
}


static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
        struct net_device *dev = ptr;
        struct net *net = dev_net(dev);
        struct vif_device *v;
        int ct;
        LIST_HEAD(list);

        if (event != NETDEV_UNREGISTER)
                return NOTIFY_DONE;
        v = &net->ipv4.vif_table[0];
        for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
                if (v->dev == dev)
                        vif_delete(net, ct, 1, &list);
        }
        unregister_netdevice_many(&list);
        return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
        .notifier_call = ipmr_device_event,
};

/*
 *      Encapsulate a packet by attaching a valid IPIP header to it.
 *      This avoids tunnel drivers and other mess and gives us the speed so
 *      important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
        struct iphdr *iph;
        struct iphdr *old_iph = ip_hdr(skb);

        skb_push(skb, sizeof(struct iphdr));
        skb->transport_header = skb->network_header;
        skb_reset_network_header(skb);
        iph = ip_hdr(skb);

        iph->version    =       4;
        iph->tos        =       old_iph->tos;
        iph->ttl        =       old_iph->ttl;
        iph->frag_off   =       0;
        iph->daddr      =       daddr;
        iph->saddr      =       saddr;
        iph->protocol   =       IPPROTO_IPIP;
        iph->ihl        =       5;
        iph->tot_len    =       htons(skb->len);
        ip_select_ident(iph, skb_dst(skb), NULL);
        ip_send_check(iph);

        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        nf_reset(skb);
}

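/* Final output step after the netfilter FORWARD hook: account the forwarded
   datagram, handle IP options, and hand the skb to dst_output(). */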
static inline int ipmr_forward_finish(struct sk_buff *skb)
{
        struct ip_options * opt = &(IPCB(skb)->opt);

        IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

        if (unlikely(opt->optlen))
                ip_forward_options(skb);

        return dst_output(skb);
}

/*
 *      Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct net *net, struct sk_buff *skb,
                            struct mfc_cache *c, int vifi)
{
        const struct iphdr *iph = ip_hdr(skb);
        struct vif_device *vif = &net->ipv4.vif_table[vifi];
        struct net_device *dev;
        struct rtable *rt;
        int    encap = 0;

        if (vif->dev == NULL)
                goto out_free;

#ifdef CONFIG_IP_PIMSM
        if (vif->flags & VIFF_REGISTER) {
                vif->pkt_out++;
                vif->bytes_out += skb->len;
                vif->dev->stats.tx_bytes += skb->len;
                vif->dev->stats.tx_packets++;
                ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
                goto out_free;
        }
#endif

        if (vif->flags&VIFF_TUNNEL) {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = vif->remote,
                                                .saddr = vif->local,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(net, &rt, &fl))
                        goto out_free;
                encap = sizeof(struct iphdr);
        } else {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = iph->daddr,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(net, &rt, &fl))
                        goto out_free;
        }

        dev = rt->u.dst.dev;

        if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
                /* Do not fragment multicasts. Alas, IPv4 does not
                   allow to send ICMP, so that packets will disappear
                   to blackhole.
                 */

                IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
                ip_rt_put(rt);
                goto out_free;
        }

        encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

        if (skb_cow(skb, encap)) {
                ip_rt_put(rt);
                goto out_free;
        }

        vif->pkt_out++;
        vif->bytes_out += skb->len;

        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->u.dst);
        ip_decrease_ttl(ip_hdr(skb));

        /* FIXME: forward and output firewalls used to be called here.
         * What do we do with netfilter? -- RR */
        if (vif->flags & VIFF_TUNNEL) {
                ip_encap(skb, vif->local, vif->remote);
                /* FIXME: extra output firewall step used to be here. --RR */
                vif->dev->stats.tx_packets++;
                vif->dev->stats.tx_bytes += skb->len;
        }

        IPCB(skb)->flags |= IPSKB_FORWARDED;

        /*
         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
         * not only before forwarding, but after forwarding on all output
         * interfaces. It is clear, if mrouter runs a multicasting
         * program, it should receive packets not depending to what interface
         * program is joined.
         * If we will not make it, the program will have to join on all
         * interfaces. On the other hand, multihoming host (or router, but
         * not mrouter) cannot join to more than one interface - it will
         * result in receiving multiple packets.
         */
        NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
                ipmr_forward_finish);
        return;

out_free:
        kfree_skb(skb);
        return;
}

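/* Map a net_device back to its VIF index; returns -1 if the device is not
   a VIF. */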
static int ipmr_find_vif(struct net_device *dev)
{
        struct net *net = dev_net(dev);
        int ct;
        for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
                if (net->ipv4.vif_table[ct].dev == dev)
                        break;
        }
        return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct net *net, struct sk_buff *skb,
                         struct mfc_cache *cache, int local)
{
        int psend = -1;
        int vif, ct;

        vif = cache->mfc_parent;
        cache->mfc_un.res.pkt++;
        cache->mfc_un.res.bytes += skb->len;

        /*
         * Wrong interface: drop packet and (maybe) send PIM assert.
         */
        if (net->ipv4.vif_table[vif].dev != skb->dev) {
                int true_vifi;

                if (skb_rtable(skb)->fl.iif == 0) {
                        /* It is our own packet, looped back.
                           Very complicated situation...

                           The best workaround until routing daemons will be
                           fixed is not to redistribute packet, if it was
                           send through wrong interface. It means, that
                           multicast applications WILL NOT work for
                           (S,G), which have default multicast route pointing
                           to wrong oif. In any case, it is not a good
                           idea to use multicasting applications on router.
                         */
                        goto dont_forward;
                }

                cache->mfc_un.res.wrong_if++;
                true_vifi = ipmr_find_vif(skb->dev);

                if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
                    /* pimsm uses asserts, when switching from RPT to SPT,
                       so that we cannot check that packet arrived on an oif.
                       It is bad, but otherwise we would need to move pretty
                       large chunk of pimd to kernel. Ough... --ANK
                     */
                    (net->ipv4.mroute_do_pim ||
                     cache->mfc_un.res.ttls[true_vifi] < 255) &&
                    time_after(jiffies,
                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
                        cache->mfc_un.res.last_assert = jiffies;
                        ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
                }
                goto dont_forward;
        }

        net->ipv4.vif_table[vif].pkt_in++;
        net->ipv4.vif_table[vif].bytes_in += skb->len;

        /*
         *      Forward the frame
         */
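        /*
         * Clone the skb for every outgoing interface except the last,
         * which consumes the original (unless a local copy is needed).
         */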
        for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
                if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        ipmr_queue_xmit(net, skb2, cache, psend);
                        }
                        psend = ct;
                }
        }
        if (psend != -1) {
                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        if (skb2)
                                ipmr_queue_xmit(net, skb2, cache, psend);
                } else {
                        ipmr_queue_xmit(net, skb, cache, psend);
                        return 0;
                }
        }

dont_forward:
        if (!local)
                kfree_skb(skb);
        return 0;
}


/*
 *      Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
        struct mfc_cache *cache;
        struct net *net = dev_net(skb->dev);
        int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;

        /* Packet is looped back after forward, it should not be
           forwarded second time, but still can be delivered locally.
         */
        if (IPCB(skb)->flags&IPSKB_FORWARDED)
                goto dont_forward;

        if (!local) {
                    if (IPCB(skb)->opt.router_alert) {
                            if (ip_call_ra_chain(skb))
                                    return 0;
                    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
                            /* IGMPv1 (and broken IGMPv2 implementations sort of
                               Cisco IOS <= 11.2(8)) do not put router alert
                               option to IGMP packets destined to routable
                               groups. It is very bad, because it means
                               that we can forward NO IGMP messages.
                             */
                            read_lock(&mrt_lock);
                            if (net->ipv4.mroute_sk) {
                                    nf_reset(skb);
                                    raw_rcv(net->ipv4.mroute_sk, skb);
                                    read_unlock(&mrt_lock);
                                    return 0;
                            }
                            read_unlock(&mrt_lock);
                    }
        }

        read_lock(&mrt_lock);
        cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

        /*
         *      No usable cache entry
         */
        if (cache == NULL) {
                int vif;

                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        ip_local_deliver(skb);
                        if (skb2 == NULL) {
                                read_unlock(&mrt_lock);
                                return -ENOBUFS;
                        }
                        skb = skb2;
                }

                vif = ipmr_find_vif(skb->dev);
                if (vif >= 0) {
                        int err = ipmr_cache_unresolved(net, vif, skb);
                        read_unlock(&mrt_lock);

                        return err;
                }
                read_unlock(&mrt_lock);
                kfree_skb(skb);
                return -ENODEV;
        }

        ip_mr_forward(net, skb, cache, local);

        read_unlock(&mrt_lock);

        if (local)
                return ip_local_deliver(skb);

        return 0;

dont_forward:
        if (local)
                return ip_local_deliver(skb);
        kfree_skb(skb);
        return 0;
}

#ifdef CONFIG_IP_PIMSM
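/*
 * Common tail of PIMv1/PIMv2 REGISTER processing: validate the inner
 * packet, then re-inject it on the pimreg device so it passes through
 * ip_mr_input() as if it had been received on the register VIF.
 */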
1498 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1499 {
1500         struct net_device *reg_dev = NULL;
1501         struct iphdr *encap;
1502         struct net *net = dev_net(skb->dev);
1503
1504         encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1505         /*
1506            Check that:
1507            a. packet is really destinted to a multicast group
1508            b. packet is not a NULL-REGISTER
1509            c. packet is not truncated
1510          */
1511         if (!ipv4_is_multicast(encap->daddr) ||
1512             encap->tot_len == 0 ||
1513             ntohs(encap->tot_len) + pimlen > skb->len)
1514                 return 1;
1515
1516         read_lock(&mrt_lock);
1517         if (net->ipv4.mroute_reg_vif_num >= 0)
1518                 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1519         if (reg_dev)
1520                 dev_hold(reg_dev);
1521         read_unlock(&mrt_lock);
1522
1523         if (reg_dev == NULL)
1524                 return 1;
1525
1526         skb->mac_header = skb->network_header;
1527         skb_pull(skb, (u8*)encap - skb->data);
1528         skb_reset_network_header(skb);
1529         skb->dev = reg_dev;
1530         skb->protocol = htons(ETH_P_IP);
1531         skb->ip_summed = 0;
1532         skb->pkt_type = PACKET_HOST;
1533         skb_dst_drop(skb);
1534         reg_dev->stats.rx_bytes += skb->len;
1535         reg_dev->stats.rx_packets++;
1536         nf_reset(skb);
1537         netif_rx(skb);
1538         dev_put(reg_dev);
1539
1540         return 0;
1541 }
1542 #endif
1543
1544 #ifdef CONFIG_IP_PIMSM_V1
1545 /*
1546  * Handle IGMP messages of PIMv1
1547  */
1548
1549 int pim_rcv_v1(struct sk_buff * skb)
1550 {
1551         struct igmphdr *pim;
1552         struct net *net = dev_net(skb->dev);
1553
1554         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1555                 goto drop;
1556
1557         pim = igmp_hdr(skb);
1558
1559         if (!net->ipv4.mroute_do_pim ||
1560             pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1561                 goto drop;
1562
1563         if (__pim_rcv(skb, sizeof(*pim))) {
1564 drop:
1565                 kfree_skb(skb);
1566         }
1567         return 0;
1568 }
1569 #endif
1570
1571 #ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
1573 {
1574         struct pimreghdr *pim;
1575
1576         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1577                 goto drop;
1578
1579         pim = (struct pimreghdr *)skb_transport_header(skb);
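	/*
	 * Accept a checksum over the PIM header alone (as the RFC
	 * requires for Registers) or over the whole packet, for
	 * interoperability with older peers.
	 */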
	if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1584                 goto drop;
1585
1586         if (__pim_rcv(skb, sizeof(*pim))) {
1587 drop:
1588                 kfree_skb(skb);
1589         }
1590         return 0;
1591 }
1592 #endif
1593
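/*
 * Fill an RTM_GETROUTE reply for a multicast route: RTA_IIF carries the
 * incoming interface, and RTA_MULTIPATH holds one struct rtnexthop per
 * outgoing vif whose ttl is below 255, with the ttl in rtnh_hops.
 * A sketch of the resulting attribute layout:
 *
 *	RTA_IIF:	ifindex of vif_table[c->mfc_parent].dev
 *	RTA_MULTIPATH:	{ rtnh_ifindex = oif, rtnh_hops = ttl } ...
 */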
1594 static int
1595 ipmr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc_cache *c,
1596                  struct rtmsg *rtm)
1597 {
1598         int ct;
1599         struct rtnexthop *nhp;
1600         u8 *b = skb_tail_pointer(skb);
1601         struct rtattr *mp_head;
1602
1603         /* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mfc_parent >= MAXVIFS)
1605                 return -ENOENT;
1606
1607         if (VIF_EXISTS(net, c->mfc_parent))
1608                 RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex);
1609
1610         mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1611
1612         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1613                 if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) {
1614                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1615                                 goto rtattr_failure;
1616                         nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1617                         nhp->rtnh_flags = 0;
1618                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1619                         nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1620                         nhp->rtnh_len = sizeof(*nhp);
1621                 }
1622         }
1623         mp_head->rta_type = RTA_MULTIPATH;
1624         mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1625         rtm->rtm_type = RTN_MULTICAST;
1626         return 1;
1627
1628 rtattr_failure:
1629         nlmsg_trim(skb, b);
1630         return -EMSGSIZE;
1631 }
1632
1633 int ipmr_get_route(struct net *net,
1634                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1635 {
1636         int err;
1637         struct mfc_cache *cache;
1638         struct rtable *rt = skb_rtable(skb);
1639
1640         read_lock(&mrt_lock);
1641         cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1642
1643         if (cache == NULL) {
1644                 struct sk_buff *skb2;
1645                 struct iphdr *iph;
1646                 struct net_device *dev;
1647                 int vif;
1648
1649                 if (nowait) {
1650                         read_unlock(&mrt_lock);
1651                         return -EAGAIN;
1652                 }
1653
1654                 dev = skb->dev;
1655                 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1656                         read_unlock(&mrt_lock);
1657                         return -ENODEV;
1658                 }
1659                 skb2 = skb_clone(skb, GFP_ATOMIC);
1660                 if (!skb2) {
1661                         read_unlock(&mrt_lock);
1662                         return -ENOMEM;
1663                 }
1664
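		/*
		 * Build a dummy IP header on the clone: the unresolved
		 * queue only needs saddr/daddr, and version 0 marks the
		 * skb as a netlink request rather than a real packet.
		 */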
1665                 skb_push(skb2, sizeof(struct iphdr));
1666                 skb_reset_network_header(skb2);
1667                 iph = ip_hdr(skb2);
1668                 iph->ihl = sizeof(struct iphdr) >> 2;
1669                 iph->saddr = rt->rt_src;
1670                 iph->daddr = rt->rt_dst;
1671                 iph->version = 0;
1672                 err = ipmr_cache_unresolved(net, vif, skb2);
1673                 read_unlock(&mrt_lock);
1674                 return err;
1675         }
1676
1677         if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1678                 cache->mfc_flags |= MFC_NOTIFY;
1679         err = ipmr_fill_mroute(net, skb, cache, rtm);
1680         read_unlock(&mrt_lock);
1681         return err;
1682 }
1683
1684 #ifdef CONFIG_PROC_FS
1685 /*
 *	The /proc interfaces to multicast routing: /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
1687  */
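/*
 *	A (hypothetical) /proc/net/ip_mr_vif line, following the format
 *	used in ipmr_vif_seq_show() below:
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *	 0 eth0            1500      10      3000      20 00000 0A000001 00000000
 */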
1688 struct ipmr_vif_iter {
1689         struct seq_net_private p;
1690         int ct;
1691 };
1692
1693 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1694                                            struct ipmr_vif_iter *iter,
1695                                            loff_t pos)
1696 {
1697         for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1698                 if (!VIF_EXISTS(net, iter->ct))
1699                         continue;
1700                 if (pos-- == 0)
1701                         return &net->ipv4.vif_table[iter->ct];
1702         }
1703         return NULL;
1704 }
1705
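/* mrt_lock is read-held from ->start() to ->stop() across the whole dump */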
1706 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1707         __acquires(mrt_lock)
1708 {
1709         struct net *net = seq_file_net(seq);
1710
1711         read_lock(&mrt_lock);
1712         return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1713                 : SEQ_START_TOKEN;
1714 }
1715
1716 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1717 {
1718         struct ipmr_vif_iter *iter = seq->private;
1719         struct net *net = seq_file_net(seq);
1720
1721         ++*pos;
1722         if (v == SEQ_START_TOKEN)
1723                 return ipmr_vif_seq_idx(net, iter, 0);
1724
1725         while (++iter->ct < net->ipv4.maxvif) {
1726                 if (!VIF_EXISTS(net, iter->ct))
1727                         continue;
1728                 return &net->ipv4.vif_table[iter->ct];
1729         }
1730         return NULL;
1731 }
1732
1733 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1734         __releases(mrt_lock)
1735 {
1736         read_unlock(&mrt_lock);
1737 }
1738
1739 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1740 {
1741         struct net *net = seq_file_net(seq);
1742
1743         if (v == SEQ_START_TOKEN) {
1744                 seq_puts(seq,
1745                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1746         } else {
1747                 const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";
1749
1750                 seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1752                            vif - net->ipv4.vif_table,
1753                            name, vif->bytes_in, vif->pkt_in,
1754                            vif->bytes_out, vif->pkt_out,
1755                            vif->flags, vif->local, vif->remote);
1756         }
1757         return 0;
1758 }
1759
1760 static const struct seq_operations ipmr_vif_seq_ops = {
1761         .start = ipmr_vif_seq_start,
1762         .next  = ipmr_vif_seq_next,
1763         .stop  = ipmr_vif_seq_stop,
1764         .show  = ipmr_vif_seq_show,
1765 };
1766
1767 static int ipmr_vif_open(struct inode *inode, struct file *file)
1768 {
1769         return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1770                             sizeof(struct ipmr_vif_iter));
1771 }
1772
1773 static const struct file_operations ipmr_vif_fops = {
1774         .owner   = THIS_MODULE,
1775         .open    = ipmr_vif_open,
1776         .read    = seq_read,
1777         .llseek  = seq_lseek,
1778         .release = seq_release_net,
1779 };
1780
1781 struct ipmr_mfc_iter {
1782         struct seq_net_private p;
1783         struct list_head *cache;
1784         int ct;
1785 };
1786
1787
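/*
 * The MFC iterator walks the resolved hash buckets under mrt_lock
 * first, then the unresolved queue under mfc_unres_lock; it->cache
 * records which list (and hence which lock) is currently held, so
 * ipmr_mfc_seq_stop() can drop the right one.
 */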
1788 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1789                                           struct ipmr_mfc_iter *it, loff_t pos)
1790 {
1791         struct mfc_cache *mfc;
1792
1793         read_lock(&mrt_lock);
1794         for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
1795                 it->cache = &net->ipv4.mfc_cache_array[it->ct];
1796                 list_for_each_entry(mfc, it->cache, list)
1797                         if (pos-- == 0)
1798                                 return mfc;
1799         }
1800         read_unlock(&mrt_lock);
1801
1802         spin_lock_bh(&mfc_unres_lock);
1803         it->cache = &net->ipv4.mfc_unres_queue;
1804         list_for_each_entry(mfc, it->cache, list)
1805                 if (pos-- == 0)
1806                         return mfc;
1807         spin_unlock_bh(&mfc_unres_lock);
1808
1809         it->cache = NULL;
1810         return NULL;
1811 }
1812
1813
1814 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1815 {
1816         struct ipmr_mfc_iter *it = seq->private;
1817         struct net *net = seq_file_net(seq);
1818
1819         it->cache = NULL;
1820         it->ct = 0;
1821         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1822                 : SEQ_START_TOKEN;
1823 }
1824
1825 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1826 {
1827         struct mfc_cache *mfc = v;
1828         struct ipmr_mfc_iter *it = seq->private;
1829         struct net *net = seq_file_net(seq);
1830
1831         ++*pos;
1832
1833         if (v == SEQ_START_TOKEN)
1834                 return ipmr_mfc_seq_idx(net, seq->private, 0);
1835
1836         if (mfc->list.next != it->cache)
1837                 return list_entry(mfc->list.next, struct mfc_cache, list);
1838
1839         if (it->cache == &net->ipv4.mfc_unres_queue)
1840                 goto end_of_list;
1841
1842         BUG_ON(it->cache != &net->ipv4.mfc_cache_array[it->ct]);
1843
1844         while (++it->ct < MFC_LINES) {
1845                 it->cache = &net->ipv4.mfc_cache_array[it->ct];
1846                 if (list_empty(it->cache))
1847                         continue;
1848                 return list_first_entry(it->cache, struct mfc_cache, list);
1849         }
1850
1851         /* exhausted cache_array, show unresolved */
1852         read_unlock(&mrt_lock);
1853         it->cache = &net->ipv4.mfc_unres_queue;
1854         it->ct = 0;
1855
1856         spin_lock_bh(&mfc_unres_lock);
1857         if (!list_empty(it->cache))
1858                 return list_first_entry(it->cache, struct mfc_cache, list);
1859
1860  end_of_list:
1861         spin_unlock_bh(&mfc_unres_lock);
1862         it->cache = NULL;
1863
1864         return NULL;
1865 }
1866
1867 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1868 {
1869         struct ipmr_mfc_iter *it = seq->private;
1870         struct net *net = seq_file_net(seq);
1871
1872         if (it->cache == &net->ipv4.mfc_unres_queue)
1873                 spin_unlock_bh(&mfc_unres_lock);
1874         else if (it->cache == &net->ipv4.mfc_cache_array[it->ct])
1875                 read_unlock(&mrt_lock);
1876 }
1877
1878 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1879 {
1880         int n;
1881         struct net *net = seq_file_net(seq);
1882
1883         if (v == SEQ_START_TOKEN) {
1884                 seq_puts(seq,
1885                  "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1886         } else {
1887                 const struct mfc_cache *mfc = v;
1888                 const struct ipmr_mfc_iter *it = seq->private;
1889
1890                 seq_printf(seq, "%08lX %08lX %-3hd",
1891                            (unsigned long) mfc->mfc_mcastgrp,
1892                            (unsigned long) mfc->mfc_origin,
1893                            mfc->mfc_parent);
1894
1895                 if (it->cache != &net->ipv4.mfc_unres_queue) {
1896                         seq_printf(seq, " %8lu %8lu %8lu",
1897                                    mfc->mfc_un.res.pkt,
1898                                    mfc->mfc_un.res.bytes,
1899                                    mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
1902                                 if (VIF_EXISTS(net, n) &&
1903                                     mfc->mfc_un.res.ttls[n] < 255)
1904                                         seq_printf(seq,
1905                                            " %2d:%-3d",
1906                                            n, mfc->mfc_un.res.ttls[n]);
1907                         }
1908                 } else {
1909                         /* unresolved mfc_caches don't contain
1910                          * pkt, bytes and wrong_if values
1911                          */
1912                         seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1913                 }
1914                 seq_putc(seq, '\n');
1915         }
1916         return 0;
1917 }
1918
1919 static const struct seq_operations ipmr_mfc_seq_ops = {
1920         .start = ipmr_mfc_seq_start,
1921         .next  = ipmr_mfc_seq_next,
1922         .stop  = ipmr_mfc_seq_stop,
1923         .show  = ipmr_mfc_seq_show,
1924 };
1925
1926 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1927 {
1928         return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1929                             sizeof(struct ipmr_mfc_iter));
1930 }
1931
1932 static const struct file_operations ipmr_mfc_fops = {
1933         .owner   = THIS_MODULE,
1934         .open    = ipmr_mfc_open,
1935         .read    = seq_read,
1936         .llseek  = seq_lseek,
1937         .release = seq_release_net,
1938 };
1939 #endif
1940
1941 #ifdef CONFIG_IP_PIMSM_V2
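/* PIMv2 Register messages arrive as IP protocol 103 (IPPROTO_PIM) */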
1942 static const struct net_protocol pim_protocol = {
1943         .handler        =       pim_rcv,
1944         .netns_ok       =       1,
1945 };
1946 #endif
1947
1948
1949 /*
1950  *      Setup for IP multicast routing
1951  */
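/*
 * Allocate per-namespace state: the vif table, the hashed MFC buckets,
 * the unresolved-entry queue with its expiry timer, and the /proc
 * entries; on failure everything is unwound in reverse order.
 */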
1952 static int __net_init ipmr_net_init(struct net *net)
1953 {
1954         unsigned int i;
1955         int err = 0;
1956
1957         net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1958                                       GFP_KERNEL);
1959         if (!net->ipv4.vif_table) {
1960                 err = -ENOMEM;
1961                 goto fail;
1962         }
1963
1964         /* Forwarding cache */
1965         net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1966                                             sizeof(struct list_head),
1967                                             GFP_KERNEL);
1968         if (!net->ipv4.mfc_cache_array) {
1969                 err = -ENOMEM;
1970                 goto fail_mfc_cache;
1971         }
1972
1973         for (i = 0; i < MFC_LINES; i++)
1974                 INIT_LIST_HEAD(&net->ipv4.mfc_cache_array[i]);
1975
1976         INIT_LIST_HEAD(&net->ipv4.mfc_unres_queue);
1977
1978         setup_timer(&net->ipv4.ipmr_expire_timer, ipmr_expire_process,
1979                     (unsigned long)net);
1980
1981 #ifdef CONFIG_IP_PIMSM
1982         net->ipv4.mroute_reg_vif_num = -1;
1983 #endif
1984
1985 #ifdef CONFIG_PROC_FS
1986         err = -ENOMEM;
1987         if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1988                 goto proc_vif_fail;
1989         if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1990                 goto proc_cache_fail;
1991 #endif
1992         return 0;
1993
1994 #ifdef CONFIG_PROC_FS
1995 proc_cache_fail:
1996         proc_net_remove(net, "ip_mr_vif");
1997 proc_vif_fail:
1998         kfree(net->ipv4.mfc_cache_array);
1999 #endif
2000 fail_mfc_cache:
2001         kfree(net->ipv4.vif_table);
2002 fail:
2003         return err;
2004 }
2005
2006 static void __net_exit ipmr_net_exit(struct net *net)
2007 {
2008 #ifdef CONFIG_PROC_FS
2009         proc_net_remove(net, "ip_mr_cache");
2010         proc_net_remove(net, "ip_mr_vif");
2011 #endif
2012         kfree(net->ipv4.mfc_cache_array);
2013         kfree(net->ipv4.vif_table);
2014 }
2015
2016 static struct pernet_operations ipmr_net_ops = {
2017         .init = ipmr_net_init,
2018         .exit = ipmr_net_exit,
2019 };
2020
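/*
 * Register the cache allocator, per-net state, netdevice notifier and
 * (optionally) the PIMv2 protocol handler; the error labels below
 * unwind in exact reverse order.
 */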
2021 int __init ip_mr_init(void)
2022 {
2023         int err;
2024
2025         mrt_cachep = kmem_cache_create("ip_mrt_cache",
2026                                        sizeof(struct mfc_cache),
2027                                        0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2028                                        NULL);
2029         if (!mrt_cachep)
2030                 return -ENOMEM;
2031
2032         err = register_pernet_subsys(&ipmr_net_ops);
2033         if (err)
2034                 goto reg_pernet_fail;
2035
2036         err = register_netdevice_notifier(&ip_mr_notifier);
2037         if (err)
2038                 goto reg_notif_fail;
2039 #ifdef CONFIG_IP_PIMSM_V2
2040         if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2041                 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2042                 err = -EAGAIN;
2043                 goto add_proto_fail;
2044         }
2045 #endif
2046         return 0;
2047
2048 #ifdef CONFIG_IP_PIMSM_V2
2049 add_proto_fail:
2050         unregister_netdevice_notifier(&ip_mr_notifier);
2051 #endif
2052 reg_notif_fail:
2053         unregister_pernet_subsys(&ipmr_net_ops);
2054 reg_pernet_fail:
2055         kmem_cache_destroy(mrt_cachep);
2056         return err;
2057 }