ipv4: ipmr: remove net pointer from struct mfc_cache
net/ipv4/ipmr.c
/*
 *      IP multicast routing support for mrouted 3.6/3.8
 *
 *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *        Linux Consultancy and Custom Driver Development
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Fixes:
 *      Michael Chastain        :       Incorrect size of copying.
 *      Alan Cox                :       Added the cache manager code
 *      Alan Cox                :       Fixed the clone/copy bug and device race.
 *      Mike McLagan            :       Routing by source
 *      Malcolm Beattie         :       Buffer handling fixes.
 *      Alexey Kuznetsov        :       Double buffer free and other fixes.
 *      SVR Anand               :       Fixed several multicast bugs and problems.
 *      Alexey Kuznetsov        :       Status, optimisations and more.
 *      Brad Parker             :       Better behaviour on mrouted upcall
 *                                      overflow.
 *      Carlos Picoto           :       PIMv1 Support
 *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
 *                                      Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM 1
#endif

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *      Multicast router control variables
 */

#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and is protected
   by the weak lock mrt_lock. The queue of unresolved entries is
   protected by the strong spinlock mfc_unres_lock.

   In this case the data path is entirely free of exclusive locks.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct net *net, struct sk_buff *skb,
                         struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct net *net,
                             struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct net *net, struct sk_buff *skb,
                            struct mfc_cache *c, struct rtmsg *rtm);

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
        struct net *net = dev_net(dev);

        dev_close(dev);

        dev = __dev_get_by_name(net, "tunl0");
        if (dev) {
                const struct net_device_ops *ops = dev->netdev_ops;
                struct ifreq ifr;
                struct ip_tunnel_parm p;

                memset(&p, 0, sizeof(p));
                p.iph.daddr = v->vifc_rmt_addr.s_addr;
                p.iph.saddr = v->vifc_lcl_addr.s_addr;
                p.iph.version = 4;
                p.iph.ihl = 5;
                p.iph.protocol = IPPROTO_IPIP;
                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
                ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

                if (ops->ndo_do_ioctl) {
                        mm_segment_t oldfs = get_fs();

                        set_fs(KERNEL_DS);
                        ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
                        set_fs(oldfs);
                }
        }
}

static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
        struct net_device  *dev;

        dev = __dev_get_by_name(net, "tunl0");

        if (dev) {
                const struct net_device_ops *ops = dev->netdev_ops;
                int err;
                struct ifreq ifr;
                struct ip_tunnel_parm p;
                struct in_device  *in_dev;

                memset(&p, 0, sizeof(p));
                p.iph.daddr = v->vifc_rmt_addr.s_addr;
                p.iph.saddr = v->vifc_lcl_addr.s_addr;
                p.iph.version = 4;
                p.iph.ihl = 5;
                p.iph.protocol = IPPROTO_IPIP;
                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
                ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

                if (ops->ndo_do_ioctl) {
                        mm_segment_t oldfs = get_fs();

                        set_fs(KERNEL_DS);
                        err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
                        set_fs(oldfs);
                } else
                        err = -EOPNOTSUPP;

                dev = NULL;

                if (err == 0 &&
                    (dev = __dev_get_by_name(net, p.name)) != NULL) {
                        dev->flags |= IFF_MULTICAST;

                        in_dev = __in_dev_get_rtnl(dev);
                        if (in_dev == NULL)
                                goto failure;

                        ipv4_devconf_setall(in_dev);
                        IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

                        if (dev_open(dev))
                                goto failure;
                        dev_hold(dev);
                }
        }
        return dev;

failure:
        /* allow the register to be completed before unregistering. */
        rtnl_unlock();
        rtnl_lock();

        unregister_netdevice(dev);
        return NULL;
}

#ifdef CONFIG_IP_PIMSM

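/*
 * Transmit handler for the PIMREG pseudo-device: a packet routed out of
 * the register vif is not sent on the wire but bounced up to the
 * user-space daemon as an IGMPMSG_WHOLEPKT upcall, then freed.
 */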
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct net *net = dev_net(dev);

        read_lock(&mrt_lock);
        dev->stats.tx_bytes += skb->len;
        dev->stats.tx_packets++;
        ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
                          IGMPMSG_WHOLEPKT);
        read_unlock(&mrt_lock);
        kfree_skb(skb);
        return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
        .ndo_start_xmit = reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
        dev->type               = ARPHRD_PIMREG;
        dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
        dev->flags              = IFF_NOARP;
        dev->netdev_ops         = &reg_vif_netdev_ops;
        dev->destructor         = free_netdev;
        dev->features           |= NETIF_F_NETNS_LOCAL;
}

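/*
 * Create and register the "pimreg" device used as the PIM register vif.
 * Returns the device with a reference held, or NULL on failure.
 */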
static struct net_device *ipmr_reg_vif(struct net *net)
{
        struct net_device *dev;
        struct in_device *in_dev;

        dev = alloc_netdev(0, "pimreg", reg_vif_setup);

        if (dev == NULL)
                return NULL;

        dev_net_set(dev, net);

        if (register_netdevice(dev)) {
                free_netdev(dev);
                return NULL;
        }
        dev->iflink = 0;

        rcu_read_lock();
        if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
                rcu_read_unlock();
                goto failure;
        }

        ipv4_devconf_setall(in_dev);
        IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
        rcu_read_unlock();

        if (dev_open(dev))
                goto failure;

        dev_hold(dev);

        return dev;

failure:
        /* allow the register to be completed before unregistering. */
        rtnl_unlock();
        rtnl_lock();

        unregister_netdevice(dev);
        return NULL;
}
#endif

/*
 *      Delete a VIF entry
 *      @notify: Set to 1 if the caller is a notifier_call
 */

static int vif_delete(struct net *net, int vifi, int notify,
                      struct list_head *head)
{
        struct vif_device *v;
        struct net_device *dev;
        struct in_device *in_dev;

        if (vifi < 0 || vifi >= net->ipv4.maxvif)
                return -EADDRNOTAVAIL;

        v = &net->ipv4.vif_table[vifi];

        write_lock_bh(&mrt_lock);
        dev = v->dev;
        v->dev = NULL;

        if (!dev) {
                write_unlock_bh(&mrt_lock);
                return -EADDRNOTAVAIL;
        }

#ifdef CONFIG_IP_PIMSM
        if (vifi == net->ipv4.mroute_reg_vif_num)
                net->ipv4.mroute_reg_vif_num = -1;
#endif

        if (vifi + 1 == net->ipv4.maxvif) {
                int tmp;
                for (tmp = vifi - 1; tmp >= 0; tmp--) {
                        if (VIF_EXISTS(net, tmp))
                                break;
                }
                net->ipv4.maxvif = tmp + 1;
        }

        write_unlock_bh(&mrt_lock);

        dev_set_allmulti(dev, -1);

        if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
                IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
                ip_rt_multicast_event(in_dev);
        }

        if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
                unregister_netdevice_queue(dev, head);

        dev_put(dev);
        return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
        kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct net *net, struct mfc_cache *c)
{
        struct sk_buff *skb;
        struct nlmsgerr *e;

        atomic_dec(&net->ipv4.cache_resolve_queue_len);

        while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
                        nlh->nlmsg_type = NLMSG_ERROR;
                        nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                        skb_trim(skb, nlh->nlmsg_len);
                        e = NLMSG_DATA(nlh);
                        e->error = -ETIMEDOUT;
                        memset(&e->msg, 0, sizeof(e->msg));

                        rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
                } else
                        kfree_skb(skb);
        }

        ipmr_cache_free(c);
}


/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
        struct net *net = (struct net *)arg;
        unsigned long now;
        unsigned long expires;
        struct mfc_cache *c, **cp;

        if (!spin_trylock(&mfc_unres_lock)) {
                mod_timer(&net->ipv4.ipmr_expire_timer, jiffies + HZ/10);
                return;
        }

        if (net->ipv4.mfc_unres_queue == NULL)
                goto out;

        now = jiffies;
        expires = 10*HZ;
        cp = &net->ipv4.mfc_unres_queue;

        while ((c = *cp) != NULL) {
                if (time_after(c->mfc_un.unres.expires, now)) {
                        unsigned long interval = c->mfc_un.unres.expires - now;
                        if (interval < expires)
                                expires = interval;
                        cp = &c->next;
                        continue;
                }

                *cp = c->next;

                ipmr_destroy_unres(net, c);
        }

        if (net->ipv4.mfc_unres_queue != NULL)
                mod_timer(&net->ipv4.ipmr_expire_timer, jiffies + expires);

out:
        spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct net *net, struct mfc_cache *cache,
                                   unsigned char *ttls)
{
        int vifi;

        cache->mfc_un.res.minvif = MAXVIFS;
        cache->mfc_un.res.maxvif = 0;
        memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

        for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
                if (VIF_EXISTS(net, vifi) &&
                    ttls[vifi] && ttls[vifi] < 255) {
                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
                        if (cache->mfc_un.res.minvif > vifi)
                                cache->mfc_un.res.minvif = vifi;
                        if (cache->mfc_un.res.maxvif <= vifi)
                                cache->mfc_un.res.maxvif = vifi + 1;
                }
        }
}

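/*
 * Add a new virtual interface. The vif index must be free; depending on
 * vifc_flags this attaches an existing device, a DVMRP tunnel, or the
 * PIM register device. Called under RTNL.
 */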
static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
{
        int vifi = vifc->vifc_vifi;
        struct vif_device *v = &net->ipv4.vif_table[vifi];
        struct net_device *dev;
        struct in_device *in_dev;
        int err;

        /* Is vif busy? */
        if (VIF_EXISTS(net, vifi))
                return -EADDRINUSE;

        switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
        case VIFF_REGISTER:
                /*
                 * Special Purpose VIF in PIM
                 * All the packets will be sent to the daemon
                 */
                if (net->ipv4.mroute_reg_vif_num >= 0)
                        return -EADDRINUSE;
                dev = ipmr_reg_vif(net);
                if (!dev)
                        return -ENOBUFS;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        unregister_netdevice(dev);
                        dev_put(dev);
                        return err;
                }
                break;
#endif
        case VIFF_TUNNEL:
                dev = ipmr_new_tunnel(net, vifc);
                if (!dev)
                        return -ENOBUFS;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        ipmr_del_tunnel(dev, vifc);
                        dev_put(dev);
                        return err;
                }
                break;

        case VIFF_USE_IFINDEX:
        case 0:
                if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
                        dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
                        if (dev && dev->ip_ptr == NULL) {
                                dev_put(dev);
                                return -EADDRNOTAVAIL;
                        }
                } else
                        dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

                if (!dev)
                        return -EADDRNOTAVAIL;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        dev_put(dev);
                        return err;
                }
                break;
        default:
                return -EINVAL;
        }

        if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
                dev_put(dev);
                return -EADDRNOTAVAIL;
        }
        IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
        ip_rt_multicast_event(in_dev);

        /*
         *      Fill in the VIF structures
         */
        v->rate_limit = vifc->vifc_rate_limit;
        v->local = vifc->vifc_lcl_addr.s_addr;
        v->remote = vifc->vifc_rmt_addr.s_addr;
        v->flags = vifc->vifc_flags;
        if (!mrtsock)
                v->flags |= VIFF_STATIC;
        v->threshold = vifc->vifc_threshold;
        v->bytes_in = 0;
        v->bytes_out = 0;
        v->pkt_in = 0;
        v->pkt_out = 0;
        v->link = dev->ifindex;
        if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
                v->link = dev->iflink;

        /* And finish update writing critical data */
        write_lock_bh(&mrt_lock);
        v->dev = dev;
#ifdef CONFIG_IP_PIMSM
        if (v->flags & VIFF_REGISTER)
                net->ipv4.mroute_reg_vif_num = vifi;
#endif
        if (vifi + 1 > net->ipv4.maxvif)
                net->ipv4.maxvif = vifi + 1;
        write_unlock_bh(&mrt_lock);
        return 0;
}

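/* Look up a resolved (origin, group) entry; the caller must hold mrt_lock. */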
static struct mfc_cache *ipmr_cache_find(struct net *net,
                                         __be32 origin,
                                         __be32 mcastgrp)
{
        int line = MFC_HASH(mcastgrp, origin);
        struct mfc_cache *c;

        for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
                if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
                        break;
        }
        return c;
}

/*
 *      Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
        if (c == NULL)
                return NULL;
        c->mfc_un.res.minvif = MAXVIFS;
        return c;
}

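/*
 * Allocate an unresolved entry. This runs in the packet path, hence
 * GFP_ATOMIC; the entry expires after 10 seconds unless it is resolved.
 */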
static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
        if (c == NULL)
                return NULL;
        skb_queue_head_init(&c->mfc_un.unres.unresolved);
        c->mfc_un.unres.expires = jiffies + 10*HZ;
        return c;
}

/*
 *      A cache entry has gone from queued into a resolved state
 */

static void ipmr_cache_resolve(struct net *net, struct mfc_cache *uc,
                               struct mfc_cache *c)
{
        struct sk_buff *skb;
        struct nlmsgerr *e;

        /*
         *      Play the pending entries through our router
         */

        while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

                        if (ipmr_fill_mroute(net, skb, c, NLMSG_DATA(nlh)) > 0) {
                                nlh->nlmsg_len = (skb_tail_pointer(skb) -
                                                  (u8 *)nlh);
                        } else {
                                nlh->nlmsg_type = NLMSG_ERROR;
                                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                                skb_trim(skb, nlh->nlmsg_len);
                                e = NLMSG_DATA(nlh);
                                e->error = -EMSGSIZE;
                                memset(&e->msg, 0, sizeof(e->msg));
                        }

                        rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
                } else
                        ip_mr_forward(net, skb, c, 0);
        }
}

/*
 *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *      expects the following bizarre scheme.
 *
 *      Called under mrt_lock.
 */

static int ipmr_cache_report(struct net *net,
                             struct sk_buff *pkt, vifi_t vifi, int assert)
{
        struct sk_buff *skb;
        const int ihl = ip_hdrlen(pkt);
        struct igmphdr *igmp;
        struct igmpmsg *msg;
        int ret;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT)
                skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
        else
#endif
                skb = alloc_skb(128, GFP_ATOMIC);

        if (!skb)
                return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT) {
                /* Ugly, but we have no choice with this interface.
                   Duplicate old header, fix ihl, length etc.
                   And all this only to mangle msg->im_msgtype and
                   to set msg->im_mbz to "mbz" :-)
                 */
                skb_push(skb, sizeof(struct iphdr));
                skb_reset_network_header(skb);
                skb_reset_transport_header(skb);
                msg = (struct igmpmsg *)skb_network_header(skb);
                memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
                msg->im_msgtype = IGMPMSG_WHOLEPKT;
                msg->im_mbz = 0;
                msg->im_vif = net->ipv4.mroute_reg_vif_num;
                ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
                ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
                                             sizeof(struct iphdr));
        } else
#endif
        {

        /*
         *      Copy the IP header
         */

        skb->network_header = skb->tail;
        skb_put(skb, ihl);
        skb_copy_to_linear_data(skb, pkt->data, ihl);
        ip_hdr(skb)->protocol = 0;                      /* Flag to the kernel this is a route add */
        msg = (struct igmpmsg *)skb_network_header(skb);
        msg->im_vif = vifi;
        skb_dst_set(skb, dst_clone(skb_dst(pkt)));

        /*
         *      Add our header
         */

        igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
        igmp->type      =
        msg->im_msgtype = assert;
        igmp->code      =       0;
        ip_hdr(skb)->tot_len = htons(skb->len);                 /* Fix the length */
        skb->transport_header = skb->network_header;
        }

        if (net->ipv4.mroute_sk == NULL) {
                kfree_skb(skb);
                return -EINVAL;
        }

        /*
         *      Deliver to mrouted
         */
        ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
        if (ret < 0) {
                if (net_ratelimit())
                        printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
                kfree_skb(skb);
        }

        return ret;
}

/*
 *      Queue a packet for resolution. The packet is attached to a cache
 *      entry on the locked unresolved queue.
 */

static int
ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
{
        int err;
        struct mfc_cache *c;
        const struct iphdr *iph = ip_hdr(skb);

        spin_lock_bh(&mfc_unres_lock);
        for (c = net->ipv4.mfc_unres_queue; c; c = c->next) {
                if (c->mfc_mcastgrp == iph->daddr &&
                    c->mfc_origin == iph->saddr)
                        break;
        }

        if (c == NULL) {
                /*
                 *      Create a new entry if allowable
                 */

                if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
                    (c = ipmr_cache_alloc_unres()) == NULL) {
                        spin_unlock_bh(&mfc_unres_lock);

                        kfree_skb(skb);
                        return -ENOBUFS;
                }

                /*
                 *      Fill in the new cache entry
                 */
                c->mfc_parent   = -1;
                c->mfc_origin   = iph->saddr;
                c->mfc_mcastgrp = iph->daddr;

                /*
                 *      Reflect first query at mrouted.
                 */
                err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
                if (err < 0) {
                        /* If the report failed throw the cache entry
                           out - Brad Parker
                         */
                        spin_unlock_bh(&mfc_unres_lock);

                        ipmr_cache_free(c);
                        kfree_skb(skb);
                        return err;
                }

                atomic_inc(&net->ipv4.cache_resolve_queue_len);
                c->next = net->ipv4.mfc_unres_queue;
                net->ipv4.mfc_unres_queue = c;

                mod_timer(&net->ipv4.ipmr_expire_timer, c->mfc_un.unres.expires);
        }

        /*
         *      See if we can append the packet
         */
        if (c->mfc_un.unres.unresolved.qlen > 3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
                skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
                err = 0;
        }

        spin_unlock_bh(&mfc_unres_lock);
        return err;
}

/*
 *      MFC cache manipulation by user space mroute daemon
 */

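/* Remove an (origin, group) entry from the resolved cache. */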
static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
{
        int line;
        struct mfc_cache *c, **cp;

        line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

        for (cp = &net->ipv4.mfc_cache_array[line];
             (c = *cp) != NULL; cp = &c->next) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
                        write_lock_bh(&mrt_lock);
                        *cp = c->next;
                        write_unlock_bh(&mrt_lock);

                        ipmr_cache_free(c);
                        return 0;
                }
        }
        return -ENOENT;
}

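/*
 * Add or update a resolved cache entry. If a matching entry sits on the
 * unresolved queue, resolve it and replay its pending packets.
 */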
static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
{
        int line;
        struct mfc_cache *uc, *c, **cp;

        if (mfc->mfcc_parent >= MAXVIFS)
                return -ENFILE;

        line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

        for (cp = &net->ipv4.mfc_cache_array[line];
             (c = *cp) != NULL; cp = &c->next) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
                        break;
        }

        if (c != NULL) {
                write_lock_bh(&mrt_lock);
                c->mfc_parent = mfc->mfcc_parent;
                ipmr_update_thresholds(net, c, mfc->mfcc_ttls);
                if (!mrtsock)
                        c->mfc_flags |= MFC_STATIC;
                write_unlock_bh(&mrt_lock);
                return 0;
        }

        if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
                return -EINVAL;

        c = ipmr_cache_alloc();
        if (c == NULL)
                return -ENOMEM;

        c->mfc_origin = mfc->mfcc_origin.s_addr;
        c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
        c->mfc_parent = mfc->mfcc_parent;
        ipmr_update_thresholds(net, c, mfc->mfcc_ttls);
        if (!mrtsock)
                c->mfc_flags |= MFC_STATIC;

        write_lock_bh(&mrt_lock);
        c->next = net->ipv4.mfc_cache_array[line];
        net->ipv4.mfc_cache_array[line] = c;
        write_unlock_bh(&mrt_lock);

        /*
         *      Check to see if we resolved a queued list. If so we
         *      need to send on the frames and tidy up.
         */
        spin_lock_bh(&mfc_unres_lock);
        for (cp = &net->ipv4.mfc_unres_queue; (uc = *cp) != NULL;
             cp = &uc->next) {
                if (uc->mfc_origin == c->mfc_origin &&
                    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
                        *cp = uc->next;
                        atomic_dec(&net->ipv4.cache_resolve_queue_len);
                        break;
                }
        }
        if (net->ipv4.mfc_unres_queue == NULL)
                del_timer(&net->ipv4.ipmr_expire_timer);
        spin_unlock_bh(&mfc_unres_lock);

        if (uc) {
                ipmr_cache_resolve(net, uc, c);
                ipmr_cache_free(uc);
        }
        return 0;
}

/*
 *      Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct net *net)
{
        int i;
        LIST_HEAD(list);

        /*
         *      Shut down all active vif entries
         */
        for (i = 0; i < net->ipv4.maxvif; i++) {
                if (!(net->ipv4.vif_table[i].flags & VIFF_STATIC))
                        vif_delete(net, i, 0, &list);
        }
        unregister_netdevice_many(&list);

        /*
         *      Wipe the cache
         */
        for (i = 0; i < MFC_LINES; i++) {
                struct mfc_cache *c, **cp;

                cp = &net->ipv4.mfc_cache_array[i];
                while ((c = *cp) != NULL) {
                        if (c->mfc_flags & MFC_STATIC) {
                                cp = &c->next;
                                continue;
                        }
                        write_lock_bh(&mrt_lock);
                        *cp = c->next;
                        write_unlock_bh(&mrt_lock);

                        ipmr_cache_free(c);
                }
        }

        if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
                struct mfc_cache *c, **cp;

                spin_lock_bh(&mfc_unres_lock);
                cp = &net->ipv4.mfc_unres_queue;
                while ((c = *cp) != NULL) {
                        *cp = c->next;
                        ipmr_destroy_unres(net, c);
                }
                spin_unlock_bh(&mfc_unres_lock);
        }
}

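/*
 * Destructor for the mroute socket: stop multicast forwarding and flush
 * all non-static vifs and cache entries when mrouted closes its socket.
 */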
static void mrtsock_destruct(struct sock *sk)
{
        struct net *net = sock_net(sk);

        rtnl_lock();
        if (sk == net->ipv4.mroute_sk) {
                IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

                write_lock_bh(&mrt_lock);
                net->ipv4.mroute_sk = NULL;
                write_unlock_bh(&mrt_lock);

                mroute_clean_tables(net);
        }
        rtnl_unlock();
}

/*
 *      Socket options and virtual interface manipulation. The whole
 *      virtual interface system is a complete heap, but unfortunately
 *      that's how BSD mrouted happens to think. Maybe one day with a proper
 *      MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
        int ret;
        struct vifctl vif;
        struct mfcctl mfc;
        struct net *net = sock_net(sk);

        if (optname != MRT_INIT) {
                if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
                        return -EACCES;
        }

        switch (optname) {
        case MRT_INIT:
                if (sk->sk_type != SOCK_RAW ||
                    inet_sk(sk)->inet_num != IPPROTO_IGMP)
                        return -EOPNOTSUPP;
                if (optlen != sizeof(int))
                        return -ENOPROTOOPT;

                rtnl_lock();
                if (net->ipv4.mroute_sk) {
                        rtnl_unlock();
                        return -EADDRINUSE;
                }

                ret = ip_ra_control(sk, 1, mrtsock_destruct);
                if (ret == 0) {
                        write_lock_bh(&mrt_lock);
                        net->ipv4.mroute_sk = sk;
                        write_unlock_bh(&mrt_lock);

                        IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
                }
                rtnl_unlock();
                return ret;
        case MRT_DONE:
                if (sk != net->ipv4.mroute_sk)
                        return -EACCES;
                return ip_ra_control(sk, 0, NULL);
        case MRT_ADD_VIF:
        case MRT_DEL_VIF:
                if (optlen != sizeof(vif))
                        return -EINVAL;
                if (copy_from_user(&vif, optval, sizeof(vif)))
                        return -EFAULT;
                if (vif.vifc_vifi >= MAXVIFS)
                        return -ENFILE;
                rtnl_lock();
                if (optname == MRT_ADD_VIF) {
                        ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
                } else {
                        ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
                }
                rtnl_unlock();
                return ret;

                /*
                 *      Manipulate the forwarding caches. These live
                 *      in a sort of kernel/user symbiosis.
                 */
        case MRT_ADD_MFC:
        case MRT_DEL_MFC:
                if (optlen != sizeof(mfc))
                        return -EINVAL;
                if (copy_from_user(&mfc, optval, sizeof(mfc)))
                        return -EFAULT;
                rtnl_lock();
                if (optname == MRT_DEL_MFC)
                        ret = ipmr_mfc_delete(net, &mfc);
                else
                        ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
                rtnl_unlock();
                return ret;
                /*
                 *      Control PIM assert.
                 */
        case MRT_ASSERT:
        {
                int v;
                if (get_user(v, (int __user *)optval))
                        return -EFAULT;
                net->ipv4.mroute_do_assert = (v) ? 1 : 0;
                return 0;
        }
#ifdef CONFIG_IP_PIMSM
        case MRT_PIM:
        {
                int v;

                if (get_user(v, (int __user *)optval))
                        return -EFAULT;
                v = (v) ? 1 : 0;

                rtnl_lock();
                ret = 0;
                if (v != net->ipv4.mroute_do_pim) {
                        net->ipv4.mroute_do_pim = v;
                        net->ipv4.mroute_do_assert = v;
                }
                rtnl_unlock();
                return ret;
        }
#endif
        /*
         *      Spurious command, or MRT_VERSION which you cannot
         *      set.
         */
        default:
                return -ENOPROTOOPT;
        }
}

/*
 *      Getsockopt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
        int olr;
        int val;
        struct net *net = sock_net(sk);

        if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
            optname != MRT_PIM &&
#endif
            optname != MRT_ASSERT)
                return -ENOPROTOOPT;

        if (get_user(olr, optlen))
                return -EFAULT;

        olr = min_t(unsigned int, olr, sizeof(int));
        if (olr < 0)
                return -EINVAL;

        if (put_user(olr, optlen))
                return -EFAULT;
        if (optname == MRT_VERSION)
                val = 0x0305;
#ifdef CONFIG_IP_PIMSM
        else if (optname == MRT_PIM)
                val = net->ipv4.mroute_do_pim;
#endif
        else
                val = net->ipv4.mroute_do_assert;
        if (copy_to_user(optval, &val, olr))
                return -EFAULT;
        return 0;
}

/*
 *      The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
        struct sioc_sg_req sr;
        struct sioc_vif_req vr;
        struct vif_device *vif;
        struct mfc_cache *c;
        struct net *net = sock_net(sk);

        switch (cmd) {
        case SIOCGETVIFCNT:
                if (copy_from_user(&vr, arg, sizeof(vr)))
                        return -EFAULT;
                if (vr.vifi >= net->ipv4.maxvif)
                        return -EINVAL;
                read_lock(&mrt_lock);
                vif = &net->ipv4.vif_table[vr.vifi];
                if (VIF_EXISTS(net, vr.vifi)) {
                        vr.icount = vif->pkt_in;
                        vr.ocount = vif->pkt_out;
                        vr.ibytes = vif->bytes_in;
                        vr.obytes = vif->bytes_out;
                        read_unlock(&mrt_lock);

                        if (copy_to_user(arg, &vr, sizeof(vr)))
                                return -EFAULT;
                        return 0;
                }
                read_unlock(&mrt_lock);
                return -EADDRNOTAVAIL;
        case SIOCGETSGCNT:
                if (copy_from_user(&sr, arg, sizeof(sr)))
                        return -EFAULT;

                read_lock(&mrt_lock);
                c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
                if (c) {
                        sr.pktcnt = c->mfc_un.res.pkt;
                        sr.bytecnt = c->mfc_un.res.bytes;
                        sr.wrong_if = c->mfc_un.res.wrong_if;
                        read_unlock(&mrt_lock);

                        if (copy_to_user(arg, &sr, sizeof(sr)))
                                return -EFAULT;
                        return 0;
                }
                read_unlock(&mrt_lock);
                return -EADDRNOTAVAIL;
        default:
                return -ENOIOCTLCMD;
        }
}


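/*
 * Netdevice notifier: when a device unregisters, tear down any vifs
 * that reference it.
 */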
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
        struct net_device *dev = ptr;
        struct net *net = dev_net(dev);
        struct vif_device *v;
        int ct;
        LIST_HEAD(list);

        if (event != NETDEV_UNREGISTER)
                return NOTIFY_DONE;
        v = &net->ipv4.vif_table[0];
        for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
                if (v->dev == dev)
                        vif_delete(net, ct, 1, &list);
        }
        unregister_netdevice_many(&list);
        return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
        .notifier_call = ipmr_device_event,
};

/*
 *      Encapsulate a packet by attaching a valid IPIP header to it.
 *      This avoids tunnel drivers and other mess and gives us the speed so
 *      important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
        struct iphdr *iph;
        struct iphdr *old_iph = ip_hdr(skb);

        skb_push(skb, sizeof(struct iphdr));
        skb->transport_header = skb->network_header;
        skb_reset_network_header(skb);
        iph = ip_hdr(skb);

        iph->version    =       4;
        iph->tos        =       old_iph->tos;
        iph->ttl        =       old_iph->ttl;
        iph->frag_off   =       0;
        iph->daddr      =       daddr;
        iph->saddr      =       saddr;
        iph->protocol   =       IPPROTO_IPIP;
        iph->ihl        =       5;
        iph->tot_len    =       htons(skb->len);
        ip_select_ident(iph, skb_dst(skb), NULL);
        ip_send_check(iph);

        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        nf_reset(skb);
}

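/*
 * Output-path tail for forwarded multicast packets: bump the forwarding
 * counter, process any IP options, then hand the skb to dst_output().
 */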
static inline int ipmr_forward_finish(struct sk_buff *skb)
{
        struct ip_options *opt = &(IPCB(skb)->opt);

        IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

        if (unlikely(opt->optlen))
                ip_forward_options(skb);

        return dst_output(skb);
}

/*
 *      Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct net *net, struct sk_buff *skb,
                            struct mfc_cache *c, int vifi)
{
        const struct iphdr *iph = ip_hdr(skb);
        struct vif_device *vif = &net->ipv4.vif_table[vifi];
        struct net_device *dev;
        struct rtable *rt;
        int    encap = 0;

        if (vif->dev == NULL)
                goto out_free;

#ifdef CONFIG_IP_PIMSM
        if (vif->flags & VIFF_REGISTER) {
                vif->pkt_out++;
                vif->bytes_out += skb->len;
                vif->dev->stats.tx_bytes += skb->len;
                vif->dev->stats.tx_packets++;
                ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
                goto out_free;
        }
#endif

        if (vif->flags & VIFF_TUNNEL) {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = vif->remote,
                                                .saddr = vif->local,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(net, &rt, &fl))
                        goto out_free;
                encap = sizeof(struct iphdr);
        } else {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = iph->daddr,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(net, &rt, &fl))
                        goto out_free;
        }

        dev = rt->u.dst.dev;

        if (skb->len + encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
                /* Do not fragment multicasts. Alas, IPv4 does not
                   allow us to send ICMP here, so the packets will
                   simply disappear into a black hole.
                 */

                IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
                ip_rt_put(rt);
                goto out_free;
        }

        encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

        if (skb_cow(skb, encap)) {
                ip_rt_put(rt);
                goto out_free;
        }

        vif->pkt_out++;
        vif->bytes_out += skb->len;

        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->u.dst);
        ip_decrease_ttl(ip_hdr(skb));

        /* FIXME: forward and output firewalls used to be called here.
         * What do we do with netfilter? -- RR */
        if (vif->flags & VIFF_TUNNEL) {
                ip_encap(skb, vif->local, vif->remote);
                /* FIXME: extra output firewall step used to be here. --RR */
                vif->dev->stats.tx_packets++;
                vif->dev->stats.tx_bytes += skb->len;
        }

        IPCB(skb)->flags |= IPSKB_FORWARDED;

        /*
         * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
         * locally not only before forwarding, but also after forwarding on
         * all output interfaces. Clearly, if an mrouter runs a multicasting
         * program, it should receive packets regardless of which interface
         * the program joined on. If we did not do this, the program would
         * have to join on all interfaces. On the other hand, a multihomed
         * host (or router, but not mrouter) cannot join on more than one
         * interface - that would result in receiving multiple packets.
         */
        NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
                ipmr_forward_finish);
        return;

out_free:
        kfree_skb(skb);
        return;
}

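/* Map a net_device back to its vif index, or -1 if it is not a vif. */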
static int ipmr_find_vif(struct net_device *dev)
{
        struct net *net = dev_net(dev);
        int ct;
        for (ct = net->ipv4.maxvif - 1; ct >= 0; ct--) {
                if (net->ipv4.vif_table[ct].dev == dev)
                        break;
        }
        return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct net *net, struct sk_buff *skb,
                         struct mfc_cache *cache, int local)
{
        int psend = -1;
        int vif, ct;

        vif = cache->mfc_parent;
        cache->mfc_un.res.pkt++;
        cache->mfc_un.res.bytes += skb->len;

        /*
         * Wrong interface: drop packet and (maybe) send PIM assert.
         */
        if (net->ipv4.vif_table[vif].dev != skb->dev) {
                int true_vifi;

                if (skb_rtable(skb)->fl.iif == 0) {
                        /* It is our own packet, looped back.
                           This is a very complicated situation...

                           The best workaround until the routing daemons are
                           fixed is not to redistribute a packet if it was
                           sent through the wrong interface. It means that
                           multicast applications WILL NOT work for
                           (S,G) entries whose default multicast route points
                           to the wrong oif. In any case, it is not a good
                           idea to run multicasting applications on a router.
                         */
                        goto dont_forward;
                }

                cache->mfc_un.res.wrong_if++;
                true_vifi = ipmr_find_vif(skb->dev);

                if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
                    /* pimsm uses asserts when switching from RPT to SPT,
                       so we cannot check that the packet arrived on an oif.
                       It is bad, but otherwise we would need to move a
                       pretty large chunk of pimd into the kernel. Ough... --ANK
                     */
                    (net->ipv4.mroute_do_pim ||
                     cache->mfc_un.res.ttls[true_vifi] < 255) &&
                    time_after(jiffies,
                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
                        cache->mfc_un.res.last_assert = jiffies;
                        ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
                }
                goto dont_forward;
        }

        net->ipv4.vif_table[vif].pkt_in++;
        net->ipv4.vif_table[vif].bytes_in += skb->len;

        /*
         *      Forward the frame
         */
        for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
                if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        ipmr_queue_xmit(net, skb2, cache, psend);
                        }
                        psend = ct;
                }
        }
        if (psend != -1) {
                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        if (skb2)
                                ipmr_queue_xmit(net, skb2, cache, psend);
                } else {
                        ipmr_queue_xmit(net, skb, cache, psend);
                        return 0;
                }
        }

dont_forward:
        if (!local)
                kfree_skb(skb);
        return 0;
}


/*
 *      Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
        struct mfc_cache *cache;
        struct net *net = dev_net(skb->dev);
        int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;

        /* Packet is looped back after forwarding; it should not be
           forwarded a second time, but it can still be delivered locally.
         */
        if (IPCB(skb)->flags & IPSKB_FORWARDED)
                goto dont_forward;

        if (!local) {
                    if (IPCB(skb)->opt.router_alert) {
                            if (ip_call_ra_chain(skb))
                                    return 0;
                    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
                            /* IGMPv1 (and broken IGMPv2 implementations such
                               as Cisco IOS <= 11.2(8)) do not put the router
                               alert option in IGMP packets destined to
                               routable groups. It is very bad, because it
                               means that we can forward NO IGMP messages.
                             */
                            read_lock(&mrt_lock);
                            if (net->ipv4.mroute_sk) {
                                    nf_reset(skb);
                                    raw_rcv(net->ipv4.mroute_sk, skb);
                                    read_unlock(&mrt_lock);
                                    return 0;
                            }
                            read_unlock(&mrt_lock);
                    }
        }

        read_lock(&mrt_lock);
        cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

        /*
         *      No usable cache entry
         */
        if (cache == NULL) {
                int vif;

                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        ip_local_deliver(skb);
                        if (skb2 == NULL) {
                                read_unlock(&mrt_lock);
                                return -ENOBUFS;
                        }
                        skb = skb2;
                }

                vif = ipmr_find_vif(skb->dev);
                if (vif >= 0) {
                        int err = ipmr_cache_unresolved(net, vif, skb);
                        read_unlock(&mrt_lock);

                        return err;
                }
                read_unlock(&mrt_lock);
                kfree_skb(skb);
                return -ENODEV;
        }

        ip_mr_forward(net, skb, cache, local);

        read_unlock(&mrt_lock);

        if (local)
                return ip_local_deliver(skb);

        return 0;

dont_forward:
        if (local)
                return ip_local_deliver(skb);
        kfree_skb(skb);
        return 0;
}

#ifdef CONFIG_IP_PIMSM
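/*
 * Common PIM register decapsulation: validate the inner packet, then
 * re-inject it on the register vif as if it had arrived there.
 * Returns 0 if the skb was consumed, 1 if the caller should free it.
 */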
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
        struct net_device *reg_dev = NULL;
        struct iphdr *encap;
        struct net *net = dev_net(skb->dev);

        encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
        /*
           Check that:
           a. packet is really destined to a multicast group
           b. packet is not a NULL-REGISTER
           c. packet is not truncated
         */
        if (!ipv4_is_multicast(encap->daddr) ||
            encap->tot_len == 0 ||
            ntohs(encap->tot_len) + pimlen > skb->len)
                return 1;

        read_lock(&mrt_lock);
        if (net->ipv4.mroute_reg_vif_num >= 0)
                reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
        if (reg_dev)
                dev_hold(reg_dev);
        read_unlock(&mrt_lock);

        if (reg_dev == NULL)
                return 1;

        skb->mac_header = skb->network_header;
        skb_pull(skb, (u8 *)encap - skb->data);
        skb_reset_network_header(skb);
        skb->dev = reg_dev;
        skb->protocol = htons(ETH_P_IP);
        skb->ip_summed = 0;
        skb->pkt_type = PACKET_HOST;
        skb_dst_drop(skb);
        reg_dev->stats.rx_bytes += skb->len;
        reg_dev->stats.rx_packets++;
        nf_reset(skb);
        netif_rx(skb);
        dev_put(reg_dev);

        return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
        struct igmphdr *pim;
        struct net *net = dev_net(skb->dev);

        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
                goto drop;

        pim = igmp_hdr(skb);

        if (!net->ipv4.mroute_do_pim ||
            pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
                goto drop;

        if (__pim_rcv(skb, sizeof(*pim))) {
drop:
                kfree_skb(skb);
        }
        return 0;
}
#endif

1580 #ifdef CONFIG_IP_PIMSM_V2
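/*
 * Handle PIMv2 REGISTER messages.  A message is accepted when the
 * checksum over the PIM header alone is valid, or failing that,
 * over the whole packet (some peers checksum the entire message);
 * NULL-REGISTERs are rejected here and handled by __pim_rcv()'s
 * encapsulation checks.
 */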
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

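/*
 * Fill an rtnetlink reply for one (S,G) entry: RTA_IIF carries the
 * ifindex of the incoming vif, and RTA_MULTIPATH one rtnexthop per
 * forwarding vif with the TTL threshold in rtnh_hops.
 */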
static int
ipmr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc_cache *c,
		 struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mfc_parent >= MAXVIFS)
		return -ENOENT;

	if (VIF_EXISTS(net, c->mfc_parent))
		RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

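/*
 * Resolve an rtnetlink route query against the multicast cache.  On
 * a cache miss the caller either gets -EAGAIN (nowait) or a pseudo
 * packet is queued to trigger resolution via the mrouted upcall.
 */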
int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

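		/*
		 * Build a skeleton IPv4 header on the clone: the
		 * unresolved-cache code only needs saddr/daddr, and
		 * version 0 presumably marks it as not a real packet.
		 */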
		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(net, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(net, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing:
 *	/proc/net/ip_mr_cache and /proc/net/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	int ct;
};

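/*
 * Walk vif_table skipping unused slots.  Iteration runs under
 * mrt_lock, taken in ipmr_vif_seq_start() and dropped in
 * ipmr_vif_seq_stop().
 */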
static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		if (pos-- == 0)
			return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct net *net = seq_file_net(seq);

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < net->ipv4.maxvif) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - net->ipv4.vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner   = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mfc_cache **cache;
	int ct;
};

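/*
 * The MFC iterator walks the resolved hash buckets under mrt_lock,
 * then the unresolved queue under mfc_unres_lock.  it->cache records
 * which list (and therefore which lock) is current, so that
 * ipmr_mfc_seq_stop() releases the right one.
 */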
static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc_cache *mfc;

	it->cache = net->ipv4.mfc_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
		for (mfc = net->ipv4.mfc_cache_array[it->ct];
		     mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &net->ipv4.mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = net->ipv4.mfc_unres_queue; mfc; mfc = mfc->next)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &net->ipv4.mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != net->ipv4.mfc_cache_array);

	while (++it->ct < MFC_LINES) {
		mfc = net->ipv4.mfc_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &net->ipv4.mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = net->ipv4.mfc_unres_queue;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	if (it->cache == &net->ipv4.mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == net->ipv4.mfc_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%08lX %08lX %-3hd",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &net->ipv4.mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
					   " %2d:%-3d",
					   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner   = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
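/* IPPROTO_PIM handler; netns_ok marks it safe to call for packets
 * arriving in any network namespace.
 */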
static const struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
	.netns_ok	=	1,
};
#endif

/*
 *	Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err = 0;

	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
				      GFP_KERNEL);
	if (!net->ipv4.vif_table) {
		err = -ENOMEM;
		goto fail;
	}

	/* Forwarding cache */
	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
					    sizeof(struct mfc_cache *),
					    GFP_KERNEL);
	if (!net->ipv4.mfc_cache_array) {
		err = -ENOMEM;
		goto fail_mfc_cache;
	}

	setup_timer(&net->ipv4.ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)net);

#ifdef CONFIG_IP_PIMSM
	net->ipv4.mroute_reg_vif_num = -1;
#endif

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	kfree(net->ipv4.mfc_cache_array);
#endif
fail_mfc_cache:
	kfree(net->ipv4.vif_table);
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	kfree(net->ipv4.mfc_cache_array);
	kfree(net->ipv4.vif_table);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	return 0;

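/* Error unwind: undo the registrations in reverse order. */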
#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}