net/ipv4/ipmr.c

   1 /*
   2  *      IP multicast routing support for mrouted 3.6/3.8
   3  *
   4  *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5  *        Linux Consultancy and Custom Driver Development
   6  *
   7  *      This program is free software; you can redistribute it and/or
   8  *      modify it under the terms of the GNU General Public License
   9  *      as published by the Free Software Foundation; either version
  10  *      2 of the License, or (at your option) any later version.
  11  *
  12  *      Fixes:
  13  *      Michael Chastain        :       Incorrect size of copying.
  14  *      Alan Cox                :       Added the cache manager code
  15  *      Alan Cox                :       Fixed the clone/copy bug and device race.
  16  *      Mike McLagan            :       Routing by source
  17  *      Malcolm Beattie         :       Buffer handling fixes.
  18  *      Alexey Kuznetsov        :       Double buffer free and other fixes.
  19  *      SVR Anand               :       Fixed several multicast bugs and problems.
  20  *      Alexey Kuznetsov        :       Status, optimisations and more.
  21  *      Brad Parker             :       Better behaviour on mrouted upcall
  22  *                                      overflow.
  23  *      Carlos Picoto           :       PIMv1 Support
  24  *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
  25  *                                      Relax this requirement to work with older peers.
  26  *
  27  */
  28
  29 #include <asm/system.h>
  30 #include <asm/uaccess.h>
  31 #include <linux/types.h>
  32 #include <linux/capability.h>
  33 #include <linux/errno.h>
  34 #include <linux/timer.h>
  35 #include <linux/mm.h>
  36 #include <linux/kernel.h>
  37 #include <linux/fcntl.h>
  38 #include <linux/stat.h>
  39 #include <linux/socket.h>
  40 #include <linux/in.h>
  41 #include <linux/inet.h>
  42 #include <linux/netdevice.h>
  43 #include <linux/inetdevice.h>
  44 #include <linux/igmp.h>
  45 #include <linux/proc_fs.h>
  46 #include <linux/seq_file.h>
  47 #include <linux/mroute.h>
  48 #include <linux/init.h>
  49 #include <linux/if_ether.h>
  50 #include <net/net_namespace.h>
  51 #include <net/ip.h>
  52 #include <net/protocol.h>
  53 #include <linux/skbuff.h>
  54 #include <net/route.h>
  55 #include <net/sock.h>
  56 #include <net/icmp.h>
  57 #include <net/udp.h>
  58 #include <net/raw.h>
  59 #include <linux/notifier.h>
  60 #include <linux/if_arp.h>
  61 #include <linux/netfilter_ipv4.h>
  62 #include <net/ipip.h>
  63 #include <net/checksum.h>
  64 #include <net/netlink.h>
  65
  66 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
  67 #define CONFIG_IP_PIMSM 1
  68 #endif
  69
  70 /* Big lock, protecting vif table, mrt cache and mroute socket state.
  71    Note that the changes are semaphored via rtnl_lock.
  72  */
  73
  74 static DEFINE_RWLOCK(mrt_lock);
  75
  76 /*
  77  *      Multicast router control variables
  78  */
  79
  80 #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
  81
  82 static struct mfc_cache *mfc_unres_queue;               /* Queue of unresolved entries */
  83
  84 /* Special spinlock for queue of unresolved entries */
  85 static DEFINE_SPINLOCK(mfc_unres_lock);
  86
  87 /* We return to original Alan's scheme. Hash table of resolved
  88    entries is changed only in process context and protected
  89    with weak lock mrt_lock. Queue of unresolved entries is protected
  90    with strong spinlock mfc_unres_lock.
  91
  92    In this case data path is free of exclusive locks at all.
  93  */
  94
  95 static struct kmem_cache *mrt_cachep __read_mostly;
  96
  97 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
  98 static int ipmr_cache_report(struct net *net,
  99                              struct sk_buff *pkt, vifi_t vifi, int assert);
 100 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
 101
 102 static struct timer_list ipmr_expire_timer;
 103
 104 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 105
 106 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
 107 {
 108         struct net *net = dev_net(dev);
 109
 110         dev_close(dev);
 111
 112         dev = __dev_get_by_name(net, "tunl0");
 113         if (dev) {
 114                 const struct net_device_ops *ops = dev->netdev_ops;
 115                 struct ifreq ifr;
 116                 struct ip_tunnel_parm p;
 117
 118                 memset(&p, 0, sizeof(p));
 119                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
 120                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
 121                 p.iph.version = 4;
 122                 p.iph.ihl = 5;
 123                 p.iph.protocol = IPPROTO_IPIP;
 124                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
 125                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
 126
 127                 if (ops->ndo_do_ioctl) {
 128                         mm_segment_t oldfs = get_fs();
 129
 130                         set_fs(KERNEL_DS);
 131                         ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
 132                         set_fs(oldfs);
 133                 }
 134         }
 135 }
 136
 137 static
 138 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
 139 {
 140         struct net_device  *dev;
 141
 142         dev = __dev_get_by_name(net, "tunl0");
 143
 144         if (dev) {
 145                 const struct net_device_ops *ops = dev->netdev_ops;
 146                 int err;
 147                 struct ifreq ifr;
 148                 struct ip_tunnel_parm p;
 149                 struct in_device  *in_dev;
 150
 151                 memset(&p, 0, sizeof(p));
 152                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
 153                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
 154                 p.iph.version = 4;
 155                 p.iph.ihl = 5;
 156                 p.iph.protocol = IPPROTO_IPIP;
 157                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
 158                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
 159
 160                 if (ops->ndo_do_ioctl) {
 161                         mm_segment_t oldfs = get_fs();
 162
 163                         set_fs(KERNEL_DS);
 164                         err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
 165                         set_fs(oldfs);
 166                 } else
 167                         err = -EOPNOTSUPP;
 168
 169                 dev = NULL;
 170
 171                 if (err == 0 &&
 172                     (dev = __dev_get_by_name(net, p.name)) != NULL) {
 173                         dev->flags |= IFF_MULTICAST;
 174
 175                         in_dev = __in_dev_get_rtnl(dev);
 176                         if (in_dev == NULL)
 177                                 goto failure;
 178
 179                         ipv4_devconf_setall(in_dev);
 180                         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
 181
 182                         if (dev_open(dev))
 183                                 goto failure;
 184                         dev_hold(dev);
 185                 }
 186         }
 187         return dev;
 188
 189 failure:
 190         /* allow the register to be completed before unregistering. */
 191         rtnl_unlock();
 192         rtnl_lock();
 193
 194         unregister_netdevice(dev);
 195         return NULL;
 196 }
 197
 198 #ifdef CONFIG_IP_PIMSM
 199
 200 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 201 {
 202         struct net *net = dev_net(dev);
 203
 204         read_lock(&mrt_lock);
 205         dev->stats.tx_bytes += skb->len;
 206         dev->stats.tx_packets++;
 207         ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
 208                           IGMPMSG_WHOLEPKT);
 209         read_unlock(&mrt_lock);
 210         kfree_skb(skb);
 211         return NETDEV_TX_OK;
 212 }
 213
 214 static const struct net_device_ops reg_vif_netdev_ops = {
 215         .ndo_start_xmit = reg_vif_xmit,
 216 };
 217
 218 static void reg_vif_setup(struct net_device *dev)
 219 {
 220         dev->type               = ARPHRD_PIMREG;
 221         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
 222         dev->flags              = IFF_NOARP;
 223         dev->netdev_ops         = &reg_vif_netdev_ops,
 224         dev->destructor         = free_netdev;
 225         dev->features           |= NETIF_F_NETNS_LOCAL;
 226 }
 227
 228 static struct net_device *ipmr_reg_vif(struct net *net)
 229 {
 230         struct net_device *dev;
 231         struct in_device *in_dev;
 232
 233         dev = alloc_netdev(0, "pimreg", reg_vif_setup);
 234
 235         if (dev == NULL)
 236                 return NULL;
 237
 238         dev_net_set(dev, net);
 239
 240         if (register_netdevice(dev)) {
 241                 free_netdev(dev);
 242                 return NULL;
 243         }
 244         dev->iflink = 0;
 245
 246         rcu_read_lock();
 247         if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
 248                 rcu_read_unlock();
 249                 goto failure;
 250         }
 251
 252         ipv4_devconf_setall(in_dev);
 253         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
 254         rcu_read_unlock();
 255
 256         if (dev_open(dev))
 257                 goto failure;
 258
 259         dev_hold(dev);
 260
 261         return dev;
 262
 263 failure:
 264         /* allow the register to be completed before unregistering. */
 265         rtnl_unlock();
 266         rtnl_lock();
 267
 268         unregister_netdevice(dev);
 269         return NULL;
 270 }
 271 #endif
 272
 273 /*
 274  *      Delete a VIF entry
 275  *      @notify: Set to 1, if the caller is a notifier_call
 276  */
 277
 278 static int vif_delete(struct net *net, int vifi, int notify,
 279                       struct list_head *head)
 280 {
 281         struct vif_device *v;
 282         struct net_device *dev;
 283         struct in_device *in_dev;
 284
 285         if (vifi < 0 || vifi >= net->ipv4.maxvif)
 286                 return -EADDRNOTAVAIL;
 287
 288         v = &net->ipv4.vif_table[vifi];
 289
 290         write_lock_bh(&mrt_lock);
 291         dev = v->dev;
 292         v->dev = NULL;
 293
 294         if (!dev) {
 295                 write_unlock_bh(&mrt_lock);
 296                 return -EADDRNOTAVAIL;
 297         }
 298
 299 #ifdef CONFIG_IP_PIMSM
 300         if (vifi == net->ipv4.mroute_reg_vif_num)
 301                 net->ipv4.mroute_reg_vif_num = -1;
 302 #endif
 303
 304         if (vifi+1 == net->ipv4.maxvif) {
 305                 int tmp;
 306                 for (tmp=vifi-1; tmp>=0; tmp--) {
 307                         if (VIF_EXISTS(net, tmp))
 308                                 break;
 309                 }
 310                 net->ipv4.maxvif = tmp+1;
 311         }
 312
 313         write_unlock_bh(&mrt_lock);
 314
 315         dev_set_allmulti(dev, -1);
 316
 317         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
 318                 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
 319                 ip_rt_multicast_event(in_dev);
 320         }
 321
 322         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
 323                 unregister_netdevice_queue(dev, head);
 324
 325         dev_put(dev);
 326         return 0;
 327 }
 328
 329 static inline void ipmr_cache_free(struct mfc_cache *c)
 330 {
 331         release_net(mfc_net(c));
 332         kmem_cache_free(mrt_cachep, c);
 333 }
 334
 335 /* Destroy an unresolved cache entry, killing queued skbs
 336    and reporting error to netlink readers.
 337  */
 338
 339 static void ipmr_destroy_unres(struct mfc_cache *c)
 340 {
 341         struct sk_buff *skb;
 342         struct nlmsgerr *e;
 343         struct net *net = mfc_net(c);
 344
 345         atomic_dec(&net->ipv4.cache_resolve_queue_len);
 346
 347         while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
 348                 if (ip_hdr(skb)->version == 0) {
 349                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 350                         nlh->nlmsg_type = NLMSG_ERROR;
 351                         nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 352                         skb_trim(skb, nlh->nlmsg_len);
 353                         e = NLMSG_DATA(nlh);
 354                         e->error = -ETIMEDOUT;
 355                         memset(&e->msg, 0, sizeof(e->msg));
 356
 357                         rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
 358                 } else
 359                         kfree_skb(skb);
 360         }
 361
 362         ipmr_cache_free(c);
 363 }
 364
 365
 366 /* Single timer process for all the unresolved queue. */
 367
 368 static void ipmr_expire_process(unsigned long dummy)
 369 {
 370         unsigned long now;
 371         unsigned long expires;
 372         struct mfc_cache *c, **cp;
 373
 374         if (!spin_trylock(&mfc_unres_lock)) {
 375                 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
 376                 return;
 377         }
 378
 379         if (mfc_unres_queue == NULL)
 380                 goto out;
 381
 382         now = jiffies;
 383         expires = 10*HZ;
 384         cp = &mfc_unres_queue;
 385
 386         while ((c=*cp) != NULL) {
 387                 if (time_after(c->mfc_un.unres.expires, now)) {
 388                         unsigned long interval = c->mfc_un.unres.expires - now;
 389                         if (interval < expires)
 390                                 expires = interval;
 391                         cp = &c->next;
 392                         continue;
 393                 }
 394
 395                 *cp = c->next;
 396
 397                 ipmr_destroy_unres(c);
 398         }
 399
 400         if (mfc_unres_queue != NULL)
 401                 mod_timer(&ipmr_expire_timer, jiffies + expires);
 402
 403 out:
 404         spin_unlock(&mfc_unres_lock);
 405 }
 406
 407 /* Fill oifs list. It is called under write locked mrt_lock. */
 408
 409 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
 410 {
 411         int vifi;
 412         struct net *net = mfc_net(cache);
 413
 414         cache->mfc_un.res.minvif = MAXVIFS;
 415         cache->mfc_un.res.maxvif = 0;
 416         memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
 417
 418         for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
 419                 if (VIF_EXISTS(net, vifi) &&
 420                     ttls[vifi] && ttls[vifi] < 255) {
 421                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 422                         if (cache->mfc_un.res.minvif > vifi)
 423                                 cache->mfc_un.res.minvif = vifi;
 424                         if (cache->mfc_un.res.maxvif <= vifi)
 425                                 cache->mfc_un.res.maxvif = vifi + 1;
 426                 }
 427         }
 428 }
 429
 430 static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
 431 {
 432         int vifi = vifc->vifc_vifi;
 433         struct vif_device *v = &net->ipv4.vif_table[vifi];
 434         struct net_device *dev;
 435         struct in_device *in_dev;
 436         int err;
 437
 438         /* Is vif busy ? */
 439         if (VIF_EXISTS(net, vifi))
 440                 return -EADDRINUSE;
 441
 442         switch (vifc->vifc_flags) {
 443 #ifdef CONFIG_IP_PIMSM
 444         case VIFF_REGISTER:
 445                 /*
 446                  * Special Purpose VIF in PIM
 447                  * All the packets will be sent to the daemon
 448                  */
 449                 if (net->ipv4.mroute_reg_vif_num >= 0)
 450                         return -EADDRINUSE;
 451                 dev = ipmr_reg_vif(net);
 452                 if (!dev)
 453                         return -ENOBUFS;
 454                 err = dev_set_allmulti(dev, 1);
 455                 if (err) {
 456                         unregister_netdevice(dev);
 457                         dev_put(dev);
 458                         return err;
 459                 }
 460                 break;
 461 #endif
 462         case VIFF_TUNNEL:
 463                 dev = ipmr_new_tunnel(net, vifc);
 464                 if (!dev)
 465                         return -ENOBUFS;
 466                 err = dev_set_allmulti(dev, 1);
 467                 if (err) {
 468                         ipmr_del_tunnel(dev, vifc);
 469                         dev_put(dev);
 470                         return err;
 471                 }
 472                 break;
 473
 474         case VIFF_USE_IFINDEX:
 475         case 0:
 476                 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
 477                         dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
 478                         if (dev && dev->ip_ptr == NULL) {
 479                                 dev_put(dev);
 480                                 return -EADDRNOTAVAIL;
 481                         }
 482                 } else
 483                         dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
 484
 485                 if (!dev)
 486                         return -EADDRNOTAVAIL;
 487                 err = dev_set_allmulti(dev, 1);
 488                 if (err) {
 489                         dev_put(dev);
 490                         return err;
 491                 }
 492                 break;
 493         default:
 494                 return -EINVAL;
 495         }
 496
 497         if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
 498                 dev_put(dev);
 499                 return -EADDRNOTAVAIL;
 500         }
 501         IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
 502         ip_rt_multicast_event(in_dev);
 503
 504         /*
 505          *      Fill in the VIF structures
 506          */
 507         v->rate_limit = vifc->vifc_rate_limit;
 508         v->local = vifc->vifc_lcl_addr.s_addr;
 509         v->remote = vifc->vifc_rmt_addr.s_addr;
 510         v->flags = vifc->vifc_flags;
 511         if (!mrtsock)
 512                 v->flags |= VIFF_STATIC;
 513         v->threshold = vifc->vifc_threshold;
 514         v->bytes_in = 0;
 515         v->bytes_out = 0;
 516         v->pkt_in = 0;
 517         v->pkt_out = 0;
 518         v->link = dev->ifindex;
 519         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
 520                 v->link = dev->iflink;
 521
 522         /* And finish update writing critical data */
 523         write_lock_bh(&mrt_lock);
 524         v->dev = dev;
 525 #ifdef CONFIG_IP_PIMSM
 526         if (v->flags&VIFF_REGISTER)
 527                 net->ipv4.mroute_reg_vif_num = vifi;
 528 #endif
 529         if (vifi+1 > net->ipv4.maxvif)
 530                 net->ipv4.maxvif = vifi+1;
 531         write_unlock_bh(&mrt_lock);
 532         return 0;
 533 }
 534
 535 static struct mfc_cache *ipmr_cache_find(struct net *net,
 536                                          __be32 origin,
 537                                          __be32 mcastgrp)
 538 {
 539         int line = MFC_HASH(mcastgrp, origin);
 540         struct mfc_cache *c;
 541
 542         for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
 543                 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
 544                         break;
 545         }
 546         return c;
 547 }
 548
 549 /*
 550  *      Allocate a multicast cache entry
 551  */
 552 static struct mfc_cache *ipmr_cache_alloc(struct net *net)
 553 {
 554         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 555         if (c == NULL)
 556                 return NULL;
 557         c->mfc_un.res.minvif = MAXVIFS;
 558         mfc_net_set(c, net);
 559         return c;
 560 }
 561
 562 static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
 563 {
 564         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 565         if (c == NULL)
 566                 return NULL;
 567         skb_queue_head_init(&c->mfc_un.unres.unresolved);
 568         c->mfc_un.unres.expires = jiffies + 10*HZ;
 569         mfc_net_set(c, net);
 570         return c;
 571 }
 572
 573 /*
 574  *      A cache entry has gone into a resolved state from queued
 575  */
 576
 577 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 578 {
 579         struct sk_buff *skb;
 580         struct nlmsgerr *e;
 581
 582         /*
 583          *      Play the pending entries through our router
 584          */
 585
 586         while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 587                 if (ip_hdr(skb)->version == 0) {
 588                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 589
 590                         if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
 591                                 nlh->nlmsg_len = (skb_tail_pointer(skb) -
 592                                                   (u8 *)nlh);
 593                         } else {
 594                                 nlh->nlmsg_type = NLMSG_ERROR;
 595                                 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 596                                 skb_trim(skb, nlh->nlmsg_len);
 597                                 e = NLMSG_DATA(nlh);
 598                                 e->error = -EMSGSIZE;
 599                                 memset(&e->msg, 0, sizeof(e->msg));
 600                         }
 601
 602                         rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
 603                 } else
 604                         ip_mr_forward(skb, c, 0);
 605         }
 606 }
 607
 608 /*
 609  *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 610  *      expects the following bizarre scheme.
 611  *
 612  *      Called under mrt_lock.
 613  */
 614
 615 static int ipmr_cache_report(struct net *net,
 616                              struct sk_buff *pkt, vifi_t vifi, int assert)
 617 {
 618         struct sk_buff *skb;
 619         const int ihl = ip_hdrlen(pkt);
 620         struct igmphdr *igmp;
 621         struct igmpmsg *msg;
 622         int ret;
 623
 624 #ifdef CONFIG_IP_PIMSM
 625         if (assert == IGMPMSG_WHOLEPKT)
 626                 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
 627         else
 628 #endif
 629                 skb = alloc_skb(128, GFP_ATOMIC);
 630
 631         if (!skb)
 632                 return -ENOBUFS;
 633
 634 #ifdef CONFIG_IP_PIMSM
 635         if (assert == IGMPMSG_WHOLEPKT) {
 636                 /* Ugly, but we have no choice with this interface.
 637                    Duplicate old header, fix ihl, length etc.
 638                    And all this only to mangle msg->im_msgtype and
 639                    to set msg->im_mbz to "mbz" :-)
 640                  */
 641                 skb_push(skb, sizeof(struct iphdr));
 642                 skb_reset_network_header(skb);
 643                 skb_reset_transport_header(skb);
 644                 msg = (struct igmpmsg *)skb_network_header(skb);
 645                 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
 646                 msg->im_msgtype = IGMPMSG_WHOLEPKT;
 647                 msg->im_mbz = 0;
 648                 msg->im_vif = net->ipv4.mroute_reg_vif_num;
 649                 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
 650                 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
 651                                              sizeof(struct iphdr));
 652         } else
 653 #endif
 654         {
 655
 656         /*
 657          *      Copy the IP header
 658          */
 659
 660         skb->network_header = skb->tail;
 661         skb_put(skb, ihl);
 662         skb_copy_to_linear_data(skb, pkt->data, ihl);
 663         ip_hdr(skb)->protocol = 0;                      /* Flag to the kernel this is a route add */
 664         msg = (struct igmpmsg *)skb_network_header(skb);
 665         msg->im_vif = vifi;
 666         skb_dst_set(skb, dst_clone(skb_dst(pkt)));
 667
 668         /*
 669          *      Add our header
 670          */
 671
 672         igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
 673         igmp->type      =
 674         msg->im_msgtype = assert;
 675         igmp->code      =       0;
 676         ip_hdr(skb)->tot_len = htons(skb->len);                 /* Fix the length */
 677         skb->transport_header = skb->network_header;
 678         }
 679
 680         if (net->ipv4.mroute_sk == NULL) {
 681                 kfree_skb(skb);
 682                 return -EINVAL;
 683         }
 684
 685         /*
 686          *      Deliver to mrouted
 687          */
 688         ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
 689         if (ret < 0) {
 690                 if (net_ratelimit())
 691                         printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
 692                 kfree_skb(skb);
 693         }
 694
 695         return ret;
 696 }
 697
 698 /*
 699  *      Queue a packet for resolution. It gets locked cache entry!
 700  */
 701
 702 static int
 703 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 704 {
 705         int err;
 706         struct mfc_cache *c;
 707         const struct iphdr *iph = ip_hdr(skb);
 708
 709         spin_lock_bh(&mfc_unres_lock);
 710         for (c=mfc_unres_queue; c; c=c->next) {
 711                 if (net_eq(mfc_net(c), net) &&
 712                     c->mfc_mcastgrp == iph->daddr &&
 713                     c->mfc_origin == iph->saddr)
 714                         break;
 715         }
 716
 717         if (c == NULL) {
 718                 /*
 719                  *      Create a new entry if allowable
 720                  */
 721
 722                 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
 723                     (c = ipmr_cache_alloc_unres(net)) == NULL) {
 724                         spin_unlock_bh(&mfc_unres_lock);
 725
 726                         kfree_skb(skb);
 727                         return -ENOBUFS;
 728                 }
 729
 730                 /*
 731                  *      Fill in the new cache entry
 732                  */
 733                 c->mfc_parent   = -1;
 734                 c->mfc_origin   = iph->saddr;
 735                 c->mfc_mcastgrp = iph->daddr;
 736
 737                 /*
 738                  *      Reflect first query at mrouted.
 739                  */
 740                 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
 741                 if (err < 0) {
 742                         /* If the report failed throw the cache entry
 743                            out - Brad Parker
 744                          */
 745                         spin_unlock_bh(&mfc_unres_lock);
 746
 747                         ipmr_cache_free(c);
 748                         kfree_skb(skb);
 749                         return err;
 750                 }
 751
 752                 atomic_inc(&net->ipv4.cache_resolve_queue_len);
 753                 c->next = mfc_unres_queue;
 754                 mfc_unres_queue = c;
 755
 756                 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
 757         }
 758
 759         /*
 760          *      See if we can append the packet
 761          */
 762         if (c->mfc_un.unres.unresolved.qlen>3) {
 763                 kfree_skb(skb);
 764                 err = -ENOBUFS;
 765         } else {
 766                 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
 767                 err = 0;
 768         }
 769
 770         spin_unlock_bh(&mfc_unres_lock);
 771         return err;
 772 }
 773
 774 /*
 775  *      MFC cache manipulation by user space mroute daemon
 776  */
 777
 778 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
 779 {
 780         int line;
 781         struct mfc_cache *c, **cp;
 782
 783         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 784
 785         for (cp = &net->ipv4.mfc_cache_array[line];
 786              (c = *cp) != NULL; cp = &c->next) {
 787                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 788                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
 789                         write_lock_bh(&mrt_lock);
 790                         *cp = c->next;
 791                         write_unlock_bh(&mrt_lock);
 792
 793                         ipmr_cache_free(c);
 794                         return 0;
 795                 }
 796         }
 797         return -ENOENT;
 798 }
 799
 800 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 801 {
 802         int line;
 803         struct mfc_cache *uc, *c, **cp;
 804
 805         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 806
 807         for (cp = &net->ipv4.mfc_cache_array[line];
 808              (c = *cp) != NULL; cp = &c->next) {
 809                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 810                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
 811                         break;
 812         }
 813
 814         if (c != NULL) {
 815                 write_lock_bh(&mrt_lock);
 816                 c->mfc_parent = mfc->mfcc_parent;
 817                 ipmr_update_thresholds(c, mfc->mfcc_ttls);
 818                 if (!mrtsock)
 819                         c->mfc_flags |= MFC_STATIC;
 820                 write_unlock_bh(&mrt_lock);
 821                 return 0;
 822         }
 823
 824         if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
 825                 return -EINVAL;
 826
 827         c = ipmr_cache_alloc(net);
 828         if (c == NULL)
 829                 return -ENOMEM;
 830
 831         c->mfc_origin = mfc->mfcc_origin.s_addr;
 832         c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
 833         c->mfc_parent = mfc->mfcc_parent;
 834         ipmr_update_thresholds(c, mfc->mfcc_ttls);
 835         if (!mrtsock)
 836                 c->mfc_flags |= MFC_STATIC;
 837
 838         write_lock_bh(&mrt_lock);
 839         c->next = net->ipv4.mfc_cache_array[line];
 840         net->ipv4.mfc_cache_array[line] = c;
 841         write_unlock_bh(&mrt_lock);
 842
 843         /*
 844          *      Check to see if we resolved a queued list. If so we
 845          *      need to send on the frames and tidy up.
 846          */
 847         spin_lock_bh(&mfc_unres_lock);
 848         for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
 849              cp = &uc->next) {
 850                 if (net_eq(mfc_net(uc), net) &&
 851                     uc->mfc_origin == c->mfc_origin &&
 852                     uc->mfc_mcastgrp == c->mfc_mcastgrp) {
 853                         *cp = uc->next;
 854                         atomic_dec(&net->ipv4.cache_resolve_queue_len);
 855                         break;
 856                 }
 857         }
 858         if (mfc_unres_queue == NULL)
 859                 del_timer(&ipmr_expire_timer);
 860         spin_unlock_bh(&mfc_unres_lock);
 861
 862         if (uc) {
 863                 ipmr_cache_resolve(uc, c);
 864                 ipmr_cache_free(uc);
 865         }
 866         return 0;
 867 }
 868
 869 /*
 870  *      Close the multicast socket, and clear the vif tables etc
 871  */
 872
 873 static void mroute_clean_tables(struct net *net)
 874 {
 875         int i;
 876         LIST_HEAD(list);
 877
 878         /*
 879          *      Shut down all active vif entries
 880          */
 881         for (i = 0; i < net->ipv4.maxvif; i++) {
 882                 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
 883                         vif_delete(net, i, 0, &list);
 884         }
 885         unregister_netdevice_many(&list);
 886
 887         /*
 888          *      Wipe the cache
 889          */
 890         for (i=0; i<MFC_LINES; i++) {
 891                 struct mfc_cache *c, **cp;
 892
 893                 cp = &net->ipv4.mfc_cache_array[i];
 894                 while ((c = *cp) != NULL) {
 895                         if (c->mfc_flags&MFC_STATIC) {
 896                                 cp = &c->next;
 897                                 continue;
 898                         }
 899                         write_lock_bh(&mrt_lock);
 900                         *cp = c->next;
 901                         write_unlock_bh(&mrt_lock);
 902
 903                         ipmr_cache_free(c);
 904                 }
 905         }
 906
 907         if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
 908                 struct mfc_cache *c, **cp;
 909
 910                 spin_lock_bh(&mfc_unres_lock);
 911                 cp = &mfc_unres_queue;
 912                 while ((c = *cp) != NULL) {
 913                         if (!net_eq(mfc_net(c), net)) {
 914                                 cp = &c->next;
 915                                 continue;
 916                         }
 917                         *cp = c->next;
 918
 919                         ipmr_destroy_unres(c);
 920                 }
 921                 spin_unlock_bh(&mfc_unres_lock);
 922         }
 923 }
 924
 925 static void mrtsock_destruct(struct sock *sk)
 926 {
 927         struct net *net = sock_net(sk);
 928
 929         rtnl_lock();
 930         if (sk == net->ipv4.mroute_sk) {
 931                 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
 932
 933                 write_lock_bh(&mrt_lock);
 934                 net->ipv4.mroute_sk = NULL;
 935                 write_unlock_bh(&mrt_lock);
 936
 937                 mroute_clean_tables(net);
 938         }
 939         rtnl_unlock();
 940 }
 941
 942 /*
 943  *      Socket options and virtual interface manipulation. The whole
 944  *      virtual interface system is a complete heap, but unfortunately
 945  *      that's how BSD mrouted happens to think. Maybe one day with a proper
 946  *      MOSPF/PIM router set up we can clean this up.
 947  */
 948
 949 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
 950 {
 951         int ret;
 952         struct vifctl vif;
 953         struct mfcctl mfc;
 954         struct net *net = sock_net(sk);
 955
 956         if (optname != MRT_INIT) {
 957                 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
 958                         return -EACCES;
 959         }
 960
 961         switch (optname) {
 962         case MRT_INIT:
 963                 if (sk->sk_type != SOCK_RAW ||
 964                     inet_sk(sk)->inet_num != IPPROTO_IGMP)
 965                         return -EOPNOTSUPP;
 966                 if (optlen != sizeof(int))
 967                         return -ENOPROTOOPT;
 968
 969                 rtnl_lock();
 970                 if (net->ipv4.mroute_sk) {
 971                         rtnl_unlock();
 972                         return -EADDRINUSE;
 973                 }
 974
 975                 ret = ip_ra_control(sk, 1, mrtsock_destruct);
 976                 if (ret == 0) {
 977                         write_lock_bh(&mrt_lock);
 978                         net->ipv4.mroute_sk = sk;
 979                         write_unlock_bh(&mrt_lock);
 980
 981                         IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
 982                 }
 983                 rtnl_unlock();
 984                 return ret;
 985         case MRT_DONE:
 986                 if (sk != net->ipv4.mroute_sk)
 987                         return -EACCES;
 988                 return ip_ra_control(sk, 0, NULL);
 989         case MRT_ADD_VIF:
 990         case MRT_DEL_VIF:
 991                 if (optlen != sizeof(vif))
 992                         return -EINVAL;
 993                 if (copy_from_user(&vif, optval, sizeof(vif)))
 994                         return -EFAULT;
 995                 if (vif.vifc_vifi >= MAXVIFS)
 996                         return -ENFILE;
 997                 rtnl_lock();
 998                 if (optname == MRT_ADD_VIF) {
 999                         ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
1000                 } else {
1001                         ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
1002                 }
1003                 rtnl_unlock();
1004                 return ret;
1005
1006                 /*
1007                  *      Manipulate the forwarding caches. These live
1008                  *      in a sort of kernel/user symbiosis.
1009                  */
1010         case MRT_ADD_MFC:
1011         case MRT_DEL_MFC:
1012                 if (optlen != sizeof(mfc))
1013                         return -EINVAL;
1014                 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1015                         return -EFAULT;
1016                 rtnl_lock();
1017                 if (optname == MRT_DEL_MFC)
1018                         ret = ipmr_mfc_delete(net, &mfc);
1019                 else
1020                         ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
1021                 rtnl_unlock();
1022                 return ret;
1023                 /*
1024                  *      Control PIM assert.
1025                  */
1026         case MRT_ASSERT:
1027         {
1028                 int v;
1029                 if (get_user(v,(int __user *)optval))
1030                         return -EFAULT;
1031                 net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1032                 return 0;
1033         }
1034 #ifdef CONFIG_IP_PIMSM
1035         case MRT_PIM:
1036         {
1037                 int v;
1038
1039                 if (get_user(v,(int __user *)optval))
1040                         return -EFAULT;
1041                 v = (v) ? 1 : 0;
1042
1043                 rtnl_lock();
1044                 ret = 0;
1045                 if (v != net->ipv4.mroute_do_pim) {
1046                         net->ipv4.mroute_do_pim = v;
1047                         net->ipv4.mroute_do_assert = v;
1048                 }
1049                 rtnl_unlock();
1050                 return ret;
1051         }
1052 #endif
1053         /*
1054          *      Spurious command, or MRT_VERSION which you cannot
1055          *      set.
1056          */
1057         default:
1058                 return -ENOPROTOOPT;
1059         }
1060 }
1061
1062 /*
1063  *      Getsock opt support for the multicast routing system.
1064  */
1065
1066 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1067 {
1068         int olr;
1069         int val;
1070         struct net *net = sock_net(sk);
1071
1072         if (optname != MRT_VERSION &&
1073 #ifdef CONFIG_IP_PIMSM
1074            optname!=MRT_PIM &&
1075 #endif
1076            optname!=MRT_ASSERT)
1077                 return -ENOPROTOOPT;
1078
1079         if (get_user(olr, optlen))
1080                 return -EFAULT;
1081
1082         olr = min_t(unsigned int, olr, sizeof(int));
1083         if (olr < 0)
1084                 return -EINVAL;
1085
1086         if (put_user(olr, optlen))
1087                 return -EFAULT;
1088         if (optname == MRT_VERSION)
1089                 val = 0x0305;
1090 #ifdef CONFIG_IP_PIMSM
1091         else if (optname == MRT_PIM)
1092                 val = net->ipv4.mroute_do_pim;
1093 #endif
1094         else
1095                 val = net->ipv4.mroute_do_assert;
1096         if (copy_to_user(optval, &val, olr))
1097                 return -EFAULT;
1098         return 0;
1099 }
1100
1101 /*
1102  *      The IP multicast ioctl support routines.
1103  */
1104
1105 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1106 {
1107         struct sioc_sg_req sr;
1108         struct sioc_vif_req vr;
1109         struct vif_device *vif;
1110         struct mfc_cache *c;
1111         struct net *net = sock_net(sk);
1112
1113         switch (cmd) {
1114         case SIOCGETVIFCNT:
1115                 if (copy_from_user(&vr, arg, sizeof(vr)))
1116                         return -EFAULT;
1117                 if (vr.vifi >= net->ipv4.maxvif)
1118                         return -EINVAL;
1119                 read_lock(&mrt_lock);
1120                 vif = &net->ipv4.vif_table[vr.vifi];
1121                 if (VIF_EXISTS(net, vr.vifi)) {
1122                         vr.icount = vif->pkt_in;
1123                         vr.ocount = vif->pkt_out;
1124                         vr.ibytes = vif->bytes_in;
1125                         vr.obytes = vif->bytes_out;
1126                         read_unlock(&mrt_lock);
1127
1128                         if (copy_to_user(arg, &vr, sizeof(vr)))
1129                                 return -EFAULT;
1130                         return 0;
1131                 }
1132                 read_unlock(&mrt_lock);
1133                 return -EADDRNOTAVAIL;
1134         case SIOCGETSGCNT:
1135                 if (copy_from_user(&sr, arg, sizeof(sr)))
1136                         return -EFAULT;
1137
1138                 read_lock(&mrt_lock);
1139                 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1140                 if (c) {
1141                         sr.pktcnt = c->mfc_un.res.pkt;
1142                         sr.bytecnt = c->mfc_un.res.bytes;
1143                         sr.wrong_if = c->mfc_un.res.wrong_if;
1144                         read_unlock(&mrt_lock);
1145
1146                         if (copy_to_user(arg, &sr, sizeof(sr)))
1147                                 return -EFAULT;
1148                         return 0;
1149                 }
1150                 read_unlock(&mrt_lock);
1151                 return -EADDRNOTAVAIL;
1152         default:
1153                 return -ENOIOCTLCMD;
1154         }
1155 }
1156
1157
1158 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1159 {
1160         struct net_device *dev = ptr;
1161         struct net *net = dev_net(dev);
1162         struct vif_device *v;
1163         int ct;
1164         LIST_HEAD(list);
1165
1166         if (!net_eq(dev_net(dev), net))
1167                 return NOTIFY_DONE;
1168
1169         if (event != NETDEV_UNREGISTER)
1170                 return NOTIFY_DONE;
1171         v = &net->ipv4.vif_table[0];
1172         for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1173                 if (v->dev == dev)
1174                         vif_delete(net, ct, 1, &list);
1175         }
1176         unregister_netdevice_many(&list);
1177         return NOTIFY_DONE;
1178 }
1179
1180
/* Notifier block whose callback is ipmr_device_event(). */
static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};
1184
1185 /*
1186  *      Encapsulate a packet by attaching a valid IPIP header to it.
1187  *      This avoids tunnel drivers and other mess and gives us the speed so
1188  *      important for multicast video.
1189  */
1190
1191 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1192 {
1193         struct iphdr *iph;
1194         struct iphdr *old_iph = ip_hdr(skb);
1195
1196         skb_push(skb, sizeof(struct iphdr));
1197         skb->transport_header = skb->network_header;
1198         skb_reset_network_header(skb);
1199         iph = ip_hdr(skb);
1200
1201         iph->version    =       4;
1202         iph->tos        =       old_iph->tos;
1203         iph->ttl        =       old_iph->ttl;
1204         iph->frag_off   =       0;
1205         iph->daddr      =       daddr;
1206         iph->saddr      =       saddr;
1207         iph->protocol   =       IPPROTO_IPIP;
1208         iph->ihl        =       5;
1209         iph->tot_len    =       htons(skb->len);
1210         ip_select_ident(iph, skb_dst(skb), NULL);
1211         ip_send_check(iph);
1212
1213         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1214         nf_reset(skb);
1215 }
1216
1217 static inline int ipmr_forward_finish(struct sk_buff *skb)
1218 {
1219         struct ip_options * opt = &(IPCB(skb)->opt);
1220
1221         IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1222
1223         if (unlikely(opt->optlen))
1224                 ip_forward_options(skb);
1225
1226         return dst_output(skb);
1227 }
1228
1229 /*
1230  *      Processing handlers for ipmr_forward
1231  */
1232
1233 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1234 {
1235         struct net *net = mfc_net(c);
1236         const struct iphdr *iph = ip_hdr(skb);
1237         struct vif_device *vif = &net->ipv4.vif_table[vifi];
1238         struct net_device *dev;
1239         struct rtable *rt;
1240         int    encap = 0;
1241
1242         if (vif->dev == NULL)
1243                 goto out_free;
1244
1245 #ifdef CONFIG_IP_PIMSM
1246         if (vif->flags & VIFF_REGISTER) {
1247                 vif->pkt_out++;
1248                 vif->bytes_out += skb->len;
1249                 vif->dev->stats.tx_bytes += skb->len;
1250                 vif->dev->stats.tx_packets++;
1251                 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
1252                 goto out_free;
1253         }
1254 #endif
1255
1256         if (vif->flags&VIFF_TUNNEL) {
1257                 struct flowi fl = { .oif = vif->link,
1258                                     .nl_u = { .ip4_u =
1259                                               { .daddr = vif->remote,
1260                                                 .saddr = vif->local,
1261                                                 .tos = RT_TOS(iph->tos) } },
1262                                     .proto = IPPROTO_IPIP };
1263                 if (ip_route_output_key(net, &rt, &fl))
1264                         goto out_free;
1265                 encap = sizeof(struct iphdr);
1266         } else {
1267                 struct flowi fl = { .oif = vif->link,
1268                                     .nl_u = { .ip4_u =
1269                                               { .daddr = iph->daddr,
1270                                                 .tos = RT_TOS(iph->tos) } },
1271                                     .proto = IPPROTO_IPIP };
1272                 if (ip_route_output_key(net, &rt, &fl))
1273                         goto out_free;
1274         }
1275
1276         dev = rt->u.dst.dev;
1277
1278         if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1279                 /* Do not fragment multicasts. Alas, IPv4 does not
1280                    allow to send ICMP, so that packets will disappear
1281                    to blackhole.
1282                  */
1283
1284                 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1285                 ip_rt_put(rt);
1286                 goto out_free;
1287         }
1288
1289         encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1290
1291         if (skb_cow(skb, encap)) {
1292                 ip_rt_put(rt);
1293                 goto out_free;
1294         }
1295
1296         vif->pkt_out++;
1297         vif->bytes_out += skb->len;
1298
1299         skb_dst_drop(skb);
1300         skb_dst_set(skb, &rt->u.dst);
1301         ip_decrease_ttl(ip_hdr(skb));
1302
1303         /* FIXME: forward and output firewalls used to be called here.
1304          * What do we do with netfilter? -- RR */
1305         if (vif->flags & VIFF_TUNNEL) {
1306                 ip_encap(skb, vif->local, vif->remote);
1307                 /* FIXME: extra output firewall step used to be here. --RR */
1308                 vif->dev->stats.tx_packets++;
1309                 vif->dev->stats.tx_bytes += skb->len;
1310         }
1311
1312         IPCB(skb)->flags |= IPSKB_FORWARDED;
1313
1314         /*
1315          * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1316          * not only before forwarding, but after forwarding on all output
1317          * interfaces. It is clear, if mrouter runs a multicasting
1318          * program, it should receive packets not depending to what interface
1319          * program is joined.
1320          * If we will not make it, the program will have to join on all
1321          * interfaces. On the other hand, multihoming host (or router, but
1322          * not mrouter) cannot join to more than one interface - it will
1323          * result in receiving multiple packets.
1324          */
1325         NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1326                 ipmr_forward_finish);
1327         return;
1328
1329 out_free:
1330         kfree_skb(skb);
1331         return;
1332 }
1333
1334 static int ipmr_find_vif(struct net_device *dev)
1335 {
1336         struct net *net = dev_net(dev);
1337         int ct;
1338         for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1339                 if (net->ipv4.vif_table[ct].dev == dev)
1340                         break;
1341         }
1342         return ct;
1343 }
1344
1345 /* "local" means that we should preserve one skb (for local delivery) */
1346
1347 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1348 {
1349         int psend = -1;
1350         int vif, ct;
1351         struct net *net = mfc_net(cache);
1352
1353         vif = cache->mfc_parent;
1354         cache->mfc_un.res.pkt++;
1355         cache->mfc_un.res.bytes += skb->len;
1356
1357         /*
1358          * Wrong interface: drop packet and (maybe) send PIM assert.
1359          */
1360         if (net->ipv4.vif_table[vif].dev != skb->dev) {
1361                 int true_vifi;
1362
1363                 if (skb_rtable(skb)->fl.iif == 0) {
1364                         /* It is our own packet, looped back.
1365                            Very complicated situation...
1366
1367                            The best workaround until routing daemons will be
1368                            fixed is not to redistribute packet, if it was
1369                            send through wrong interface. It means, that
1370                            multicast applications WILL NOT work for
1371                            (S,G), which have default multicast route pointing
1372                            to wrong oif. In any case, it is not a good
1373                            idea to use multicasting applications on router.
1374                          */
1375                         goto dont_forward;
1376                 }
1377
1378                 cache->mfc_un.res.wrong_if++;
1379                 true_vifi = ipmr_find_vif(skb->dev);
1380
1381                 if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1382                     /* pimsm uses asserts, when switching from RPT to SPT,
1383                        so that we cannot check that packet arrived on an oif.
1384                        It is bad, but otherwise we would need to move pretty
1385                        large chunk of pimd to kernel. Ough... --ANK
1386                      */
1387                     (net->ipv4.mroute_do_pim ||
1388                      cache->mfc_un.res.ttls[true_vifi] < 255) &&
1389                     time_after(jiffies,
1390                                cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1391                         cache->mfc_un.res.last_assert = jiffies;
1392                         ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1393                 }
1394                 goto dont_forward;
1395         }
1396
1397         net->ipv4.vif_table[vif].pkt_in++;
1398         net->ipv4.vif_table[vif].bytes_in += skb->len;
1399
1400         /*
1401          *      Forward the frame
1402          */
1403         for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1404                 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1405                         if (psend != -1) {
1406                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1407                                 if (skb2)
1408                                         ipmr_queue_xmit(skb2, cache, psend);
1409                         }
1410                         psend = ct;
1411                 }
1412         }
1413         if (psend != -1) {
1414                 if (local) {
1415                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1416                         if (skb2)
1417                                 ipmr_queue_xmit(skb2, cache, psend);
1418                 } else {
1419                         ipmr_queue_xmit(skb, cache, psend);
1420                         return 0;
1421                 }
1422         }
1423
1424 dont_forward:
1425         if (!local)
1426                 kfree_skb(skb);
1427         return 0;
1428 }
1429
1430
1431 /*
1432  *      Multicast packets for forwarding arrive here
1433  */
1434
1435 int ip_mr_input(struct sk_buff *skb)
1436 {
1437         struct mfc_cache *cache;
1438         struct net *net = dev_net(skb->dev);
1439         int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1440
1441         /* Packet is looped back after forward, it should not be
1442            forwarded second time, but still can be delivered locally.
1443          */
1444         if (IPCB(skb)->flags&IPSKB_FORWARDED)
1445                 goto dont_forward;
1446
1447         if (!local) {
1448                     if (IPCB(skb)->opt.router_alert) {
1449                             if (ip_call_ra_chain(skb))
1450                                     return 0;
1451                     } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1452                             /* IGMPv1 (and broken IGMPv2 implementations sort of
1453                                Cisco IOS <= 11.2(8)) do not put router alert
1454                                option to IGMP packets destined to routable
1455                                groups. It is very bad, because it means
1456                                that we can forward NO IGMP messages.
1457                              */
1458                             read_lock(&mrt_lock);
1459                             if (net->ipv4.mroute_sk) {
1460                                     nf_reset(skb);
1461                                     raw_rcv(net->ipv4.mroute_sk, skb);
1462                                     read_unlock(&mrt_lock);
1463                                     return 0;
1464                             }
1465                             read_unlock(&mrt_lock);
1466                     }
1467         }
1468
1469         read_lock(&mrt_lock);
1470         cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1471
1472         /*
1473          *      No usable cache entry
1474          */
1475         if (cache == NULL) {
1476                 int vif;
1477
1478                 if (local) {
1479                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1480                         ip_local_deliver(skb);
1481                         if (skb2 == NULL) {
1482                                 read_unlock(&mrt_lock);
1483                                 return -ENOBUFS;
1484                         }
1485                         skb = skb2;
1486                 }
1487
1488                 vif = ipmr_find_vif(skb->dev);
1489                 if (vif >= 0) {
1490                         int err = ipmr_cache_unresolved(net, vif, skb);
1491                         read_unlock(&mrt_lock);
1492
1493                         return err;
1494                 }
1495                 read_unlock(&mrt_lock);
1496                 kfree_skb(skb);
1497                 return -ENODEV;
1498         }
1499
1500         ip_mr_forward(skb, cache, local);
1501
1502         read_unlock(&mrt_lock);
1503
1504         if (local)
1505                 return ip_local_deliver(skb);
1506
1507         return 0;
1508
1509 dont_forward:
1510         if (local)
1511                 return ip_local_deliver(skb);
1512         kfree_skb(skb);
1513         return 0;
1514 }
1515
1516 #ifdef CONFIG_IP_PIMSM
1517 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1518 {
1519         struct net_device *reg_dev = NULL;
1520         struct iphdr *encap;
1521         struct net *net = dev_net(skb->dev);
1522
1523         encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1524         /*
1525            Check that:
1526            a. packet is really destinted to a multicast group
1527            b. packet is not a NULL-REGISTER
1528            c. packet is not truncated
1529          */
1530         if (!ipv4_is_multicast(encap->daddr) ||
1531             encap->tot_len == 0 ||
1532             ntohs(encap->tot_len) + pimlen > skb->len)
1533                 return 1;
1534
1535         read_lock(&mrt_lock);
1536         if (net->ipv4.mroute_reg_vif_num >= 0)
1537                 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1538         if (reg_dev)
1539                 dev_hold(reg_dev);
1540         read_unlock(&mrt_lock);
1541
1542         if (reg_dev == NULL)
1543                 return 1;
1544
1545         skb->mac_header = skb->network_header;
1546         skb_pull(skb, (u8*)encap - skb->data);
1547         skb_reset_network_header(skb);
1548         skb->dev = reg_dev;
1549         skb->protocol = htons(ETH_P_IP);
1550         skb->ip_summed = 0;
1551         skb->pkt_type = PACKET_HOST;
1552         skb_dst_drop(skb);
1553         reg_dev->stats.rx_bytes += skb->len;
1554         reg_dev->stats.rx_packets++;
1555         nf_reset(skb);
1556         netif_rx(skb);
1557         dev_put(reg_dev);
1558
1559         return 0;
1560 }
1561 #endif
1562
1563 #ifdef CONFIG_IP_PIMSM_V1
1564 /*
1565  * Handle IGMP messages of PIMv1
1566  */
1567
1568 int pim_rcv_v1(struct sk_buff * skb)
1569 {
1570         struct igmphdr *pim;
1571         struct net *net = dev_net(skb->dev);
1572
1573         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1574                 goto drop;
1575
1576         pim = igmp_hdr(skb);
1577
1578         if (!net->ipv4.mroute_do_pim ||
1579             pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1580                 goto drop;
1581
1582         if (__pim_rcv(skb, sizeof(*pim))) {
1583 drop:
1584                 kfree_skb(skb);
1585         }
1586         return 0;
1587 }
1588 #endif
1589
1590 #ifdef CONFIG_IP_PIMSM_V2
1591 static int pim_rcv(struct sk_buff * skb)
1592 {
1593         struct pimreghdr *pim;
1594
1595         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1596                 goto drop;
1597
1598         pim = (struct pimreghdr *)skb_transport_header(skb);
1599         if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1600             (pim->flags&PIM_NULL_REGISTER) ||
1601             (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1602              csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1603                 goto drop;
1604
1605         if (__pim_rcv(skb, sizeof(*pim))) {
1606 drop:
1607                 kfree_skb(skb);
1608         }
1609         return 0;
1610 }
1611 #endif
1612
1613 static int
1614 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1615 {
1616         int ct;
1617         struct rtnexthop *nhp;
1618         struct net *net = mfc_net(c);
1619         struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
1620         u8 *b = skb_tail_pointer(skb);
1621         struct rtattr *mp_head;
1622
1623         if (dev)
1624                 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1625
1626         mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1627
1628         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1629                 if (c->mfc_un.res.ttls[ct] < 255) {
1630                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1631                                 goto rtattr_failure;
1632                         nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1633                         nhp->rtnh_flags = 0;
1634                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1635                         nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1636                         nhp->rtnh_len = sizeof(*nhp);
1637                 }
1638         }
1639         mp_head->rta_type = RTA_MULTIPATH;
1640         mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1641         rtm->rtm_type = RTN_MULTICAST;
1642         return 1;
1643
1644 rtattr_failure:
1645         nlmsg_trim(skb, b);
1646         return -EMSGSIZE;
1647 }
1648
1649 int ipmr_get_route(struct net *net,
1650                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1651 {
1652         int err;
1653         struct mfc_cache *cache;
1654         struct rtable *rt = skb_rtable(skb);
1655
1656         read_lock(&mrt_lock);
1657         cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1658
1659         if (cache == NULL) {
1660                 struct sk_buff *skb2;
1661                 struct iphdr *iph;
1662                 struct net_device *dev;
1663                 int vif;
1664
1665                 if (nowait) {
1666                         read_unlock(&mrt_lock);
1667                         return -EAGAIN;
1668                 }
1669
1670                 dev = skb->dev;
1671                 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1672                         read_unlock(&mrt_lock);
1673                         return -ENODEV;
1674                 }
1675                 skb2 = skb_clone(skb, GFP_ATOMIC);
1676                 if (!skb2) {
1677                         read_unlock(&mrt_lock);
1678                         return -ENOMEM;
1679                 }
1680
1681                 skb_push(skb2, sizeof(struct iphdr));
1682                 skb_reset_network_header(skb2);
1683                 iph = ip_hdr(skb2);
1684                 iph->ihl = sizeof(struct iphdr) >> 2;
1685                 iph->saddr = rt->rt_src;
1686                 iph->daddr = rt->rt_dst;
1687                 iph->version = 0;
1688                 err = ipmr_cache_unresolved(net, vif, skb2);
1689                 read_unlock(&mrt_lock);
1690                 return err;
1691         }
1692
1693         if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1694                 cache->mfc_flags |= MFC_NOTIFY;
1695         err = ipmr_fill_mroute(skb, cache, rtm);
1696         read_unlock(&mrt_lock);
1697         return err;
1698 }
1699
1700 #ifdef CONFIG_PROC_FS
1701 /*
1702  *      The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1703  */
/* Per-open iterator state for the vif seq_file. */
struct ipmr_vif_iter {
	struct seq_net_private p;
	int ct;		/* vif table index the iteration is currently at */
};
1708
1709 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1710                                            struct ipmr_vif_iter *iter,
1711                                            loff_t pos)
1712 {
1713         for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1714                 if (!VIF_EXISTS(net, iter->ct))
1715                         continue;
1716                 if (pos-- == 0)
1717                         return &net->ipv4.vif_table[iter->ct];
1718         }
1719         return NULL;
1720 }
1721
1722 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1723         __acquires(mrt_lock)
1724 {
1725         struct net *net = seq_file_net(seq);
1726
1727         read_lock(&mrt_lock);
1728         return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1729                 : SEQ_START_TOKEN;
1730 }
1731
1732 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1733 {
1734         struct ipmr_vif_iter *iter = seq->private;
1735         struct net *net = seq_file_net(seq);
1736
1737         ++*pos;
1738         if (v == SEQ_START_TOKEN)
1739                 return ipmr_vif_seq_idx(net, iter, 0);
1740
1741         while (++iter->ct < net->ipv4.maxvif) {
1742                 if (!VIF_EXISTS(net, iter->ct))
1743                         continue;
1744                 return &net->ipv4.vif_table[iter->ct];
1745         }
1746         return NULL;
1747 }
1748
1749 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1750         __releases(mrt_lock)
1751 {
1752         read_unlock(&mrt_lock);
1753 }
1754
1755 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1756 {
1757         struct net *net = seq_file_net(seq);
1758
1759         if (v == SEQ_START_TOKEN) {
1760                 seq_puts(seq,
1761                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1762         } else {
1763                 const struct vif_device *vif = v;
1764                 const char *name =  vif->dev ? vif->dev->name : "none";
1765
1766                 seq_printf(seq,
1767                            "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1768                            vif - net->ipv4.vif_table,
1769                            name, vif->bytes_in, vif->pkt_in,
1770                            vif->bytes_out, vif->pkt_out,
1771                            vif->flags, vif->local, vif->remote);
1772         }
1773         return 0;
1774 }
1775
1776 static const struct seq_operations ipmr_vif_seq_ops = {
1777         .start = ipmr_vif_seq_start,
1778         .next  = ipmr_vif_seq_next,
1779         .stop  = ipmr_vif_seq_stop,
1780         .show  = ipmr_vif_seq_show,
1781 };
1782
1783 static int ipmr_vif_open(struct inode *inode, struct file *file)
1784 {
1785         return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1786                             sizeof(struct ipmr_vif_iter));
1787 }
1788
1789 static const struct file_operations ipmr_vif_fops = {
1790         .owner   = THIS_MODULE,
1791         .open    = ipmr_vif_open,
1792         .read    = seq_read,
1793         .llseek  = seq_lseek,
1794         .release = seq_release_net,
1795 };
1796
1797 struct ipmr_mfc_iter {
1798         struct seq_net_private p;
1799         struct mfc_cache **cache;
1800         int ct;
1801 };
1802
1803
1804 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1805                                           struct ipmr_mfc_iter *it, loff_t pos)
1806 {
1807         struct mfc_cache *mfc;
1808
1809         it->cache = net->ipv4.mfc_cache_array;
1810         read_lock(&mrt_lock);
1811         for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1812                 for (mfc = net->ipv4.mfc_cache_array[it->ct];
1813                      mfc; mfc = mfc->next)
1814                         if (pos-- == 0)
1815                                 return mfc;
1816         read_unlock(&mrt_lock);
1817
1818         it->cache = &mfc_unres_queue;
1819         spin_lock_bh(&mfc_unres_lock);
1820         for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1821                 if (net_eq(mfc_net(mfc), net) &&
1822                     pos-- == 0)
1823                         return mfc;
1824         spin_unlock_bh(&mfc_unres_lock);
1825
1826         it->cache = NULL;
1827         return NULL;
1828 }
1829
1830
1831 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1832 {
1833         struct ipmr_mfc_iter *it = seq->private;
1834         struct net *net = seq_file_net(seq);
1835
1836         it->cache = NULL;
1837         it->ct = 0;
1838         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1839                 : SEQ_START_TOKEN;
1840 }
1841
1842 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1843 {
1844         struct mfc_cache *mfc = v;
1845         struct ipmr_mfc_iter *it = seq->private;
1846         struct net *net = seq_file_net(seq);
1847
1848         ++*pos;
1849
1850         if (v == SEQ_START_TOKEN)
1851                 return ipmr_mfc_seq_idx(net, seq->private, 0);
1852
1853         if (mfc->next)
1854                 return mfc->next;
1855
1856         if (it->cache == &mfc_unres_queue)
1857                 goto end_of_list;
1858
1859         BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1860
1861         while (++it->ct < MFC_LINES) {
1862                 mfc = net->ipv4.mfc_cache_array[it->ct];
1863                 if (mfc)
1864                         return mfc;
1865         }
1866
1867         /* exhausted cache_array, show unresolved */
1868         read_unlock(&mrt_lock);
1869         it->cache = &mfc_unres_queue;
1870         it->ct = 0;
1871
1872         spin_lock_bh(&mfc_unres_lock);
1873         mfc = mfc_unres_queue;
1874         while (mfc && !net_eq(mfc_net(mfc), net))
1875                 mfc = mfc->next;
1876         if (mfc)
1877                 return mfc;
1878
1879  end_of_list:
1880         spin_unlock_bh(&mfc_unres_lock);
1881         it->cache = NULL;
1882
1883         return NULL;
1884 }
1885
1886 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1887 {
1888         struct ipmr_mfc_iter *it = seq->private;
1889         struct net *net = seq_file_net(seq);
1890
1891         if (it->cache == &mfc_unres_queue)
1892                 spin_unlock_bh(&mfc_unres_lock);
1893         else if (it->cache == net->ipv4.mfc_cache_array)
1894                 read_unlock(&mrt_lock);
1895 }
1896
1897 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1898 {
1899         int n;
1900         struct net *net = seq_file_net(seq);
1901
1902         if (v == SEQ_START_TOKEN) {
1903                 seq_puts(seq,
1904                  "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1905         } else {
1906                 const struct mfc_cache *mfc = v;
1907                 const struct ipmr_mfc_iter *it = seq->private;
1908
1909                 seq_printf(seq, "%08lX %08lX %-3hd",
1910                            (unsigned long) mfc->mfc_mcastgrp,
1911                            (unsigned long) mfc->mfc_origin,
1912                            mfc->mfc_parent);
1913
1914                 if (it->cache != &mfc_unres_queue) {
1915                         seq_printf(seq, " %8lu %8lu %8lu",
1916                                    mfc->mfc_un.res.pkt,
1917                                    mfc->mfc_un.res.bytes,
1918                                    mfc->mfc_un.res.wrong_if);
1919                         for (n = mfc->mfc_un.res.minvif;
1920                              n < mfc->mfc_un.res.maxvif; n++ ) {
1921                                 if (VIF_EXISTS(net, n) &&
1922                                     mfc->mfc_un.res.ttls[n] < 255)
1923                                         seq_printf(seq,
1924                                            " %2d:%-3d",
1925                                            n, mfc->mfc_un.res.ttls[n]);
1926                         }
1927                 } else {
1928                         /* unresolved mfc_caches don't contain
1929                          * pkt, bytes and wrong_if values
1930                          */
1931                         seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1932                 }
1933                 seq_putc(seq, '\n');
1934         }
1935         return 0;
1936 }
1937
1938 static const struct seq_operations ipmr_mfc_seq_ops = {
1939         .start = ipmr_mfc_seq_start,
1940         .next  = ipmr_mfc_seq_next,
1941         .stop  = ipmr_mfc_seq_stop,
1942         .show  = ipmr_mfc_seq_show,
1943 };
1944
1945 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1946 {
1947         return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1948                             sizeof(struct ipmr_mfc_iter));
1949 }
1950
1951 static const struct file_operations ipmr_mfc_fops = {
1952         .owner   = THIS_MODULE,
1953         .open    = ipmr_mfc_open,
1954         .read    = seq_read,
1955         .llseek  = seq_lseek,
1956         .release = seq_release_net,
1957 };
1958 #endif
1959
1960 #ifdef CONFIG_IP_PIMSM_V2
1961 static const struct net_protocol pim_protocol = {
1962         .handler        =       pim_rcv,
1963         .netns_ok       =       1,
1964 };
1965 #endif
1966
1967
1968 /*
1969  *      Setup for IP multicast routing
1970  */
1971 static int __net_init ipmr_net_init(struct net *net)
1972 {
1973         int err = 0;
1974
1975         net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1976                                       GFP_KERNEL);
1977         if (!net->ipv4.vif_table) {
1978                 err = -ENOMEM;
1979                 goto fail;
1980         }
1981
1982         /* Forwarding cache */
1983         net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1984                                             sizeof(struct mfc_cache *),
1985                                             GFP_KERNEL);
1986         if (!net->ipv4.mfc_cache_array) {
1987                 err = -ENOMEM;
1988                 goto fail_mfc_cache;
1989         }
1990
1991 #ifdef CONFIG_IP_PIMSM
1992         net->ipv4.mroute_reg_vif_num = -1;
1993 #endif
1994
1995 #ifdef CONFIG_PROC_FS
1996         err = -ENOMEM;
1997         if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1998                 goto proc_vif_fail;
1999         if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2000                 goto proc_cache_fail;
2001 #endif
2002         return 0;
2003
2004 #ifdef CONFIG_PROC_FS
2005 proc_cache_fail:
2006         proc_net_remove(net, "ip_mr_vif");
2007 proc_vif_fail:
2008         kfree(net->ipv4.mfc_cache_array);
2009 #endif
2010 fail_mfc_cache:
2011         kfree(net->ipv4.vif_table);
2012 fail:
2013         return err;
2014 }
2015
2016 static void __net_exit ipmr_net_exit(struct net *net)
2017 {
2018 #ifdef CONFIG_PROC_FS
2019         proc_net_remove(net, "ip_mr_cache");
2020         proc_net_remove(net, "ip_mr_vif");
2021 #endif
2022         kfree(net->ipv4.mfc_cache_array);
2023         kfree(net->ipv4.vif_table);
2024 }
2025
2026 static struct pernet_operations ipmr_net_ops = {
2027         .init = ipmr_net_init,
2028         .exit = ipmr_net_exit,
2029 };
2030
2031 int __init ip_mr_init(void)
2032 {
2033         int err;
2034
2035         mrt_cachep = kmem_cache_create("ip_mrt_cache",
2036                                        sizeof(struct mfc_cache),
2037                                        0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2038                                        NULL);
2039         if (!mrt_cachep)
2040                 return -ENOMEM;
2041
2042         err = register_pernet_subsys(&ipmr_net_ops);
2043         if (err)
2044                 goto reg_pernet_fail;
2045
2046         setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2047         err = register_netdevice_notifier(&ip_mr_notifier);
2048         if (err)
2049                 goto reg_notif_fail;
2050 #ifdef CONFIG_IP_PIMSM_V2
2051         if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2052                 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2053                 err = -EAGAIN;
2054                 goto add_proto_fail;
2055         }
2056 #endif
2057         return 0;
2058
2059 #ifdef CONFIG_IP_PIMSM_V2
2060 add_proto_fail:
2061         unregister_netdevice_notifier(&ip_mr_notifier);
2062 #endif
2063 reg_notif_fail:
2064         del_timer(&ipmr_expire_timer);
2065         unregister_pernet_subsys(&ipmr_net_ops);
2066 reg_pernet_fail:
2067         kmem_cache_destroy(mrt_cachep);
2068         return err;
2069 }