net/ipv4/ipip.c

   1 /*
   2  *      Linux NET3:     IP/IP protocol decoder.
   3  *
   4  *      Authors:
   5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   6  *
   7  *      Fixes:
    8  *              Alan Cox        :       Merged and made usable non-modular (it's so tiny it's silly as
    9  *                                      a module taking up 2 pages).
  10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  11  *                                      to keep ip_forward happy.
  12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  14  *              David Woodhouse :       Perform some basic ICMP handling.
  15  *                                      IPIP Routing without decapsulation.
  16  *              Carlos Picoto   :       GRE over IP support
   17  *              Alexey Kuznetsov:       Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
  18  *                                      I do not want to merge them together.
  19  *
  20  *      This program is free software; you can redistribute it and/or
  21  *      modify it under the terms of the GNU General Public License
  22  *      as published by the Free Software Foundation; either version
  23  *      2 of the License, or (at your option) any later version.
  24  *
  25  */
  26
  27 /* tunnel.c: an IP tunnel driver
  28
  29         The purpose of this driver is to provide an IP tunnel through
  30         which you can tunnel network traffic transparently across subnets.
  31
  32         This was written by looking at Nick Holloway's dummy driver
  33         Thanks for the great code!
  34
  35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  36
  37         Minor tweaks:
  38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
  39                 dev->hard_header/hard_header_len changed to use no headers.
  40                 Comments/bracketing tweaked.
  41                 Made the tunnels use dev->name not tunnel: when error reporting.
  42                 Added tx_dropped stat
  43
  44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
  45
  46         Reworked:
  47                 Changed to tunnel to destination gateway in addition to the
  48                         tunnel's pointopoint address
  49                 Almost completely rewritten
  50                 Note:  There is currently no firewall or ICMP handling done.
  51
  52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  53
  54 */
  55
  56 /* Things I wish I had known when writing the tunnel driver:
  57
  58         When the tunnel_xmit() function is called, the skb contains the
  59         packet to be sent (plus a great deal of extra info), and dev
  60         contains the tunnel device that _we_ are.
  61
  62         When we are passed a packet, we are expected to fill in the
  63         source address with our source IP address.
  64
  65         What is the proper way to allocate, copy and free a buffer?
  66         After you allocate it, it is a "0 length" chunk of memory
  67         starting at zero.  If you want to add headers to the buffer
  68         later, you'll have to call "skb_reserve(skb, amount)" with
  69         the amount of memory you want reserved.  Then, you call
  70         "skb_put(skb, amount)" with the amount of space you want in
  71         the buffer.  skb_put() returns a pointer to the top (#0) of
  72         that buffer.  skb->len is set to the amount of space you have
  73         "allocated" with skb_put().  You can then write up to skb->len
  74         bytes to that buffer.  If you need more, you can call skb_put()
  75         again with the additional amount of space you need.  You can
  76         find out how much more space you can allocate by calling
  77         "skb_tailroom(skb)".
  78         Now, to add header space, call "skb_push(skb, header_len)".
  79         This creates space at the beginning of the buffer and returns
  80         a pointer to this new space.  If later you need to strip a
  81         header from a buffer, call "skb_pull(skb, header_len)".
  82         skb_headroom() will return how much space is left at the top
  83         of the buffer (before the main data).  Remember, this headroom
  84         space must be reserved before the skb_put() function is called.
  85         */
  86
  87 /*
   88    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
  89
  90    For comments look at net/ipv4/ip_gre.c --ANK
  91  */
  92
  93
  94 #include <linux/capability.h>
  95 #include <linux/module.h>
  96 #include <linux/types.h>
  97 #include <linux/kernel.h>
  98 #include <linux/slab.h>
  99 #include <asm/uaccess.h>
 100 #include <linux/skbuff.h>
 101 #include <linux/netdevice.h>
 102 #include <linux/in.h>
 103 #include <linux/tcp.h>
 104 #include <linux/udp.h>
 105 #include <linux/if_arp.h>
 106 #include <linux/mroute.h>
 107 #include <linux/init.h>
 108 #include <linux/netfilter_ipv4.h>
 109 #include <linux/if_ether.h>
 110
 111 #include <net/sock.h>
 112 #include <net/ip.h>
 113 #include <net/icmp.h>
 114 #include <net/ipip.h>
 115 #include <net/inet_ecn.h>
 116 #include <net/xfrm.h>
 117 #include <net/net_namespace.h>
 118 #include <net/netns/generic.h>
 119
 120 #define HASH_SIZE  16
 121 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 122
 123 static int ipip_net_id __read_mostly;
  124 struct ipip_net {
              /*
               * Per-network-namespace IPIP state, obtained through
               * net_generic(net, ipip_net_id).
               *
               * Tunnels are kept in four tables selected by which endpoint
               * addresses are configured:
               *   tunnels_r_l - both remote and local set, bucket is
               *                 HASH(remote) ^ HASH(local)
               *   tunnels_r   - only remote set, bucket is HASH(remote)
               *   tunnels_l   - only local set, bucket is HASH(local)
               *   tunnels_wc  - neither set (single wildcard slot; the
               *                 fallback tunnel is published here)
               */
  125         struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
  126         struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
  127         struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
  128         struct ip_tunnel __rcu *tunnels_wc[1];
              /*
               * The four tables above indexed by "prio" as computed in
               * __ipip_bucket(): bit 1 = remote set, bit 0 = local set.
               * Filled in by ipip_init_net().
               */
  129         struct ip_tunnel __rcu **tunnels[4];
  130
              /* The "tunl0" device allocated in ipip_init_net(). */
  131         struct net_device *fb_tunnel_dev;
  132 };
 133
 134 static int ipip_tunnel_init(struct net_device *dev);
 135 static void ipip_tunnel_setup(struct net_device *dev);
 136 static void ipip_dev_free(struct net_device *dev);
 137
  138 /*
  139  * Locking : hash tables are protected by RCU and RTNL
       *
       * for_each_ip_tunnel_rcu() walks one hash chain starting at @start,
       * following ->next with rcu_dereference().  It iterates through a
       * variable named "t" that the calling function must declare itself
       * (struct ip_tunnel *t); the macro does not declare it.
  140  */
  141
  142 #define for_each_ip_tunnel_rcu(start) \
  143         for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 144
  145 /* Often-modified stats are kept per CPU; the others are shared (netdev->stats). */
  146 struct pcpu_tstats {
  147         u64     rx_packets;
  148         u64     rx_bytes;
  149         u64     tx_packets;
  150         u64     tx_bytes;
              /*
               * Sequence used with u64_stats_update_begin()/_end() by the
               * writers and u64_stats_fetch_begin_bh()/_retry_bh() by
               * ipip_get_stats64() to read the four counters consistently.
               */
  151         struct u64_stats_sync   syncp;
  152 };
 153
 154 static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
 155                                                   struct rtnl_link_stats64 *tot)
 156 {
 157         int i;
 158
 159         for_each_possible_cpu(i) {
 160                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
 161                 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
 162                 unsigned int start;
 163
 164                 do {
 165                         start = u64_stats_fetch_begin_bh(&tstats->syncp);
 166                         rx_packets = tstats->rx_packets;
 167                         tx_packets = tstats->tx_packets;
 168                         rx_bytes = tstats->rx_bytes;
 169                         tx_bytes = tstats->tx_bytes;
 170                 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
 171
 172                 tot->rx_packets += rx_packets;
 173                 tot->tx_packets += tx_packets;
 174                 tot->rx_bytes   += rx_bytes;
 175                 tot->tx_bytes   += tx_bytes;
 176         }
 177
 178         tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
 179         tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
 180         tot->tx_dropped = dev->stats.tx_dropped;
 181         tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
 182         tot->tx_errors = dev->stats.tx_errors;
 183         tot->collisions = dev->stats.collisions;
 184
 185         return tot;
 186 }
 187
 188 static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
 189                 __be32 remote, __be32 local)
 190 {
 191         unsigned int h0 = HASH(remote);
 192         unsigned int h1 = HASH(local);
 193         struct ip_tunnel *t;
 194         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 195
 196         for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
 197                 if (local == t->parms.iph.saddr &&
 198                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 199                         return t;
 200
 201         for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
 202                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 203                         return t;
 204
 205         for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
 206                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 207                         return t;
 208
 209         t = rcu_dereference(ipn->tunnels_wc[0]);
 210         if (t && (t->dev->flags&IFF_UP))
 211                 return t;
 212         return NULL;
 213 }
 214
 215 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
 216                 struct ip_tunnel_parm *parms)
 217 {
 218         __be32 remote = parms->iph.daddr;
 219         __be32 local = parms->iph.saddr;
 220         unsigned int h = 0;
 221         int prio = 0;
 222
 223         if (remote) {
 224                 prio |= 2;
 225                 h ^= HASH(remote);
 226         }
 227         if (local) {
 228                 prio |= 1;
 229                 h ^= HASH(local);
 230         }
 231         return &ipn->tunnels[prio][h];
 232 }
 233
 234 static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
 235                 struct ip_tunnel *t)
 236 {
 237         return __ipip_bucket(ipn, &t->parms);
 238 }
 239
 240 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
 241 {
 242         struct ip_tunnel __rcu **tp;
 243         struct ip_tunnel *iter;
 244
 245         for (tp = ipip_bucket(ipn, t);
 246              (iter = rtnl_dereference(*tp)) != NULL;
 247              tp = &iter->next) {
 248                 if (t == iter) {
 249                         rcu_assign_pointer(*tp, t->next);
 250                         break;
 251                 }
 252         }
 253 }
 254
 255 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
 256 {
 257         struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
 258
 259         rcu_assign_pointer(t->next, rtnl_dereference(*tp));
 260         rcu_assign_pointer(*tp, t);
 261 }
 262
 263 static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
 264                 struct ip_tunnel_parm *parms, int create)
 265 {
 266         __be32 remote = parms->iph.daddr;
 267         __be32 local = parms->iph.saddr;
 268         struct ip_tunnel *t, *nt;
 269         struct ip_tunnel __rcu **tp;
 270         struct net_device *dev;
 271         char name[IFNAMSIZ];
 272         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 273
 274         for (tp = __ipip_bucket(ipn, parms);
 275                  (t = rtnl_dereference(*tp)) != NULL;
 276                  tp = &t->next) {
 277                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 278                         return t;
 279         }
 280         if (!create)
 281                 return NULL;
 282
 283         if (parms->name[0])
 284                 strlcpy(name, parms->name, IFNAMSIZ);
 285         else
 286                 strcpy(name, "tunl%d");
 287
 288         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 289         if (dev == NULL)
 290                 return NULL;
 291
 292         dev_net_set(dev, net);
 293
 294         nt = netdev_priv(dev);
 295         nt->parms = *parms;
 296
 297         if (ipip_tunnel_init(dev) < 0)
 298                 goto failed_free;
 299
 300         if (register_netdevice(dev) < 0)
 301                 goto failed_free;
 302
 303         strcpy(nt->parms.name, dev->name);
 304
 305         dev_hold(dev);
 306         ipip_tunnel_link(ipn, nt);
 307         return nt;
 308
 309 failed_free:
 310         ipip_dev_free(dev);
 311         return NULL;
 312 }
 313
 314 /* called with RTNL */
 315 static void ipip_tunnel_uninit(struct net_device *dev)
 316 {
 317         struct net *net = dev_net(dev);
 318         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 319
 320         if (dev == ipn->fb_tunnel_dev)
 321                 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
 322         else
 323                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
 324         dev_put(dev);
 325 }
 326
 327 static int ipip_err(struct sk_buff *skb, u32 info)
 328 {
 329
 330 /* All the routers (except for Linux) return only
 331    8 bytes of packet payload. It means, that precise relaying of
 332    ICMP in the real Internet is absolutely infeasible.
 333  */
 334         const struct iphdr *iph = (const struct iphdr *)skb->data;
 335         const int type = icmp_hdr(skb)->type;
 336         const int code = icmp_hdr(skb)->code;
 337         struct ip_tunnel *t;
 338         int err;
 339
 340         switch (type) {
 341         default:
 342         case ICMP_PARAMETERPROB:
 343                 return 0;
 344
 345         case ICMP_DEST_UNREACH:
 346                 switch (code) {
 347                 case ICMP_SR_FAILED:
 348                 case ICMP_PORT_UNREACH:
 349                         /* Impossible event. */
 350                         return 0;
 351                 default:
 352                         /* All others are translated to HOST_UNREACH.
 353                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 354                            I believe they are just ether pollution. --ANK
 355                          */
 356                         break;
 357                 }
 358                 break;
 359         case ICMP_TIME_EXCEEDED:
 360                 if (code != ICMP_EXC_TTL)
 361                         return 0;
 362                 break;
 363         case ICMP_REDIRECT:
 364                 break;
 365         }
 366
 367         err = -ENOENT;
 368
 369         rcu_read_lock();
 370         t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
 371         if (t == NULL)
 372                 goto out;
 373
 374         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 375                 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
 376                                  t->dev->ifindex, 0, IPPROTO_IPIP, 0);
 377                 err = 0;
 378                 goto out;
 379         }
 380
 381         if (type == ICMP_REDIRECT) {
 382                 ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
 383                               IPPROTO_IPIP, 0);
 384                 err = 0;
 385                 goto out;
 386         }
 387
 388         if (t->parms.iph.daddr == 0)
 389                 goto out;
 390
 391         err = 0;
 392         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 393                 goto out;
 394
 395         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 396                 t->err_count++;
 397         else
 398                 t->err_count = 1;
 399         t->err_time = jiffies;
 400 out:
 401         rcu_read_unlock();
 402         return err;
 403 }
 404
 405 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
 406                                         struct sk_buff *skb)
 407 {
 408         struct iphdr *inner_iph = ip_hdr(skb);
 409
 410         if (INET_ECN_is_ce(outer_iph->tos))
 411                 IP_ECN_set_ce(inner_iph);
 412 }
 413
 414 static int ipip_rcv(struct sk_buff *skb)
 415 {
 416         struct ip_tunnel *tunnel;
 417         const struct iphdr *iph = ip_hdr(skb);
 418
 419         rcu_read_lock();
 420         tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
 421         if (tunnel != NULL) {
 422                 struct pcpu_tstats *tstats;
 423
 424                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 425                         rcu_read_unlock();
 426                         kfree_skb(skb);
 427                         return 0;
 428                 }
 429
 430                 secpath_reset(skb);
 431
 432                 skb->mac_header = skb->network_header;
 433                 skb_reset_network_header(skb);
 434                 skb->protocol = htons(ETH_P_IP);
 435                 skb->pkt_type = PACKET_HOST;
 436
 437                 tstats = this_cpu_ptr(tunnel->dev->tstats);
 438                 u64_stats_update_begin(&tstats->syncp);
 439                 tstats->rx_packets++;
 440                 tstats->rx_bytes += skb->len;
 441                 u64_stats_update_end(&tstats->syncp);
 442
 443                 __skb_tunnel_rx(skb, tunnel->dev);
 444
 445                 ipip_ecn_decapsulate(iph, skb);
 446
 447                 netif_rx(skb);
 448
 449                 rcu_read_unlock();
 450                 return 0;
 451         }
 452         rcu_read_unlock();
 453
 454         return -1;
 455 }
 456
  457 /*
  458  *      This function assumes it is being called from dev_queue_xmit()
  459  *      and that skb is filled properly by that function.
       *
       *      Transmit handler of the tunnel device: prepends an outer IPv4
       *      header (protocol IPPROTO_IPIP) to the IPv4 packet in @skb and
       *      passes it on through __IPTUNNEL_XMIT().
       *
       *      Every path returns NETDEV_TX_OK.  On the error paths the skb is
       *      freed here and a counter in dev->stats is bumped (tx_errors,
       *      plus tx_fifo_errors, tx_carrier_errors or collisions depending
       *      on the cause; tx_dropped alone when reallocation fails).
  460  */
  461
  462 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
  463 {
  464         struct ip_tunnel *tunnel = netdev_priv(dev);
  465         struct pcpu_tstats *tstats;
  466         const struct iphdr  *tiph = &tunnel->parms.iph;
  467         u8     tos = tunnel->parms.iph.tos;
  468         __be16 df = tiph->frag_off;
  469         struct rtable *rt;                      /* Route to the other host */
  470         struct net_device *tdev;                /* Device to other host */
  471         const struct iphdr  *old_iph = ip_hdr(skb);
  472         struct iphdr  *iph;                     /* Our new IP header */
  473         unsigned int max_headroom;              /* The extra header space needed */
  474         __be32 dst = tiph->daddr;
  475         struct flowi4 fl4;
  476         int    mtu;
  477
              /* Only IPv4 payloads are encapsulated. */
  478         if (skb->protocol != htons(ETH_P_IP))
  479                 goto tx_error;
  480
              /* Low bit of the configured TOS set: use the inner packet's TOS. */
  481         if (tos & 1)
  482                 tos = old_iph->tos;
  483
  484         if (!dst) {
  485                 /* NBMA tunnel */
                      /*
                       * No fixed remote endpoint: use the next hop of the
                       * route already attached to the skb as the outer
                       * destination.
                       */
  486                 if ((rt = skb_rtable(skb)) == NULL) {
  487                         dev->stats.tx_fifo_errors++;
  488                         goto tx_error;
  489                 }
  490                 dst = rt_nexthop(rt, old_iph->daddr);
  491         }
  492
              /*
               * Route the outer packet.  fl4 is filled in by the lookup and
               * supplies the outer addresses further down.  From here on
               * every error exit must release rt with ip_rt_put().
               */
  493         rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
  494                                    dst, tiph->saddr,
  495                                    0, 0,
  496                                    IPPROTO_IPIP, RT_TOS(tos),
  497                                    tunnel->parms.link);
  498         if (IS_ERR(rt)) {
  499                 dev->stats.tx_carrier_errors++;
  500                 goto tx_error_icmp;
  501         }
  502         tdev = rt->dst.dev;
  503
              /* The route leads back into this tunnel device: refuse. */
  504         if (tdev == dev) {
  505                 ip_rt_put(rt);
  506                 dev->stats.collisions++;
  507                 goto tx_error;
  508         }
  509
              /* DF is set if either the tunnel config or the inner packet asks for it. */
  510         df |= old_iph->frag_off & htons(IP_DF);
  511
  512         if (df) {
                      /* Room left for the inner packet after the outer header. */
  513                 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
  514
  515                 if (mtu < 68) {
  516                         dev->stats.collisions++;
  517                         ip_rt_put(rt);
  518                         goto tx_error;
  519                 }
  520
  521                 if (skb_dst(skb))
  522                         skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
  523
                      /*
                       * Inner packet has DF set and does not fit: report
                       * "fragmentation needed" to its sender and drop it.
                       */
  524                 if ((old_iph->frag_off & htons(IP_DF)) &&
  525                     mtu < ntohs(old_iph->tot_len)) {
  526                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
  527                                   htonl(mtu));
  528                         ip_rt_put(rt);
  529                         goto tx_error;
  530                 }
  531         }
  532
              /*
               * err_count/err_time are maintained by ipip_err().  While
               * recent errors are pending, report one link failure per
               * transmitted packet; a stale count is simply cleared.  The
               * packet is still sent in either case.
               */
  533         if (tunnel->err_count > 0) {
  534                 if (time_before(jiffies,
  535                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
  536                         tunnel->err_count--;
  537                         dst_link_failure(skb);
  538                 } else
  539                         tunnel->err_count = 0;
  540         }
  541
  542         /*
  543          * Okay, now see if we can stuff it in the buffer as-is.
  544          */
  545         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
  546
  547         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
  548             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
  549                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
  550                 if (!new_skb) {
  551                         ip_rt_put(rt);
  552                         dev->stats.tx_dropped++;
  553                         dev_kfree_skb(skb);
  554                         return NETDEV_TX_OK;
  555                 }
                      /* Keep the new buffer charged to the original socket. */
  556                 if (skb->sk)
  557                         skb_set_owner_w(new_skb, skb->sk);
  558                 dev_kfree_skb(skb);
  559                 skb = new_skb;
                      /* The inner header now lives in the new buffer. */
  560                 old_iph = ip_hdr(skb);
  561         }
  562
              /*
               * The inner IP header becomes the transport header of the
               * outer packet; the space pushed in front of it becomes the
               * new network header.
               */
  563         skb->transport_header = skb->network_header;
  564         skb_push(skb, sizeof(struct iphdr));
  565         skb_reset_network_header(skb);
  566         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
  567         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
  568                               IPSKB_REROUTED);
              /* Swap the skb's dst for the outer route; rt's reference moves to the skb. */
  569         skb_dst_drop(skb);
  570         skb_dst_set(skb, &rt->dst);
  571
  572         /*
  573          *      Push down and install the IPIP header.
  574          */
  575
  576         iph                     =       ip_hdr(skb);
  577         iph->version            =       4;
  578         iph->ihl                =       sizeof(struct iphdr)>>2;
  579         iph->frag_off           =       df;
  580         iph->protocol           =       IPPROTO_IPIP;
  581         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
  582         iph->daddr              =       fl4.daddr;
  583         iph->saddr              =       fl4.saddr;
  584
              /* Configured TTL of 0 means: copy the inner packet's TTL. */
  585         if ((iph->ttl = tiph->ttl) == 0)
  586                 iph->ttl        =       old_iph->ttl;
  587
  588         nf_reset(skb);
  589         tstats = this_cpu_ptr(dev->tstats);
              /*
               * NOTE(review): __IPTUNNEL_XMIT is defined outside this file
               * (presumably in net/ipip.h); it is expected to finish the
               * header, send the packet and update the tx statistics --
               * confirm against its definition.
               */
  590         __IPTUNNEL_XMIT(tstats, &dev->stats);
  591         return NETDEV_TX_OK;
  592
  593 tx_error_icmp:
  594         dst_link_failure(skb);
  595 tx_error:
  596         dev->stats.tx_errors++;
  597         dev_kfree_skb(skb);
  598         return NETDEV_TX_OK;
  599 }
 600
 601 static void ipip_tunnel_bind_dev(struct net_device *dev)
 602 {
 603         struct net_device *tdev = NULL;
 604         struct ip_tunnel *tunnel;
 605         const struct iphdr *iph;
 606
 607         tunnel = netdev_priv(dev);
 608         iph = &tunnel->parms.iph;
 609
 610         if (iph->daddr) {
 611                 struct rtable *rt;
 612                 struct flowi4 fl4;
 613
 614                 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
 615                                            iph->daddr, iph->saddr,
 616                                            0, 0,
 617                                            IPPROTO_IPIP,
 618                                            RT_TOS(iph->tos),
 619                                            tunnel->parms.link);
 620                 if (!IS_ERR(rt)) {
 621                         tdev = rt->dst.dev;
 622                         ip_rt_put(rt);
 623                 }
 624                 dev->flags |= IFF_POINTOPOINT;
 625         }
 626
 627         if (!tdev && tunnel->parms.link)
 628                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
 629
 630         if (tdev) {
 631                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
 632                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
 633         }
 634         dev->iflink = tunnel->parms.link;
 635 }
 636
  637 static int
  638 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
  639 {
              /*
               * Tunnel configuration ioctls (ndo_do_ioctl handler).
               *
               * The user buffer at ifr->ifr_ifru.ifru_data holds a
               * struct ip_tunnel_parm.  When the ioctl is issued on the
               * fallback device, the tunnel to act on is selected by the
               * endpoint addresses in that buffer; otherwise @dev itself is
               * the target.
               *
               * Returns 0 on success or a negative errno: -EFAULT (user copy
               * failed), -EPERM (no CAP_NET_ADMIN, or attempt to delete the
               * fallback tunnel), -EINVAL (bad parameters or unknown cmd),
               * -EEXIST (endpoints already used by another tunnel),
               * -ENOBUFS (tunnel could not be created), -ENOENT (no such
               * tunnel).
               */
  640         int err = 0;
  641         struct ip_tunnel_parm p;
  642         struct ip_tunnel *t;
  643         struct net *net = dev_net(dev);
  644         struct ipip_net *ipn = net_generic(net, ipip_net_id);
  645
  646         switch (cmd) {
  647         case SIOCGETTUNNEL:
  648                 t = NULL;
  649                 if (dev == ipn->fb_tunnel_dev) {
  650                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
  651                                 err = -EFAULT;
  652                                 break;
  653                         }
  654                         t = ipip_tunnel_locate(net, &p, 0);
  655                 }
                      /* Not the fallback device, or nothing matched: report @dev itself. */
  656                 if (t == NULL)
  657                         t = netdev_priv(dev);
  658                 memcpy(&p, &t->parms, sizeof(p));
  659                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
  660                         err = -EFAULT;
  661                 break;
  662
  663         case SIOCADDTUNNEL:
  664         case SIOCCHGTUNNEL:
  665                 err = -EPERM;
  666                 if (!capable(CAP_NET_ADMIN))
  667                         goto done;
  668
  669                 err = -EFAULT;
  670                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
  671                         goto done;
  672
                      /*
                       * The template header must be plain IPv4 (no options)
                       * carrying IPIP, with no frag_off bits other than DF.
                       */
  673                 err = -EINVAL;
  674                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
  675                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
  676                         goto done;
                      /* A fixed TTL forces DF on the outer header. */
  677                 if (p.iph.ttl)
  678                         p.iph.frag_off |= htons(IP_DF);
  679
                      /* Only SIOCADDTUNNEL may create a missing tunnel. */
  680                 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
  681
  682                 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
  683                         if (t != NULL) {
                                      /* The new endpoints already belong to another tunnel. */
  684                                 if (t->dev != dev) {
  685                                         err = -EEXIST;
  686                                         break;
  687                                 }
  688                         } else {
                                      /*
                                       * A change may not add or remove the
                                       * remote address: IFF_POINTOPOINT must
                                       * keep matching "daddr is set".
                                       */
  689                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
  690                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
  691                                         err = -EINVAL;
  692                                         break;
  693                                 }
                                      /*
                                       * Re-hash @dev under its new endpoints:
                                       * unlink, call synchronize_net()
                                       * (presumably so that no lookup still
                                       * uses the old entry -- confirm), update
                                       * the addresses, then link it again.
                                       */
  694                                 t = netdev_priv(dev);
  695                                 ipip_tunnel_unlink(ipn, t);
  696                                 synchronize_net();
  697                                 t->parms.iph.saddr = p.iph.saddr;
  698                                 t->parms.iph.daddr = p.iph.daddr;
  699                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
  700                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
  701                                 ipip_tunnel_link(ipn, t);
  702                                 netdev_state_change(dev);
  703                         }
  704                 }
  705
  706                 if (t) {
  707                         err = 0;
  708                         if (cmd == SIOCCHGTUNNEL) {
  709                                 t->parms.iph.ttl = p.iph.ttl;
  710                                 t->parms.iph.tos = p.iph.tos;
  711                                 t->parms.iph.frag_off = p.iph.frag_off;
                                      /* A new link index requires re-binding. */
  712                                 if (t->parms.link != p.link) {
  713                                         t->parms.link = p.link;
  714                                         ipip_tunnel_bind_dev(dev);
  715                                         netdev_state_change(dev);
  716                                 }
  717                         }
                              /* Hand the resulting parameters back to the caller. */
  718                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
  719                                 err = -EFAULT;
  720                 } else
  721                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
  722                 break;
  723
  724         case SIOCDELTUNNEL:
  725                 err = -EPERM;
  726                 if (!capable(CAP_NET_ADMIN))
  727                         goto done;
  728
  729                 if (dev == ipn->fb_tunnel_dev) {
                              /*
                               * Issued on the fallback device: the user
                               * buffer names the tunnel to delete.  The
                               * fallback tunnel itself cannot be deleted.
                               */
  730                         err = -EFAULT;
  731                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
  732                                 goto done;
  733                         err = -ENOENT;
  734                         if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
  735                                 goto done;
  736                         err = -EPERM;
  737                         if (t->dev == ipn->fb_tunnel_dev)
  738                                 goto done;
  739                         dev = t->dev;
  740                 }
  741                 unregister_netdevice(dev);
  742                 err = 0;
  743                 break;
  744
  745         default:
  746                 err = -EINVAL;
  747         }
  748
  749 done:
  750         return err;
  751 }
 752
 753 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 754 {
 755         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
 756                 return -EINVAL;
 757         dev->mtu = new_mtu;
 758         return 0;
 759 }
 760
 761 static const struct net_device_ops ipip_netdev_ops = {
 762         .ndo_uninit     = ipip_tunnel_uninit,
 763         .ndo_start_xmit = ipip_tunnel_xmit,
 764         .ndo_do_ioctl   = ipip_tunnel_ioctl,
 765         .ndo_change_mtu = ipip_tunnel_change_mtu,
 766         .ndo_get_stats64 = ipip_get_stats64,
 767 };
 768
 769 static void ipip_dev_free(struct net_device *dev)
 770 {
 771         free_percpu(dev->tstats);
 772         free_netdev(dev);
 773 }
 774
 775 static void ipip_tunnel_setup(struct net_device *dev)
 776 {
 777         dev->netdev_ops         = &ipip_netdev_ops;
 778         dev->destructor         = ipip_dev_free;
 779
 780         dev->type               = ARPHRD_TUNNEL;
 781         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 782         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
 783         dev->flags              = IFF_NOARP;
 784         dev->iflink             = 0;
 785         dev->addr_len           = 4;
 786         dev->features           |= NETIF_F_NETNS_LOCAL;
 787         dev->features           |= NETIF_F_LLTX;
 788         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
 789 }
 790
 791 static int ipip_tunnel_init(struct net_device *dev)
 792 {
 793         struct ip_tunnel *tunnel = netdev_priv(dev);
 794
 795         tunnel->dev = dev;
 796
 797         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 798         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 799
 800         ipip_tunnel_bind_dev(dev);
 801
 802         dev->tstats = alloc_percpu(struct pcpu_tstats);
 803         if (!dev->tstats)
 804                 return -ENOMEM;
 805
 806         return 0;
 807 }
 808
 809 static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
 810 {
 811         struct ip_tunnel *tunnel = netdev_priv(dev);
 812         struct iphdr *iph = &tunnel->parms.iph;
 813         struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
 814
 815         tunnel->dev = dev;
 816         strcpy(tunnel->parms.name, dev->name);
 817
 818         iph->version            = 4;
 819         iph->protocol           = IPPROTO_IPIP;
 820         iph->ihl                = 5;
 821
 822         dev->tstats = alloc_percpu(struct pcpu_tstats);
 823         if (!dev->tstats)
 824                 return -ENOMEM;
 825
 826         dev_hold(dev);
 827         rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
 828         return 0;
 829 }
 830
 831 static struct xfrm_tunnel ipip_handler __read_mostly = {
 832         .handler        =       ipip_rcv,
 833         .err_handler    =       ipip_err,
 834         .priority       =       1,
 835 };
 836
 837 static const char banner[] __initconst =
 838         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 839
 840 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
 841 {
 842         int prio;
 843
 844         for (prio = 1; prio < 4; prio++) {
 845                 int h;
 846                 for (h = 0; h < HASH_SIZE; h++) {
 847                         struct ip_tunnel *t;
 848
 849                         t = rtnl_dereference(ipn->tunnels[prio][h]);
 850                         while (t != NULL) {
 851                                 unregister_netdevice_queue(t->dev, head);
 852                                 t = rtnl_dereference(t->next);
 853                         }
 854                 }
 855         }
 856 }
 857
 858 static int __net_init ipip_init_net(struct net *net)
 859 {
 860         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 861         struct ip_tunnel *t;
 862         int err;
 863
 864         ipn->tunnels[0] = ipn->tunnels_wc;
 865         ipn->tunnels[1] = ipn->tunnels_l;
 866         ipn->tunnels[2] = ipn->tunnels_r;
 867         ipn->tunnels[3] = ipn->tunnels_r_l;
 868
 869         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 870                                            "tunl0",
 871                                            ipip_tunnel_setup);
 872         if (!ipn->fb_tunnel_dev) {
 873                 err = -ENOMEM;
 874                 goto err_alloc_dev;
 875         }
 876         dev_net_set(ipn->fb_tunnel_dev, net);
 877
 878         err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
 879         if (err)
 880                 goto err_reg_dev;
 881
 882         if ((err = register_netdev(ipn->fb_tunnel_dev)))
 883                 goto err_reg_dev;
 884
 885         t = netdev_priv(ipn->fb_tunnel_dev);
 886
 887         strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
 888         return 0;
 889
 890 err_reg_dev:
 891         ipip_dev_free(ipn->fb_tunnel_dev);
 892 err_alloc_dev:
 893         /* nothing */
 894         return err;
 895 }
 896
 897 static void __net_exit ipip_exit_net(struct net *net)
 898 {
 899         struct ipip_net *ipn = net_generic(net, ipip_net_id);
 900         LIST_HEAD(list);
 901
 902         rtnl_lock();
 903         ipip_destroy_tunnels(ipn, &list);
 904         unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
 905         unregister_netdevice_many(&list);
 906         rtnl_unlock();
 907 }
 908
 909 static struct pernet_operations ipip_net_ops = {
 910         .init = ipip_init_net,
 911         .exit = ipip_exit_net,
 912         .id   = &ipip_net_id,
 913         .size = sizeof(struct ipip_net),
 914 };
 915
 916 static int __init ipip_init(void)
 917 {
 918         int err;
 919
 920         printk(banner);
 921
 922         err = register_pernet_device(&ipip_net_ops);
 923         if (err < 0)
 924                 return err;
 925         err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
 926         if (err < 0) {
 927                 unregister_pernet_device(&ipip_net_ops);
 928                 pr_info("%s: can't register tunnel\n", __func__);
 929         }
 930         return err;
 931 }
 932
 933 static void __exit ipip_fini(void)
 934 {
 935         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
 936                 pr_info("%s: can't deregister tunnel\n", __func__);
 937
 938         unregister_pernet_device(&ipip_net_ops);
 939 }
 940
 941 module_init(ipip_init);
 942 module_exit(ipip_fini);
 943 MODULE_LICENSE("GPL");
 944 MODULE_ALIAS_NETDEV("tunl0");