b50435ba0ce5d9a6ccfea63b593f89ddb07e0b99
[platform/adaptation/renesas_rcar/renesas_kernel.git] / net / ipv4 / ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder.
3  *
4  *      Authors:
5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *      Fixes:
8  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
9  *                                      a module taking up 2 pages).
10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  *                                      to keep ip_forward happy.
12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18  *                                      I do not want to merge them together.
19  *
20  *      This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  *
25  */
26
27 /* tunnel.c: an IP tunnel driver
28
29         The purpose of this driver is to provide an IP tunnel through
30         which you can tunnel network traffic transparently across subnets.
31
32         This was written by looking at Nick Holloway's dummy driver
33         Thanks for the great code!
34
35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
36
37         Minor tweaks:
38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39                 dev->hard_header/hard_header_len changed to use no headers.
40                 Comments/bracketing tweaked.
41                 Made the tunnels use dev->name not tunnel: when error reporting.
42                 Added tx_dropped stat
43
44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
45
46         Reworked:
47                 Changed to tunnel to destination gateway in addition to the
48                         tunnel's pointopoint address
49                 Almost completely rewritten
50                 Note:  There is currently no firewall or ICMP handling done.
51
52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
53
54 */
55
56 /* Things I wish I had known when writing the tunnel driver:
57
58         When the tunnel_xmit() function is called, the skb contains the
59         packet to be sent (plus a great deal of extra info), and dev
60         contains the tunnel device that _we_ are.
61
62         When we are passed a packet, we are expected to fill in the
63         source address with our source IP address.
64
65         What is the proper way to allocate, copy and free a buffer?
66         After you allocate it, it is a "0 length" chunk of memory
67         starting at zero.  If you want to add headers to the buffer
68         later, you'll have to call "skb_reserve(skb, amount)" with
69         the amount of memory you want reserved.  Then, you call
70         "skb_put(skb, amount)" with the amount of space you want in
71         the buffer.  skb_put() returns a pointer to the top (#0) of
72         that buffer.  skb->len is set to the amount of space you have
73         "allocated" with skb_put().  You can then write up to skb->len
74         bytes to that buffer.  If you need more, you can call skb_put()
75         again with the additional amount of space you need.  You can
76         find out how much more space you can allocate by calling
77         "skb_tailroom(skb)".
78         Now, to add header space, call "skb_push(skb, header_len)".
79         This creates space at the beginning of the buffer and returns
80         a pointer to this new space.  If later you need to strip a
81         header from a buffer, call "skb_pull(skb, header_len)".
82         skb_headroom() will return how much space is left at the top
83         of the buffer (before the main data).  Remember, this headroom
84         space must be reserved before the skb_put() function is called.
85         */
86
87 /*
   This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c
89
90    For comments look at net/ipv4/ip_gre.c --ANK
91  */
92
93
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <asm/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/mroute.h>
107 #include <linux/init.h>
108 #include <linux/netfilter_ipv4.h>
109 #include <linux/if_ether.h>
110
111 #include <net/sock.h>
112 #include <net/ip.h>
113 #include <net/icmp.h>
114 #include <net/ipip.h>
115 #include <net/inet_ecn.h>
116 #include <net/xfrm.h>
117 #include <net/net_namespace.h>
118 #include <net/netns/generic.h>
119
/* Number of buckets in each address-keyed tunnel hash table. */
#define HASH_SIZE  16
/*
 * Fold an IPv4 address into a bucket index in the range 0..15.
 * The argument is fully parenthesized so that an expression argument
 * (e.g. HASH(a ^ b)) is cast and shifted as a whole.  Note that the
 * argument is still evaluated twice.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
122
/*
 * Module parameter (mode 0644).  When true, ipip_rcv() emits a rate-limited
 * log line for packets whose ECN decapsulation reports an error.
 */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126
/* Key passed to net_generic() to obtain the struct ipip_net of a namespace. */
static int ipip_net_id __read_mostly;
/* Per-network-namespace state of the IPIP driver. */
struct ipip_net {
        /* Tunnels with both endpoints set; bucket = HASH(remote) ^ HASH(local). */
        struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
        /* Tunnels looked up by remote address only; bucket = HASH(remote). */
        struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
        /* Tunnels looked up by local address only; bucket = HASH(local). */
        struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
        /* Single wildcard slot; the fallback tunnel is stored here. */
        struct ip_tunnel __rcu *tunnels_wc[1];
        /*
         * Table selector indexed by the "prio" computed in __ipip_bucket()
         * (bit 1: remote set, bit 0: local set).  Presumably points at the
         * four arrays above; it is filled in outside the visible code --
         * TODO confirm.
         */
        struct ip_tunnel __rcu **tunnels[4];

        /* The namespace's fallback tunnel device. */
        struct net_device *fb_tunnel_dev;
};
137
/* Forward declarations: these are used before their definitions below. */
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);
static void ipip_dev_free(struct net_device *dev);
static struct rtnl_link_ops ipip_link_ops __read_mostly;
142
143 static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
144                                                   struct rtnl_link_stats64 *tot)
145 {
146         int i;
147
148         for_each_possible_cpu(i) {
149                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
150                 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
151                 unsigned int start;
152
153                 do {
154                         start = u64_stats_fetch_begin_bh(&tstats->syncp);
155                         rx_packets = tstats->rx_packets;
156                         tx_packets = tstats->tx_packets;
157                         rx_bytes = tstats->rx_bytes;
158                         tx_bytes = tstats->tx_bytes;
159                 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
160
161                 tot->rx_packets += rx_packets;
162                 tot->tx_packets += tx_packets;
163                 tot->rx_bytes   += rx_bytes;
164                 tot->tx_bytes   += tx_bytes;
165         }
166
167         tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
168         tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
169         tot->tx_dropped = dev->stats.tx_dropped;
170         tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
171         tot->tx_errors = dev->stats.tx_errors;
172         tot->collisions = dev->stats.collisions;
173
174         return tot;
175 }
176
177 static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
178                 __be32 remote, __be32 local)
179 {
180         unsigned int h0 = HASH(remote);
181         unsigned int h1 = HASH(local);
182         struct ip_tunnel *t;
183         struct ipip_net *ipn = net_generic(net, ipip_net_id);
184
185         for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
186                 if (local == t->parms.iph.saddr &&
187                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
188                         return t;
189
190         for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
191                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
192                         return t;
193
194         for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
195                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
196                         return t;
197
198         t = rcu_dereference(ipn->tunnels_wc[0]);
199         if (t && (t->dev->flags&IFF_UP))
200                 return t;
201         return NULL;
202 }
203
204 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
205                 struct ip_tunnel_parm *parms)
206 {
207         __be32 remote = parms->iph.daddr;
208         __be32 local = parms->iph.saddr;
209         unsigned int h = 0;
210         int prio = 0;
211
212         if (remote) {
213                 prio |= 2;
214                 h ^= HASH(remote);
215         }
216         if (local) {
217                 prio |= 1;
218                 h ^= HASH(local);
219         }
220         return &ipn->tunnels[prio][h];
221 }
222
223 static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
224                 struct ip_tunnel *t)
225 {
226         return __ipip_bucket(ipn, &t->parms);
227 }
228
229 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
230 {
231         struct ip_tunnel __rcu **tp;
232         struct ip_tunnel *iter;
233
234         for (tp = ipip_bucket(ipn, t);
235              (iter = rtnl_dereference(*tp)) != NULL;
236              tp = &iter->next) {
237                 if (t == iter) {
238                         rcu_assign_pointer(*tp, t->next);
239                         break;
240                 }
241         }
242 }
243
244 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
245 {
246         struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
247
248         rcu_assign_pointer(t->next, rtnl_dereference(*tp));
249         rcu_assign_pointer(*tp, t);
250 }
251
252 static int ipip_tunnel_create(struct net_device *dev)
253 {
254         struct ip_tunnel *t = netdev_priv(dev);
255         struct net *net = dev_net(dev);
256         struct ipip_net *ipn = net_generic(net, ipip_net_id);
257         int err;
258
259         err = ipip_tunnel_init(dev);
260         if (err < 0)
261                 goto out;
262
263         err = register_netdevice(dev);
264         if (err < 0)
265                 goto out;
266
267         strcpy(t->parms.name, dev->name);
268         dev->rtnl_link_ops = &ipip_link_ops;
269
270         dev_hold(dev);
271         ipip_tunnel_link(ipn, t);
272         return 0;
273
274 out:
275         return err;
276 }
277
278 static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
279                 struct ip_tunnel_parm *parms, int create)
280 {
281         __be32 remote = parms->iph.daddr;
282         __be32 local = parms->iph.saddr;
283         struct ip_tunnel *t, *nt;
284         struct ip_tunnel __rcu **tp;
285         struct net_device *dev;
286         char name[IFNAMSIZ];
287         struct ipip_net *ipn = net_generic(net, ipip_net_id);
288
289         for (tp = __ipip_bucket(ipn, parms);
290                  (t = rtnl_dereference(*tp)) != NULL;
291                  tp = &t->next) {
292                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
293                         return t;
294         }
295         if (!create)
296                 return NULL;
297
298         if (parms->name[0])
299                 strlcpy(name, parms->name, IFNAMSIZ);
300         else
301                 strcpy(name, "tunl%d");
302
303         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
304         if (dev == NULL)
305                 return NULL;
306
307         dev_net_set(dev, net);
308
309         nt = netdev_priv(dev);
310         nt->parms = *parms;
311
312         if (ipip_tunnel_create(dev) < 0)
313                 goto failed_free;
314
315         return nt;
316
317 failed_free:
318         ipip_dev_free(dev);
319         return NULL;
320 }
321
322 /* called with RTNL */
323 static void ipip_tunnel_uninit(struct net_device *dev)
324 {
325         struct net *net = dev_net(dev);
326         struct ipip_net *ipn = net_generic(net, ipip_net_id);
327
328         if (dev == ipn->fb_tunnel_dev)
329                 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
330         else
331                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
332         dev_put(dev);
333 }
334
335 static int ipip_err(struct sk_buff *skb, u32 info)
336 {
337
338 /* All the routers (except for Linux) return only
339    8 bytes of packet payload. It means, that precise relaying of
340    ICMP in the real Internet is absolutely infeasible.
341  */
342         const struct iphdr *iph = (const struct iphdr *)skb->data;
343         const int type = icmp_hdr(skb)->type;
344         const int code = icmp_hdr(skb)->code;
345         struct ip_tunnel *t;
346         int err;
347
348         switch (type) {
349         default:
350         case ICMP_PARAMETERPROB:
351                 return 0;
352
353         case ICMP_DEST_UNREACH:
354                 switch (code) {
355                 case ICMP_SR_FAILED:
356                 case ICMP_PORT_UNREACH:
357                         /* Impossible event. */
358                         return 0;
359                 default:
360                         /* All others are translated to HOST_UNREACH.
361                            rfc2003 contains "deep thoughts" about NET_UNREACH,
362                            I believe they are just ether pollution. --ANK
363                          */
364                         break;
365                 }
366                 break;
367         case ICMP_TIME_EXCEEDED:
368                 if (code != ICMP_EXC_TTL)
369                         return 0;
370                 break;
371         case ICMP_REDIRECT:
372                 break;
373         }
374
375         err = -ENOENT;
376         t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
377         if (t == NULL)
378                 goto out;
379
380         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
381                 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
382                                  t->dev->ifindex, 0, IPPROTO_IPIP, 0);
383                 err = 0;
384                 goto out;
385         }
386
387         if (type == ICMP_REDIRECT) {
388                 ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
389                               IPPROTO_IPIP, 0);
390                 err = 0;
391                 goto out;
392         }
393
394         if (t->parms.iph.daddr == 0)
395                 goto out;
396
397         err = 0;
398         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
399                 goto out;
400
401         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
402                 t->err_count++;
403         else
404                 t->err_count = 1;
405         t->err_time = jiffies;
406 out:
407
408         return err;
409 }
410
411 static int ipip_rcv(struct sk_buff *skb)
412 {
413         struct ip_tunnel *tunnel;
414         const struct iphdr *iph = ip_hdr(skb);
415         int err;
416
417         tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
418         if (tunnel != NULL) {
419                 struct pcpu_tstats *tstats;
420
421                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
422                         goto drop;
423
424                 secpath_reset(skb);
425
426                 skb->mac_header = skb->network_header;
427                 skb_reset_network_header(skb);
428                 skb->protocol = htons(ETH_P_IP);
429                 skb->pkt_type = PACKET_HOST;
430
431                 __skb_tunnel_rx(skb, tunnel->dev);
432
433                 err = IP_ECN_decapsulate(iph, skb);
434                 if (unlikely(err)) {
435                         if (log_ecn_error)
436                                 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
437                                                      &iph->saddr, iph->tos);
438                         if (err > 1) {
439                                 ++tunnel->dev->stats.rx_frame_errors;
440                                 ++tunnel->dev->stats.rx_errors;
441                                 goto drop;
442                         }
443                 }
444
445                 tstats = this_cpu_ptr(tunnel->dev->tstats);
446                 u64_stats_update_begin(&tstats->syncp);
447                 tstats->rx_packets++;
448                 tstats->rx_bytes += skb->len;
449                 u64_stats_update_end(&tstats->syncp);
450
451                 netif_rx(skb);
452                 return 0;
453         }
454
455         return -1;
456
457 drop:
458         kfree_skb(skb);
459         return 0;
460 }
461
462 /*
463  *      This function assumes it is being called from dev_queue_xmit()
464  *      and that skb is filled properly by that function.
465  */
466
467 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
468 {
469         struct ip_tunnel *tunnel = netdev_priv(dev);
470         const struct iphdr  *tiph = &tunnel->parms.iph;
471         u8     tos = tunnel->parms.iph.tos;
472         __be16 df = tiph->frag_off;
473         struct rtable *rt;                      /* Route to the other host */
474         struct net_device *tdev;                /* Device to other host */
475         const struct iphdr  *old_iph;
476         struct iphdr  *iph;                     /* Our new IP header */
477         unsigned int max_headroom;              /* The extra header space needed */
478         __be32 dst = tiph->daddr;
479         struct flowi4 fl4;
480         int    mtu;
481         int err;
482         int pkt_len;
483
484         if (skb->protocol != htons(ETH_P_IP))
485                 goto tx_error;
486         old_iph = ip_hdr(skb);
487
488         if (tos & 1)
489                 tos = old_iph->tos;
490
491         if (!dst) {
492                 /* NBMA tunnel */
493                 if ((rt = skb_rtable(skb)) == NULL) {
494                         dev->stats.tx_fifo_errors++;
495                         goto tx_error;
496                 }
497                 dst = rt_nexthop(rt, old_iph->daddr);
498         }
499
500         rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
501                                    dst, tiph->saddr,
502                                    0, 0,
503                                    IPPROTO_IPIP, RT_TOS(tos),
504                                    tunnel->parms.link);
505         if (IS_ERR(rt)) {
506                 dev->stats.tx_carrier_errors++;
507                 goto tx_error_icmp;
508         }
509         tdev = rt->dst.dev;
510
511         if (tdev == dev) {
512                 ip_rt_put(rt);
513                 dev->stats.collisions++;
514                 goto tx_error;
515         }
516
517         df |= old_iph->frag_off & htons(IP_DF);
518
519         if (df) {
520                 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
521
522                 if (mtu < 68) {
523                         dev->stats.collisions++;
524                         ip_rt_put(rt);
525                         goto tx_error;
526                 }
527
528                 if (skb_dst(skb))
529                         skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
530
531                 if ((old_iph->frag_off & htons(IP_DF)) &&
532                     mtu < ntohs(old_iph->tot_len)) {
533                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
534                                   htonl(mtu));
535                         ip_rt_put(rt);
536                         goto tx_error;
537                 }
538         }
539
540         if (tunnel->err_count > 0) {
541                 if (time_before(jiffies,
542                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
543                         tunnel->err_count--;
544                         dst_link_failure(skb);
545                 } else
546                         tunnel->err_count = 0;
547         }
548
549         /*
550          * Okay, now see if we can stuff it in the buffer as-is.
551          */
552         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
553
554         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
555             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
556                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
557                 if (!new_skb) {
558                         ip_rt_put(rt);
559                         dev->stats.tx_dropped++;
560                         dev_kfree_skb(skb);
561                         return NETDEV_TX_OK;
562                 }
563                 if (skb->sk)
564                         skb_set_owner_w(new_skb, skb->sk);
565                 dev_kfree_skb(skb);
566                 skb = new_skb;
567                 old_iph = ip_hdr(skb);
568         }
569
570         if (!skb->encapsulation) {
571                 skb_reset_inner_headers(skb);
572                 skb->encapsulation = 1;
573         }
574         if (skb->ip_summed != CHECKSUM_PARTIAL)
575                 skb->ip_summed = CHECKSUM_NONE;
576
577         skb->transport_header = skb->network_header;
578         skb_push(skb, sizeof(struct iphdr));
579         skb_reset_network_header(skb);
580         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
581         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
582                               IPSKB_REROUTED);
583         skb_dst_drop(skb);
584         skb_dst_set(skb, &rt->dst);
585
586         /*
587          *      Push down and install the IPIP header.
588          */
589
590         iph                     =       ip_hdr(skb);
591         iph->version            =       4;
592         iph->ihl                =       sizeof(struct iphdr)>>2;
593         iph->frag_off           =       df;
594         iph->protocol           =       IPPROTO_IPIP;
595         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
596         iph->daddr              =       fl4.daddr;
597         iph->saddr              =       fl4.saddr;
598         tunnel_ip_select_ident(skb, old_iph, &rt->dst);
599
600         if ((iph->ttl = tiph->ttl) == 0)
601                 iph->ttl        =       old_iph->ttl;
602
603         nf_reset(skb);
604
605         pkt_len = skb->len - skb_transport_offset(skb);
606         err = ip_local_out(skb);
607         if (likely(net_xmit_eval(err) == 0)) {
608                 struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);
609
610                 u64_stats_update_begin(&tstats->syncp);
611                 tstats->tx_bytes += pkt_len;
612                 tstats->tx_packets++;
613                 u64_stats_update_end(&tstats->syncp);
614         } else {
615                 dev->stats.tx_errors++;
616                 dev->stats.tx_aborted_errors++;
617         }
618
619         return NETDEV_TX_OK;
620
621 tx_error_icmp:
622         dst_link_failure(skb);
623 tx_error:
624         dev->stats.tx_errors++;
625         dev_kfree_skb(skb);
626         return NETDEV_TX_OK;
627 }
628
629 static void ipip_tunnel_bind_dev(struct net_device *dev)
630 {
631         struct net_device *tdev = NULL;
632         struct ip_tunnel *tunnel;
633         const struct iphdr *iph;
634
635         tunnel = netdev_priv(dev);
636         iph = &tunnel->parms.iph;
637
638         if (iph->daddr) {
639                 struct rtable *rt;
640                 struct flowi4 fl4;
641
642                 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
643                                            iph->daddr, iph->saddr,
644                                            0, 0,
645                                            IPPROTO_IPIP,
646                                            RT_TOS(iph->tos),
647                                            tunnel->parms.link);
648                 if (!IS_ERR(rt)) {
649                         tdev = rt->dst.dev;
650                         ip_rt_put(rt);
651                 }
652                 dev->flags |= IFF_POINTOPOINT;
653         }
654
655         if (!tdev && tunnel->parms.link)
656                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
657
658         if (tdev) {
659                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
660                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
661         }
662         dev->iflink = tunnel->parms.link;
663 }
664
665 static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
666 {
667         struct net *net = dev_net(t->dev);
668         struct ipip_net *ipn = net_generic(net, ipip_net_id);
669
670         ipip_tunnel_unlink(ipn, t);
671         synchronize_net();
672         t->parms.iph.saddr = p->iph.saddr;
673         t->parms.iph.daddr = p->iph.daddr;
674         memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
675         memcpy(t->dev->broadcast, &p->iph.daddr, 4);
676         ipip_tunnel_link(ipn, t);
677         t->parms.iph.ttl = p->iph.ttl;
678         t->parms.iph.tos = p->iph.tos;
679         t->parms.iph.frag_off = p->iph.frag_off;
680         if (t->parms.link != p->link) {
681                 t->parms.link = p->link;
682                 ipip_tunnel_bind_dev(t->dev);
683         }
684         netdev_state_change(t->dev);
685 }
686
687 static int
688 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
689 {
690         int err = 0;
691         struct ip_tunnel_parm p;
692         struct ip_tunnel *t;
693         struct net *net = dev_net(dev);
694         struct ipip_net *ipn = net_generic(net, ipip_net_id);
695
696         switch (cmd) {
697         case SIOCGETTUNNEL:
698                 t = NULL;
699                 if (dev == ipn->fb_tunnel_dev) {
700                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
701                                 err = -EFAULT;
702                                 break;
703                         }
704                         t = ipip_tunnel_locate(net, &p, 0);
705                 }
706                 if (t == NULL)
707                         t = netdev_priv(dev);
708                 memcpy(&p, &t->parms, sizeof(p));
709                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
710                         err = -EFAULT;
711                 break;
712
713         case SIOCADDTUNNEL:
714         case SIOCCHGTUNNEL:
715                 err = -EPERM;
716                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
717                         goto done;
718
719                 err = -EFAULT;
720                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
721                         goto done;
722
723                 err = -EINVAL;
724                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
725                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
726                         goto done;
727                 if (p.iph.ttl)
728                         p.iph.frag_off |= htons(IP_DF);
729
730                 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
731
732                 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
733                         if (t != NULL) {
734                                 if (t->dev != dev) {
735                                         err = -EEXIST;
736                                         break;
737                                 }
738                         } else {
739                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
740                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
741                                         err = -EINVAL;
742                                         break;
743                                 }
744                                 t = netdev_priv(dev);
745                         }
746
747                         ipip_tunnel_update(t, &p);
748                 }
749
750                 if (t) {
751                         err = 0;
752                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
753                                 err = -EFAULT;
754                 } else
755                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
756                 break;
757
758         case SIOCDELTUNNEL:
759                 err = -EPERM;
760                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
761                         goto done;
762
763                 if (dev == ipn->fb_tunnel_dev) {
764                         err = -EFAULT;
765                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
766                                 goto done;
767                         err = -ENOENT;
768                         if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
769                                 goto done;
770                         err = -EPERM;
771                         if (t->dev == ipn->fb_tunnel_dev)
772                                 goto done;
773                         dev = t->dev;
774                 }
775                 unregister_netdevice(dev);
776                 err = 0;
777                 break;
778
779         default:
780                 err = -EINVAL;
781         }
782
783 done:
784         return err;
785 }
786
787 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
788 {
789         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
790                 return -EINVAL;
791         dev->mtu = new_mtu;
792         return 0;
793 }
794
/* Device operations installed on every IPIP device by ipip_tunnel_setup(). */
static const struct net_device_ops ipip_netdev_ops = {
        .ndo_uninit     = ipip_tunnel_uninit,
        .ndo_start_xmit = ipip_tunnel_xmit,
        .ndo_do_ioctl   = ipip_tunnel_ioctl,
        .ndo_change_mtu = ipip_tunnel_change_mtu,
        .ndo_get_stats64 = ipip_get_stats64,
};
802
803 static void ipip_dev_free(struct net_device *dev)
804 {
805         free_percpu(dev->tstats);
806         free_netdev(dev);
807 }
808
/* Feature bits set in both dev->features and dev->hw_features at setup. */
#define IPIP_FEATURES (NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | \
                       NETIF_F_FRAGLIST | NETIF_F_SG)
813
814 static void ipip_tunnel_setup(struct net_device *dev)
815 {
816         dev->netdev_ops         = &ipip_netdev_ops;
817         dev->destructor         = ipip_dev_free;
818
819         dev->type               = ARPHRD_TUNNEL;
820         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
821         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
822         dev->flags              = IFF_NOARP;
823         dev->iflink             = 0;
824         dev->addr_len           = 4;
825         dev->features           |= NETIF_F_NETNS_LOCAL;
826         dev->features           |= NETIF_F_LLTX;
827         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
828
829         dev->features           |= IPIP_FEATURES;
830         dev->hw_features        |= IPIP_FEATURES;
831 }
832
833 static int ipip_tunnel_init(struct net_device *dev)
834 {
835         struct ip_tunnel *tunnel = netdev_priv(dev);
836
837         tunnel->dev = dev;
838
839         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
840         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
841
842         ipip_tunnel_bind_dev(dev);
843
844         dev->tstats = alloc_percpu(struct pcpu_tstats);
845         if (!dev->tstats)
846                 return -ENOMEM;
847
848         return 0;
849 }
850
851 static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
852 {
853         struct ip_tunnel *tunnel = netdev_priv(dev);
854         struct iphdr *iph = &tunnel->parms.iph;
855         struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
856
857         tunnel->dev = dev;
858         strcpy(tunnel->parms.name, dev->name);
859
860         iph->version            = 4;
861         iph->protocol           = IPPROTO_IPIP;
862         iph->ihl                = 5;
863
864         dev->tstats = alloc_percpu(struct pcpu_tstats);
865         if (!dev->tstats)
866                 return -ENOMEM;
867
868         dev_hold(dev);
869         rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
870         return 0;
871 }
872
873 static void ipip_netlink_parms(struct nlattr *data[],
874                                struct ip_tunnel_parm *parms)
875 {
876         memset(parms, 0, sizeof(*parms));
877
878         parms->iph.version = 4;
879         parms->iph.protocol = IPPROTO_IPIP;
880         parms->iph.ihl = 5;
881
882         if (!data)
883                 return;
884
885         if (data[IFLA_IPTUN_LINK])
886                 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
887
888         if (data[IFLA_IPTUN_LOCAL])
889                 parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
890
891         if (data[IFLA_IPTUN_REMOTE])
892                 parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
893
894         if (data[IFLA_IPTUN_TTL]) {
895                 parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
896                 if (parms->iph.ttl)
897                         parms->iph.frag_off = htons(IP_DF);
898         }
899
900         if (data[IFLA_IPTUN_TOS])
901                 parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
902
903         if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
904                 parms->iph.frag_off = htons(IP_DF);
905 }
906
907 static int ipip_newlink(struct net *src_net, struct net_device *dev,
908                         struct nlattr *tb[], struct nlattr *data[])
909 {
910         struct net *net = dev_net(dev);
911         struct ip_tunnel *nt;
912
913         nt = netdev_priv(dev);
914         ipip_netlink_parms(data, &nt->parms);
915
916         if (ipip_tunnel_locate(net, &nt->parms, 0))
917                 return -EEXIST;
918
919         return ipip_tunnel_create(dev);
920 }
921
922 static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
923                            struct nlattr *data[])
924 {
925         struct ip_tunnel *t;
926         struct ip_tunnel_parm p;
927         struct net *net = dev_net(dev);
928         struct ipip_net *ipn = net_generic(net, ipip_net_id);
929
930         if (dev == ipn->fb_tunnel_dev)
931                 return -EINVAL;
932
933         ipip_netlink_parms(data, &p);
934
935         if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
936             (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
937                 return -EINVAL;
938
939         t = ipip_tunnel_locate(net, &p, 0);
940
941         if (t) {
942                 if (t->dev != dev)
943                         return -EEXIST;
944         } else
945                 t = netdev_priv(dev);
946
947         ipip_tunnel_update(t, &p);
948         return 0;
949 }
950
951 static size_t ipip_get_size(const struct net_device *dev)
952 {
953         return
954                 /* IFLA_IPTUN_LINK */
955                 nla_total_size(4) +
956                 /* IFLA_IPTUN_LOCAL */
957                 nla_total_size(4) +
958                 /* IFLA_IPTUN_REMOTE */
959                 nla_total_size(4) +
960                 /* IFLA_IPTUN_TTL */
961                 nla_total_size(1) +
962                 /* IFLA_IPTUN_TOS */
963                 nla_total_size(1) +
964                 /* IFLA_IPTUN_PMTUDISC */
965                 nla_total_size(1) +
966                 0;
967 }
968
969 static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
970 {
971         struct ip_tunnel *tunnel = netdev_priv(dev);
972         struct ip_tunnel_parm *parm = &tunnel->parms;
973
974         if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
975             nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
976             nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
977             nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
978             nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
979             nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
980                        !!(parm->iph.frag_off & htons(IP_DF))))
981                 goto nla_put_failure;
982         return 0;
983
984 nla_put_failure:
985         return -EMSGSIZE;
986 }
987
988 static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
989         [IFLA_IPTUN_LINK]               = { .type = NLA_U32 },
990         [IFLA_IPTUN_LOCAL]              = { .type = NLA_U32 },
991         [IFLA_IPTUN_REMOTE]             = { .type = NLA_U32 },
992         [IFLA_IPTUN_TTL]                = { .type = NLA_U8 },
993         [IFLA_IPTUN_TOS]                = { .type = NLA_U8 },
994         [IFLA_IPTUN_PMTUDISC]           = { .type = NLA_U8 },
995 };
996
997 static struct rtnl_link_ops ipip_link_ops __read_mostly = {
998         .kind           = "ipip",
999         .maxtype        = IFLA_IPTUN_MAX,
1000         .policy         = ipip_policy,
1001         .priv_size      = sizeof(struct ip_tunnel),
1002         .setup          = ipip_tunnel_setup,
1003         .newlink        = ipip_newlink,
1004         .changelink     = ipip_changelink,
1005         .get_size       = ipip_get_size,
1006         .fill_info      = ipip_fill_info,
1007 };
1008
1009 static struct xfrm_tunnel ipip_handler __read_mostly = {
1010         .handler        =       ipip_rcv,
1011         .err_handler    =       ipip_err,
1012         .priority       =       1,
1013 };
1014
1015 static const char banner[] __initconst =
1016         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
1017
1018 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
1019 {
1020         int prio;
1021
1022         for (prio = 1; prio < 4; prio++) {
1023                 int h;
1024                 for (h = 0; h < HASH_SIZE; h++) {
1025                         struct ip_tunnel *t;
1026
1027                         t = rtnl_dereference(ipn->tunnels[prio][h]);
1028                         while (t != NULL) {
1029                                 unregister_netdevice_queue(t->dev, head);
1030                                 t = rtnl_dereference(t->next);
1031                         }
1032                 }
1033         }
1034 }
1035
1036 static int __net_init ipip_init_net(struct net *net)
1037 {
1038         struct ipip_net *ipn = net_generic(net, ipip_net_id);
1039         struct ip_tunnel *t;
1040         int err;
1041
1042         ipn->tunnels[0] = ipn->tunnels_wc;
1043         ipn->tunnels[1] = ipn->tunnels_l;
1044         ipn->tunnels[2] = ipn->tunnels_r;
1045         ipn->tunnels[3] = ipn->tunnels_r_l;
1046
1047         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
1048                                            "tunl0",
1049                                            ipip_tunnel_setup);
1050         if (!ipn->fb_tunnel_dev) {
1051                 err = -ENOMEM;
1052                 goto err_alloc_dev;
1053         }
1054         dev_net_set(ipn->fb_tunnel_dev, net);
1055
1056         err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
1057         if (err)
1058                 goto err_reg_dev;
1059
1060         if ((err = register_netdev(ipn->fb_tunnel_dev)))
1061                 goto err_reg_dev;
1062
1063         t = netdev_priv(ipn->fb_tunnel_dev);
1064
1065         strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
1066         return 0;
1067
1068 err_reg_dev:
1069         ipip_dev_free(ipn->fb_tunnel_dev);
1070 err_alloc_dev:
1071         /* nothing */
1072         return err;
1073 }
1074
1075 static void __net_exit ipip_exit_net(struct net *net)
1076 {
1077         struct ipip_net *ipn = net_generic(net, ipip_net_id);
1078         LIST_HEAD(list);
1079
1080         rtnl_lock();
1081         ipip_destroy_tunnels(ipn, &list);
1082         unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
1083         unregister_netdevice_many(&list);
1084         rtnl_unlock();
1085 }
1086
1087 static struct pernet_operations ipip_net_ops = {
1088         .init = ipip_init_net,
1089         .exit = ipip_exit_net,
1090         .id   = &ipip_net_id,
1091         .size = sizeof(struct ipip_net),
1092 };
1093
1094 static int __init ipip_init(void)
1095 {
1096         int err;
1097
1098         printk(banner);
1099
1100         err = register_pernet_device(&ipip_net_ops);
1101         if (err < 0)
1102                 return err;
1103         err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
1104         if (err < 0) {
1105                 pr_info("%s: can't register tunnel\n", __func__);
1106                 goto xfrm_tunnel_failed;
1107         }
1108         err = rtnl_link_register(&ipip_link_ops);
1109         if (err < 0)
1110                 goto rtnl_link_failed;
1111
1112 out:
1113         return err;
1114
1115 rtnl_link_failed:
1116         xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
1117 xfrm_tunnel_failed:
1118         unregister_pernet_device(&ipip_net_ops);
1119         goto out;
1120 }
1121
1122 static void __exit ipip_fini(void)
1123 {
1124         rtnl_link_unregister(&ipip_link_ops);
1125         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
1126                 pr_info("%s: can't deregister tunnel\n", __func__);
1127
1128         unregister_pernet_device(&ipip_net_ops);
1129 }
1130
1131 module_init(ipip_init);
1132 module_exit(ipip_fini);
1133 MODULE_LICENSE("GPL");
1134 MODULE_ALIAS_NETDEV("tunl0");