ip_tunnel: Set network header properly for IP_ECN_decapsulate()
[platform/adaptation/renesas_rcar/renesas_kernel.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
44
45 #include <net/sock.h>
46 #include <net/ip.h>
47 #include <net/icmp.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
50 #include <net/arp.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
54 #include <net/xfrm.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
58
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
64
65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
/* Replace the cached route in @idst with @dst.
 *
 * A DST_NOCACHE route must not be cached (its lifetime is tied to the
 * skb that carried it), so it is treated as "no route".  Otherwise a
 * reference is taken before publishing.  The xchg() makes the swap
 * atomic against concurrent writers; the old entry's reference is
 * dropped afterwards (dst_release(NULL) is a no-op).
 */
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst)
{
	struct dst_entry *old_dst;

	if (dst) {
		if (dst->flags & DST_NOCACHE)
			dst = NULL;
		else
			dst_clone(dst);
	}
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
}
85
86 static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
87 {
88         __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
89 }
90
91 static void tunnel_dst_reset(struct ip_tunnel *t)
92 {
93         tunnel_dst_set(t, NULL);
94 }
95
96 void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
97 {
98         int i;
99
100         for_each_possible_cpu(i)
101                 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
102 }
103 EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
104
/* Fetch this CPU's cached route, validating it first.
 *
 * Under the RCU read lock, an obsolete dst is re-checked via its
 * ->check() callback with @cookie; a stale entry resets the whole
 * cache and returns NULL so the caller performs a fresh route lookup.
 * A valid dst is returned with a reference held (caller releases).
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
{
	struct dst_entry *dst;

	rcu_read_lock();
	dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
	if (dst) {
		if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
			rcu_read_unlock();
			tunnel_dst_reset(t);
			return NULL;
		}
		dst_hold(dst);
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
122
123 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
124                                 __be16 flags, __be32 key)
125 {
126         if (p->i_flags & TUNNEL_KEY) {
127                 if (flags & TUNNEL_KEY)
128                         return key == p->i_key;
129                 else
130                         /* key expected, none present */
131                         return false;
132         } else
133                 return !(flags & TUNNEL_KEY);
134 }
135
136 /* Fallback tunnel: no source, no destination, no key, no options
137
138    Tunnel hash table:
139    We require exact key match i.e. if a key is present in packet
140    it will match only tunnel with the same key; if it is not present,
141    it will match only keyless tunnel.
142
   All keyless packets, if not matched against configured keyless tunnels,
   will match the fallback tunnel.
145    Given src, dst and key, find appropriate for input tunnel.
146 */
/* Given src, dst and key from a received packet, find the best matching
 * input tunnel, preferring exact matches over wildcards:
 *   pass 1: exact (local, remote) match in the keyed bucket;
 *   pass 2: remote-only match (wildcard local);
 *   pass 3: wildcard-remote bucket (local address, or local multicast);
 *   pass 4: key-only match in the wildcard bucket, skipped when the
 *           caller requests a keyless lookup via TUNNEL_NO_KEY;
 *   finally the netns fallback device, if one exists and is up.
 * Within each pass a tunnel on the requested link wins outright; a
 * match on another link is remembered as a candidate and used only if
 * no later pass produces a link-exact match.
 * Must be called under the RCU read lock (hlist_for_each_entry_rcu).
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	/* pass 1: exact local + remote */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* pass 2: remote only, local wildcarded */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* pass 3: wildcard-remote bucket (remote hashed as 0) */
	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		/* matches when local is the tunnel's source, or its
		 * multicast destination
		 */
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* pass 4: key-only match in the wildcard bucket */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	/* last resort: the per-netns fallback device */
	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
232
233 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
234                                     struct ip_tunnel_parm *parms)
235 {
236         unsigned int h;
237         __be32 remote;
238
239         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
240                 remote = parms->iph.daddr;
241         else
242                 remote = 0;
243
244         h = ip_tunnel_hash(parms->i_key, remote);
245         return &itn->tunnels[h];
246 }
247
248 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
249 {
250         struct hlist_head *head = ip_bucket(itn, &t->parms);
251
252         hlist_add_head_rcu(&t->hash_node, head);
253 }
254
/* Unlink @t from its hash bucket.  RCU readers may still observe the
 * entry until a grace period elapses.
 */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
259
/* Exact-match lookup by configured parameters, used by the management
 * paths (ioctl/netlink).  Unlike ip_tunnel_lookup() there is no
 * wildcarding: saddr, daddr, i_key, link and device type must all
 * match.  Returns NULL when no such tunnel exists (the iteration macro
 * leaves @t NULL when the bucket is exhausted).
 */
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    key == t->parms.i_key &&
		    link == t->parms.link &&
		    type == t->dev->type)
			break;
	}
	return t;
}
281
/* Allocate and register a tunnel netdevice.
 *
 * When no explicit name was requested, one is built from the link type
 * plus the "%d" kernel name template (e.g. "gre%d"); ops->kind must
 * leave room for the template and NUL.  Must run under RTNL.
 * Returns the registered device, or an ERR_PTR() on failure.
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* need 2 bytes for "%d" plus the terminating NUL */
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
327
328 static inline void init_tunnel_flow(struct flowi4 *fl4,
329                                     int proto,
330                                     __be32 daddr, __be32 saddr,
331                                     __be32 key, __u8 tos, int oif)
332 {
333         memset(fl4, 0, sizeof(*fl4));
334         fl4->flowi4_oif = oif;
335         fl4->daddr = daddr;
336         fl4->saddr = saddr;
337         fl4->flowi4_tos = tos;
338         fl4->flowi4_proto = proto;
339         fl4->fl4_gre_key = key;
340 }
341
/* Guess the underlying output device for this tunnel — by routing to
 * the configured destination, falling back to the configured link —
 * and use it to size dev->needed_headroom.  Returns the MTU the tunnel
 * device should use, clamped to the IPv4 minimum of 68.
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			/* prime the per-cpu dst cache while we hold the route */
			tunnel_dst_set(tunnel, &rt->dst);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	/* never report less than the IPv4 minimum MTU */
	if (mtu < 68)
		mtu = 68;

	return mtu;
}
389
390 static struct ip_tunnel *ip_tunnel_create(struct net *net,
391                                           struct ip_tunnel_net *itn,
392                                           struct ip_tunnel_parm *parms)
393 {
394         struct ip_tunnel *nt, *fbt;
395         struct net_device *dev;
396
397         BUG_ON(!itn->fb_tunnel_dev);
398         fbt = netdev_priv(itn->fb_tunnel_dev);
399         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
400         if (IS_ERR(dev))
401                 return NULL;
402
403         dev->mtu = ip_tunnel_bind_dev(dev);
404
405         nt = netdev_priv(dev);
406         ip_tunnel_add(itn, nt);
407         return nt;
408 }
409
410 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
411                   const struct tnl_ptk_info *tpi, bool log_ecn_error)
412 {
413         struct pcpu_sw_netstats *tstats;
414         const struct iphdr *iph = ip_hdr(skb);
415         int err;
416
417 #ifdef CONFIG_NET_IPGRE_BROADCAST
418         if (ipv4_is_multicast(iph->daddr)) {
419                 tunnel->dev->stats.multicast++;
420                 skb->pkt_type = PACKET_BROADCAST;
421         }
422 #endif
423
424         if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
425              ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
426                 tunnel->dev->stats.rx_crc_errors++;
427                 tunnel->dev->stats.rx_errors++;
428                 goto drop;
429         }
430
431         if (tunnel->parms.i_flags&TUNNEL_SEQ) {
432                 if (!(tpi->flags&TUNNEL_SEQ) ||
433                     (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
434                         tunnel->dev->stats.rx_fifo_errors++;
435                         tunnel->dev->stats.rx_errors++;
436                         goto drop;
437                 }
438                 tunnel->i_seqno = ntohl(tpi->seq) + 1;
439         }
440
441         skb_reset_network_header(skb);
442
443         err = IP_ECN_decapsulate(iph, skb);
444         if (unlikely(err)) {
445                 if (log_ecn_error)
446                         net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
447                                         &iph->saddr, iph->tos);
448                 if (err > 1) {
449                         ++tunnel->dev->stats.rx_frame_errors;
450                         ++tunnel->dev->stats.rx_errors;
451                         goto drop;
452                 }
453         }
454
455         tstats = this_cpu_ptr(tunnel->dev->tstats);
456         u64_stats_update_begin(&tstats->syncp);
457         tstats->rx_packets++;
458         tstats->rx_bytes += skb->len;
459         u64_stats_update_end(&tstats->syncp);
460
461         skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
462
463         if (tunnel->dev->type == ARPHRD_ETHER) {
464                 skb->protocol = eth_type_trans(skb, tunnel->dev);
465                 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
466         } else {
467                 skb->dev = tunnel->dev;
468         }
469
470         gro_cells_receive(&tunnel->gro_cells, skb);
471         return 0;
472
473 drop:
474         kfree_skb(skb);
475         return 0;
476 }
477 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
478
/* Path-MTU handling for the tunnel transmit path.
 *
 * Computes the MTU available to the inner packet (honouring DF),
 * propagates it to the inner route, and generates the appropriate
 * "packet too big" ICMP/ICMPv6 error towards the sender when a non-GSO
 * packet exceeds it.  Returns 0 to continue transmission, or -E2BIG
 * when the packet was rejected.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* only clamp the route MTU for a fixed unicast tunnel
		 * destination or a /128 host route
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
526
/* Generic transmit path for IPv4 tunnels.
 *
 * @skb:        inner packet to encapsulate (consumed in all cases)
 * @dev:        the tunnel device
 * @tnl_params: outer IP header template from the tunnel configuration
 * @protocol:   outer IP protocol number (e.g. IPPROTO_GRE)
 *
 * Resolves the outer destination (including the NBMA case where it is
 * derived from the inner headers), selects ToS/TTL/DF for the outer
 * header, routes the outer packet — using the per-cpu dst cache when
 * the flow is "connected", i.e. fully determined by configuration —
 * and hands it to iptunnel_xmit().
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected = true;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel: derive the outer destination from the
		 * inner packet's routing information
		 */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* only IPv4-compatible IPv6 addresses embed an
			 * IPv4 destination we can tunnel to
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		/* low bit set: inherit ToS from the inner packet */
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	/* the dst cache is only valid for fully-configured flows */
	rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst);
	}

	/* routing back into ourselves would loop forever */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* throttle after recent ICMP errors on this tunnel */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		/* TTL 0 means "inherit from the inner packet" */
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
686
/* Apply new parameters @p to an existing tunnel @t.
 *
 * The tunnel must be unhashed and re-hashed because saddr/daddr/i_key
 * determine its hash bucket.  A link change re-binds the underlying
 * device and (if @set_mtu) updates the MTU.  The per-cpu dst cache is
 * reset since any cached route may now be wrong.  Called under RTNL.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* non-Ethernet tunnels expose the endpoints as the
		 * device's hardware and broadcast addresses
		 */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
719
/* Common ioctl backend for the tunnel drivers.
 *
 * SIOCGETTUNNEL: on the fallback device, look the tunnel up by the
 * supplied parameters; otherwise report this device's own parameters.
 * SIOCADD/CHGTUNNEL (CAP_NET_ADMIN): normalize the request, create the
 * tunnel on ADD if missing, and update it — refusing a CHG that would
 * collide with another device or alter the P-t-P/broadcast nature.
 * SIOCDELTUNNEL (CAP_NET_ADMIN): unregister the addressed tunnel; the
 * fallback device itself may not be deleted.
 * Returns 0 or a negative errno.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == itn->fb_tunnel_dev)
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* a fixed TTL forbids fragmentation of the outer packet */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL))
			t = ip_tunnel_create(net, itn, p);

		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* the new parameters must not change the
				 * device's point-to-point/broadcast nature
				 */
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			/* the fallback device itself cannot be deleted */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
813
814 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
815 {
816         struct ip_tunnel *tunnel = netdev_priv(dev);
817         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
818
819         if (new_mtu < 68 ||
820             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
821                 return -EINVAL;
822         dev->mtu = new_mtu;
823         return 0;
824 }
825 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
826
/* Device destructor: release per-device state (GRO cells, per-cpu dst
 * cache and per-cpu stats) before freeing the netdevice itself.
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
836
837 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
838 {
839         struct ip_tunnel *tunnel = netdev_priv(dev);
840         struct ip_tunnel_net *itn;
841
842         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
843
844         if (itn->fb_tunnel_dev != dev) {
845                 ip_tunnel_del(netdev_priv(dev));
846                 unregister_netdevice_queue(dev, head);
847         }
848 }
849 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
850
/* Per-netns initialization for a tunnel type: empty the hash table
 * and, unless @ops is NULL (no fallback wanted), create the single
 * fallback device named @devname under RTNL.  The fallback device is
 * marked netns-local: moving it to another namespace would be unsafe.
 * Returns 0 on success or a negative errno.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
884
/* Collect every tunnel device belonging to @itn onto @head for batched
 * unregistration.
 *
 * Two passes are required: walking the fb device's netns catches all
 * tunnels of this type whose netdevice still lives there, while the
 * hash-table walk catches tunnels whose netdevice has been moved to a
 * different netns and is therefore invisible to the first pass.
 */
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
                              struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	/* Pass 1: every device of this tunnel type still in @net. */
	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	/* Pass 2: hashed tunnels whose device moved to another netns. */
	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}
909
910 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
911 {
912         LIST_HEAD(list);
913
914         rtnl_lock();
915         ip_tunnel_destroy(itn, &list, ops);
916         unregister_netdevice_many(&list);
917         rtnl_unlock();
918 }
919 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
920
921 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
922                       struct ip_tunnel_parm *p)
923 {
924         struct ip_tunnel *nt;
925         struct net *net = dev_net(dev);
926         struct ip_tunnel_net *itn;
927         int mtu;
928         int err;
929
930         nt = netdev_priv(dev);
931         itn = net_generic(net, nt->ip_tnl_net_id);
932
933         if (ip_tunnel_find(itn, p, dev->type))
934                 return -EEXIST;
935
936         nt->net = net;
937         nt->parms = *p;
938         err = register_netdevice(dev);
939         if (err)
940                 goto out;
941
942         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
943                 eth_hw_addr_random(dev);
944
945         mtu = ip_tunnel_bind_dev(dev);
946         if (!tb[IFLA_MTU])
947                 dev->mtu = mtu;
948
949         ip_tunnel_add(itn, nt);
950
951 out:
952         return err;
953 }
954 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
955
/* rtnl changelink handler: apply the new parameters @p to an existing
 * tunnel device.
 *
 * Returns -EINVAL when @dev is the per-netns fallback device or when the
 * flags implied by the new destination address differ from the device's
 * current IFF_BROADCAST/IFF_POINTOPOINT flags, -EEXIST when @p matches a
 * different already-configured tunnel, and 0 on success.
 */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		/* @p matches an existing tunnel: only OK if it is @dev itself. */
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			/* Flags the new destination address implies. */
			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			/* Reject an update whose implied flags differ from
			 * the device's current ones.
			 */
			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	/* Commit; MTU is recomputed unless the caller supplied IFLA_MTU. */
	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
993
994 int ip_tunnel_init(struct net_device *dev)
995 {
996         struct ip_tunnel *tunnel = netdev_priv(dev);
997         struct iphdr *iph = &tunnel->parms.iph;
998         int i, err;
999
1000         dev->destructor = ip_tunnel_dev_free;
1001         dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
1002         if (!dev->tstats)
1003                 return -ENOMEM;
1004
1005         for_each_possible_cpu(i) {
1006                 struct pcpu_sw_netstats *ipt_stats;
1007                 ipt_stats = per_cpu_ptr(dev->tstats, i);
1008                 u64_stats_init(&ipt_stats->syncp);
1009         }
1010
1011         tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1012         if (!tunnel->dst_cache) {
1013                 free_percpu(dev->tstats);
1014                 return -ENOMEM;
1015         }
1016
1017         err = gro_cells_init(&tunnel->gro_cells, dev);
1018         if (err) {
1019                 free_percpu(tunnel->dst_cache);
1020                 free_percpu(dev->tstats);
1021                 return err;
1022         }
1023
1024         tunnel->dev = dev;
1025         tunnel->net = dev_net(dev);
1026         strcpy(tunnel->parms.name, dev->name);
1027         iph->version            = 4;
1028         iph->ihl                = 5;
1029
1030         return 0;
1031 }
1032 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1033
1034 void ip_tunnel_uninit(struct net_device *dev)
1035 {
1036         struct ip_tunnel *tunnel = netdev_priv(dev);
1037         struct net *net = tunnel->net;
1038         struct ip_tunnel_net *itn;
1039
1040         itn = net_generic(net, tunnel->ip_tnl_net_id);
1041         /* fb_tunnel_dev will be unregisted in net-exit call. */
1042         if (itn->fb_tunnel_dev != dev)
1043                 ip_tunnel_del(netdev_priv(dev));
1044
1045         ip_tunnel_dst_reset_all(tunnel);
1046 }
1047 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1048
1049 /* Do least required initialization, rest of init is done in tunnel_init call */
1050 void ip_tunnel_setup(struct net_device *dev, int net_id)
1051 {
1052         struct ip_tunnel *tunnel = netdev_priv(dev);
1053         tunnel->ip_tnl_net_id = net_id;
1054 }
1055 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1056
1057 MODULE_LICENSE("GPL");