8021q: fix a potential memory leak
[platform/adaptation/renesas_rcar/renesas_kernel.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
44
45 #include <net/sock.h>
46 #include <net/ip.h>
47 #include <net/icmp.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
50 #include <net/arp.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
54 #include <net/xfrm.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
58
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
64
65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
/* Replace the dst cached in @idst with @dst, dropping the old entry's
 * reference.  A DST_NOCACHE route must not be cached (its lifetime is
 * bound to the packet that created it), so it is treated as "clear the
 * slot".  xchg() makes the swap atomic against concurrent updaters of
 * the same per-cpu slot.
 */
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst)
{
	struct dst_entry *old_dst;

	if (dst) {
		if (dst->flags & DST_NOCACHE)
			dst = NULL;
		else
			dst_clone(dst);	/* take a reference for the cache */
	}
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);	/* dst_release(NULL) is a no-op */
}
85
86 static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
87 {
88         __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
89 }
90
/* Invalidate the cached route for the current CPU only. */
static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL);
}
95
96 void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
97 {
98         int i;
99
100         for_each_possible_cpu(i)
101                 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
102 }
103 EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
104
/* Fetch the per-cpu cached route, validating it against @cookie.
 * Returns a held rtable (caller must ip_rt_put()) or NULL when the
 * cache is empty or stale.  A stale entry is cleared for this CPU so
 * the next caller re-resolves the route.
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
{
	struct dst_entry *dst;

	rcu_read_lock();
	dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
	if (dst) {
		/* obsolete dst: ask its ops whether it is still usable */
		if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
			rcu_read_unlock();
			tunnel_dst_reset(t);
			return NULL;
		}
		dst_hold(dst);	/* reference handed to the caller */
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
122
123 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
124                                 __be16 flags, __be32 key)
125 {
126         if (p->i_flags & TUNNEL_KEY) {
127                 if (flags & TUNNEL_KEY)
128                         return key == p->i_key;
129                 else
130                         /* key expected, none present */
131                         return false;
132         } else
133                 return !(flags & TUNNEL_KEY);
134 }
135
136 /* Fallback tunnel: no source, no destination, no key, no options
137
138    Tunnel hash table:
139    We require exact key match i.e. if a key is present in packet
140    it will match only tunnel with the same key; if it is not present,
141    it will match only keyless tunnel.
142
143    All keysless packets, if not matched configured keyless tunnels
144    will match fallback tunnel.
145    Given src, dst and key, find appropriate for input tunnel.
146 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	/* Pass 1: exact (saddr, daddr) match.  A link mismatch makes the
	 * tunnel only a candidate; an exact link match wins immediately.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Pass 2: daddr matches, tunnel has a wildcard (0) saddr. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Passes 3 and 4 use the bucket for a wildcard remote. */
	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	/* Pass 3: local address matches a wildcard-daddr tunnel's saddr,
	 * or the packet was sent to a multicast group the tunnel targets.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* Pass 4: fully wildcard tunnel, matched on key alone. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	/* Prefer any link-mismatched candidate over the fallback device. */
	if (cand)
		return cand;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);


	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
236
237 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
238                                     struct ip_tunnel_parm *parms)
239 {
240         unsigned int h;
241         __be32 remote;
242
243         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
244                 remote = parms->iph.daddr;
245         else
246                 remote = 0;
247
248         h = ip_tunnel_hash(parms->i_key, remote);
249         return &itn->tunnels[h];
250 }
251
252 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
253 {
254         struct hlist_head *head = ip_bucket(itn, &t->parms);
255
256         hlist_add_head_rcu(&t->hash_node, head);
257 }
258
259 static void ip_tunnel_del(struct ip_tunnel *t)
260 {
261         hlist_del_init_rcu(&t->hash_node);
262 }
263
264 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
265                                         struct ip_tunnel_parm *parms,
266                                         int type)
267 {
268         __be32 remote = parms->iph.daddr;
269         __be32 local = parms->iph.saddr;
270         __be32 key = parms->i_key;
271         int link = parms->link;
272         struct ip_tunnel *t = NULL;
273         struct hlist_head *head = ip_bucket(itn, parms);
274
275         hlist_for_each_entry_rcu(t, head, hash_node) {
276                 if (local == t->parms.iph.saddr &&
277                     remote == t->parms.iph.daddr &&
278                     key == t->parms.i_key &&
279                     link == t->parms.link &&
280                     type == t->dev->type)
281                         break;
282         }
283         return t;
284 }
285
/* Allocate and register a tunnel netdevice for @parms under @net.
 * If no name was supplied, "<kind>%d" is used so the core picks a free
 * index.  Returns the registered device or an ERR_PTR(); on failure no
 * resources remain allocated.  Must be called with RTNL held.
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* need room for the "%d" suffix plus the terminator */
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
331
332 static inline void init_tunnel_flow(struct flowi4 *fl4,
333                                     int proto,
334                                     __be32 daddr, __be32 saddr,
335                                     __be32 key, __u8 tos, int oif)
336 {
337         memset(fl4, 0, sizeof(*fl4));
338         fl4->flowi4_oif = oif;
339         fl4->daddr = daddr;
340         fl4->saddr = saddr;
341         fl4->flowi4_tos = tos;
342         fl4->flowi4_proto = proto;
343         fl4->fl4_gre_key = key;
344 }
345
/* Bind the tunnel to an underlay device (resolved from the destination
 * route, or tunnel->parms.link as a fallback), set needed_headroom and
 * iflink accordingly, and return the MTU the tunnel device should use.
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			/* warm the per-cpu dst cache while we have the route */
			tunnel_dst_set(tunnel, &rt->dst);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	/* never report less than the RFC 791 minimum IPv4 MTU */
	if (mtu < 68)
		mtu = 68;

	return mtu;
}
393
394 static struct ip_tunnel *ip_tunnel_create(struct net *net,
395                                           struct ip_tunnel_net *itn,
396                                           struct ip_tunnel_parm *parms)
397 {
398         struct ip_tunnel *nt, *fbt;
399         struct net_device *dev;
400
401         BUG_ON(!itn->fb_tunnel_dev);
402         fbt = netdev_priv(itn->fb_tunnel_dev);
403         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
404         if (IS_ERR(dev))
405                 return NULL;
406
407         dev->mtu = ip_tunnel_bind_dev(dev);
408
409         nt = netdev_priv(dev);
410         ip_tunnel_add(itn, nt);
411         return nt;
412 }
413
/* Common receive path for decapsulated tunnel packets: validate the
 * checksum/sequence expectations against the tunnel's flags, undo ECN
 * encapsulation, account stats and hand the inner packet to GRO.
 * Always consumes @skb and returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* drop when the packet's CSUM flag disagrees with the tunnel's */
	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* enforce in-order delivery when the tunnel uses sequence numbers */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	/* err > 1 means the packet must be dropped per ECN decap rules */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* scrub state when the packet crosses a netns boundary */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
482
/* Update path MTU state for a packet about to be tunnelled and send the
 * appropriate "too big" error (ICMP frag-needed or ICMPv6 PTB) when the
 * inner packet does not fit.  Returns 0 to proceed or -E2BIG when the
 * packet must not be transmitted.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	/* DF set: the route's MTU minus our encapsulation overhead governs */
	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* record the reduced MTU on host routes (or p2p tunnels) */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
530
531 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
532                     const struct iphdr *tnl_params, const u8 protocol)
533 {
534         struct ip_tunnel *tunnel = netdev_priv(dev);
535         const struct iphdr *inner_iph;
536         struct flowi4 fl4;
537         u8     tos, ttl;
538         __be16 df;
539         struct rtable *rt;              /* Route to the other host */
540         unsigned int max_headroom;      /* The extra header space needed */
541         __be32 dst;
542         int err;
543         bool connected;
544
545         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
546         connected = (tunnel->parms.iph.daddr != 0);
547
548         dst = tnl_params->daddr;
549         if (dst == 0) {
550                 /* NBMA tunnel */
551
552                 if (skb_dst(skb) == NULL) {
553                         dev->stats.tx_fifo_errors++;
554                         goto tx_error;
555                 }
556
557                 if (skb->protocol == htons(ETH_P_IP)) {
558                         rt = skb_rtable(skb);
559                         dst = rt_nexthop(rt, inner_iph->daddr);
560                 }
561 #if IS_ENABLED(CONFIG_IPV6)
562                 else if (skb->protocol == htons(ETH_P_IPV6)) {
563                         const struct in6_addr *addr6;
564                         struct neighbour *neigh;
565                         bool do_tx_error_icmp;
566                         int addr_type;
567
568                         neigh = dst_neigh_lookup(skb_dst(skb),
569                                                  &ipv6_hdr(skb)->daddr);
570                         if (neigh == NULL)
571                                 goto tx_error;
572
573                         addr6 = (const struct in6_addr *)&neigh->primary_key;
574                         addr_type = ipv6_addr_type(addr6);
575
576                         if (addr_type == IPV6_ADDR_ANY) {
577                                 addr6 = &ipv6_hdr(skb)->daddr;
578                                 addr_type = ipv6_addr_type(addr6);
579                         }
580
581                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
582                                 do_tx_error_icmp = true;
583                         else {
584                                 do_tx_error_icmp = false;
585                                 dst = addr6->s6_addr32[3];
586                         }
587                         neigh_release(neigh);
588                         if (do_tx_error_icmp)
589                                 goto tx_error_icmp;
590                 }
591 #endif
592                 else
593                         goto tx_error;
594
595                 connected = false;
596         }
597
598         tos = tnl_params->tos;
599         if (tos & 0x1) {
600                 tos &= ~0x1;
601                 if (skb->protocol == htons(ETH_P_IP)) {
602                         tos = inner_iph->tos;
603                         connected = false;
604                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
605                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
606                         connected = false;
607                 }
608         }
609
610         init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
611                          tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
612
613         rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;
614
615         if (!rt) {
616                 rt = ip_route_output_key(tunnel->net, &fl4);
617
618                 if (IS_ERR(rt)) {
619                         dev->stats.tx_carrier_errors++;
620                         goto tx_error;
621                 }
622                 if (connected)
623                         tunnel_dst_set(tunnel, &rt->dst);
624         }
625
626         if (rt->dst.dev == dev) {
627                 ip_rt_put(rt);
628                 dev->stats.collisions++;
629                 goto tx_error;
630         }
631
632         if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
633                 ip_rt_put(rt);
634                 goto tx_error;
635         }
636
637         if (tunnel->err_count > 0) {
638                 if (time_before(jiffies,
639                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
640                         tunnel->err_count--;
641
642                         memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
643                         dst_link_failure(skb);
644                 } else
645                         tunnel->err_count = 0;
646         }
647
648         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
649         ttl = tnl_params->ttl;
650         if (ttl == 0) {
651                 if (skb->protocol == htons(ETH_P_IP))
652                         ttl = inner_iph->ttl;
653 #if IS_ENABLED(CONFIG_IPV6)
654                 else if (skb->protocol == htons(ETH_P_IPV6))
655                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
656 #endif
657                 else
658                         ttl = ip4_dst_hoplimit(&rt->dst);
659         }
660
661         df = tnl_params->frag_off;
662         if (skb->protocol == htons(ETH_P_IP))
663                 df |= (inner_iph->frag_off&htons(IP_DF));
664
665         max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
666                         + rt->dst.header_len;
667         if (max_headroom > dev->needed_headroom)
668                 dev->needed_headroom = max_headroom;
669
670         if (skb_cow_head(skb, dev->needed_headroom)) {
671                 dev->stats.tx_dropped++;
672                 kfree_skb(skb);
673                 return;
674         }
675
676         err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
677                             tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
678         iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
679
680         return;
681
682 #if IS_ENABLED(CONFIG_IPV6)
683 tx_error_icmp:
684         dst_link_failure(skb);
685 #endif
686 tx_error:
687         dev->stats.tx_errors++;
688         kfree_skb(skb);
689 }
690 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
691
/* Apply new parameters @p to an existing tunnel @t.  The tunnel is
 * re-hashed (addresses/key affect the bucket), re-bound to its underlay
 * if the link changed, and its per-cpu dst cache is invalidated.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	/* unhash before mutating the fields that determine the bucket */
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	/* cached routes may reference the old parameters */
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
724
/* Common SIOC{GET,ADD,CHG,DEL}TUNNEL ioctl handler for IPv4 tunnels.
 * @p is the (already copied-in) user parameter block; on SIOCGETTUNNEL
 * it is filled with the tunnel's current parameters for copy-out.
 * Returns 0 or a negative errno.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		/* on the fallback device, look up by parameters; else
		 * report the queried device itself
		 */
		t = NULL;
		if (dev == itn->fb_tunnel_dev)
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		/* ignore keys the caller didn't flag as present */
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL))
			t = ip_tunnel_create(net, itn, p);

		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* parameters already belong to another dev */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				/* the new addressing must not change the
				 * device's broadcast/p2p nature
				 */
				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			/* the fallback device itself can't be deleted */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
818
819 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
820 {
821         struct ip_tunnel *tunnel = netdev_priv(dev);
822         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
823
824         if (new_mtu < 68 ||
825             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
826                 return -EINVAL;
827         dev->mtu = new_mtu;
828         return 0;
829 }
830 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
831
832 static void ip_tunnel_dev_free(struct net_device *dev)
833 {
834         struct ip_tunnel *tunnel = netdev_priv(dev);
835
836         gro_cells_destroy(&tunnel->gro_cells);
837         free_percpu(tunnel->dst_cache);
838         free_percpu(dev->tstats);
839         free_netdev(dev);
840 }
841
842 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
843 {
844         struct ip_tunnel *tunnel = netdev_priv(dev);
845         struct ip_tunnel_net *itn;
846
847         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
848
849         if (itn->fb_tunnel_dev != dev) {
850                 ip_tunnel_del(netdev_priv(dev));
851                 unregister_netdevice_queue(dev, head);
852         }
853 }
854 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
855
/* Per-netns init: set up the hash table and, when @ops is given, create
 * the namespace's fallback tunnel device named @devname.  Returns 0 on
 * success or the error from fallback-device creation.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	/* no link ops: this tunnel type has no fallback device */
	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
890
891 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
892                               struct rtnl_link_ops *ops)
893 {
894         struct net *net = dev_net(itn->fb_tunnel_dev);
895         struct net_device *dev, *aux;
896         int h;
897
898         for_each_netdev_safe(net, dev, aux)
899                 if (dev->rtnl_link_ops == ops)
900                         unregister_netdevice_queue(dev, head);
901
902         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
903                 struct ip_tunnel *t;
904                 struct hlist_node *n;
905                 struct hlist_head *thead = &itn->tunnels[h];
906
907                 hlist_for_each_entry_safe(t, n, thead, hash_node)
908                         /* If dev is in the same netns, it has already
909                          * been added to the list by the previous loop.
910                          */
911                         if (!net_eq(dev_net(t->dev), net))
912                                 unregister_netdevice_queue(t->dev, head);
913         }
914 }
915
916 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
917 {
918         LIST_HEAD(list);
919
920         rtnl_lock();
921         ip_tunnel_destroy(itn, &list, ops);
922         unregister_netdevice_many(&list);
923         rtnl_unlock();
924 }
925 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
926
927 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
928                       struct ip_tunnel_parm *p)
929 {
930         struct ip_tunnel *nt;
931         struct net *net = dev_net(dev);
932         struct ip_tunnel_net *itn;
933         int mtu;
934         int err;
935
936         nt = netdev_priv(dev);
937         itn = net_generic(net, nt->ip_tnl_net_id);
938
939         if (ip_tunnel_find(itn, p, dev->type))
940                 return -EEXIST;
941
942         nt->net = net;
943         nt->parms = *p;
944         err = register_netdevice(dev);
945         if (err)
946                 goto out;
947
948         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
949                 eth_hw_addr_random(dev);
950
951         mtu = ip_tunnel_bind_dev(dev);
952         if (!tb[IFLA_MTU])
953                 dev->mtu = mtu;
954
955         ip_tunnel_add(itn, nt);
956
957 out:
958         return err;
959 }
960 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
961
962 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
963                          struct ip_tunnel_parm *p)
964 {
965         struct ip_tunnel *t;
966         struct ip_tunnel *tunnel = netdev_priv(dev);
967         struct net *net = tunnel->net;
968         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
969
970         if (dev == itn->fb_tunnel_dev)
971                 return -EINVAL;
972
973         t = ip_tunnel_find(itn, p, dev->type);
974
975         if (t) {
976                 if (t->dev != dev)
977                         return -EEXIST;
978         } else {
979                 t = tunnel;
980
981                 if (dev->type != ARPHRD_ETHER) {
982                         unsigned int nflags = 0;
983
984                         if (ipv4_is_multicast(p->iph.daddr))
985                                 nflags = IFF_BROADCAST;
986                         else if (p->iph.daddr)
987                                 nflags = IFF_POINTOPOINT;
988
989                         if ((dev->flags ^ nflags) &
990                             (IFF_POINTOPOINT | IFF_BROADCAST))
991                                 return -EINVAL;
992                 }
993         }
994
995         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
996         return 0;
997 }
998 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
999
1000 int ip_tunnel_init(struct net_device *dev)
1001 {
1002         struct ip_tunnel *tunnel = netdev_priv(dev);
1003         struct iphdr *iph = &tunnel->parms.iph;
1004         int i, err;
1005
1006         dev->destructor = ip_tunnel_dev_free;
1007         dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
1008         if (!dev->tstats)
1009                 return -ENOMEM;
1010
1011         for_each_possible_cpu(i) {
1012                 struct pcpu_sw_netstats *ipt_stats;
1013                 ipt_stats = per_cpu_ptr(dev->tstats, i);
1014                 u64_stats_init(&ipt_stats->syncp);
1015         }
1016
1017         tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1018         if (!tunnel->dst_cache) {
1019                 free_percpu(dev->tstats);
1020                 return -ENOMEM;
1021         }
1022
1023         err = gro_cells_init(&tunnel->gro_cells, dev);
1024         if (err) {
1025                 free_percpu(tunnel->dst_cache);
1026                 free_percpu(dev->tstats);
1027                 return err;
1028         }
1029
1030         tunnel->dev = dev;
1031         tunnel->net = dev_net(dev);
1032         strcpy(tunnel->parms.name, dev->name);
1033         iph->version            = 4;
1034         iph->ihl                = 5;
1035
1036         return 0;
1037 }
1038 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1039
1040 void ip_tunnel_uninit(struct net_device *dev)
1041 {
1042         struct ip_tunnel *tunnel = netdev_priv(dev);
1043         struct net *net = tunnel->net;
1044         struct ip_tunnel_net *itn;
1045
1046         itn = net_generic(net, tunnel->ip_tnl_net_id);
1047         /* fb_tunnel_dev will be unregisted in net-exit call. */
1048         if (itn->fb_tunnel_dev != dev)
1049                 ip_tunnel_del(netdev_priv(dev));
1050
1051         ip_tunnel_dst_reset_all(tunnel);
1052 }
1053 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1054
/* Do least required initialization, rest of init is done in tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	/* Record the pernet id so later callbacks (init/uninit/dellink) can
	 * reach this tunnel type's per-netns state via net_generic().
	 */
	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1062
1063 MODULE_LICENSE("GPL");