ip_tunnel(ipv4): fix tunnels with "local any remote $remote_ip"
[profile/ivi/kernel-x86-ivi.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
44
45 #include <net/sock.h>
46 #include <net/ip.h>
47 #include <net/icmp.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
50 #include <net/arp.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
54 #include <net/xfrm.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
58
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
64
65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
/* Install @dst as the cached route in @idst, releasing the previous one.
 *
 * dst_clone() takes the reference that the cache will hold; xchg() swaps
 * the pointer atomically so a concurrent reader never sees a freed entry.
 * NOTE(review): idst->saddr is written after the pointer swap, so a reader
 * could transiently pair the new dst with the old saddr — presumably
 * benign (worst case a stale source address pick); confirm.
 */
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst, __be32 saddr)
{
	struct dst_entry *old_dst;

	dst_clone(dst);
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
	idst->saddr = saddr;
}
81
/* Cache @dst/@saddr in the current CPU's per-cpu route-cache slot. */
static void tunnel_dst_set(struct ip_tunnel *t,
			   struct dst_entry *dst, __be32 saddr)
{
	__tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst, saddr);
}
87
/* Drop the current CPU's cached route (other CPUs keep theirs). */
static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL, 0);
}
92
/* Invalidate the cached route on every possible CPU, e.g. after the
 * tunnel's endpoints or bound link have changed.
 */
void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
101
/* Fetch this CPU's cached route, revalidating it first.
 *
 * Returns a referenced rtable (caller must ip_rt_put()) and fills *saddr
 * with the source address cached alongside it, or NULL when the slot is
 * empty, the entry's refcount already reached zero, or the dst is
 * obsolete and fails its ops->check() revalidation (in which case the
 * stale entry is also purged from the cache).
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	idst = this_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	/* Only take a reference if the entry is still live. */
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
125
126 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
127                                 __be16 flags, __be32 key)
128 {
129         if (p->i_flags & TUNNEL_KEY) {
130                 if (flags & TUNNEL_KEY)
131                         return key == p->i_key;
132                 else
133                         /* key expected, none present */
134                         return false;
135         } else
136                 return !(flags & TUNNEL_KEY);
137 }
138
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require exact key match i.e. if a key is present in packet
   it will match only tunnel with the same key; if it is not present,
   it will match only keyless tunnel.

   All keyless packets, if not matched to a configured keyless tunnel,
   will match the fallback tunnel.
   Given src, dst and key, find appropriate for input tunnel.
*/
/* Find the best tunnel for an incoming packet, trying progressively
 * looser matches; within each pass an exact parms.link match wins
 * outright, otherwise the first hit from the earliest pass is kept as
 * a candidate.
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	/* Pass 1: both endpoints must match exactly. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Pass 2: "local any" tunnels — only the remote must match. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Wildcard-remote tunnels hash under remote == 0. */
	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	/* Pass 3: local matches the tunnel's saddr (remote any), or the
	 * packet's local address is a multicast group the tunnel uses as
	 * its destination.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* Pass 4: fully wildcard tunnels, matched purely by key. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	/* Nothing matched: hand the packet to the fallback device. */
	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
239
240 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
241                                     struct ip_tunnel_parm *parms)
242 {
243         unsigned int h;
244         __be32 remote;
245
246         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
247                 remote = parms->iph.daddr;
248         else
249                 remote = 0;
250
251         h = ip_tunnel_hash(parms->i_key, remote);
252         return &itn->tunnels[h];
253 }
254
255 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
256 {
257         struct hlist_head *head = ip_bucket(itn, &t->parms);
258
259         hlist_add_head_rcu(&t->hash_node, head);
260 }
261
/* Remove @t from the hash table; the node is re-initialised so a later
 * ip_tunnel_add() (e.g. from ip_tunnel_update()) is valid.
 */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
266
267 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
268                                         struct ip_tunnel_parm *parms,
269                                         int type)
270 {
271         __be32 remote = parms->iph.daddr;
272         __be32 local = parms->iph.saddr;
273         __be32 key = parms->i_key;
274         int link = parms->link;
275         struct ip_tunnel *t = NULL;
276         struct hlist_head *head = ip_bucket(itn, parms);
277
278         hlist_for_each_entry_rcu(t, head, hash_node) {
279                 if (local == t->parms.iph.saddr &&
280                     remote == t->parms.iph.daddr &&
281                     key == t->parms.i_key &&
282                     link == t->parms.link &&
283                     type == t->dev->type)
284                         break;
285         }
286         return t;
287 }
288
/* Allocate and register a tunnel netdevice.
 *
 * If @parms carries no name, derive one as "<kind>%d" so the core picks
 * the next free index.  On success the private area's parms/net are
 * populated from @parms and the device is registered.  Returns the
 * device or an ERR_PTR.  Must be called under RTNL.
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* Need room for the kind plus "%d" and the NUL. */
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
334
/* Initialise @fl4 for a route lookup toward the tunnel's remote end.
 * NOTE(review): the memset (rather than member-wise zeroing) presumably
 * also matters for padding bytes if flow keys are compared/hashed as raw
 * memory elsewhere — keep it; confirm before restructuring.
 */
static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
}
348
/* Pick the underlying egress device for the tunnel and derive headroom
 * and a suitable MTU from it.
 *
 * For tunnels with a fixed remote, a route lookup finds the device (and
 * primes the per-cpu dst cache); otherwise fall back to the explicitly
 * bound link, if any.  Returns the suggested MTU, never below 68.
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			/* Cache the route we just resolved. */
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Reserve space for the outer IP header plus the underlay's own
	 * link-layer needs.
	 */
	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}
396
397 static struct ip_tunnel *ip_tunnel_create(struct net *net,
398                                           struct ip_tunnel_net *itn,
399                                           struct ip_tunnel_parm *parms)
400 {
401         struct ip_tunnel *nt, *fbt;
402         struct net_device *dev;
403
404         BUG_ON(!itn->fb_tunnel_dev);
405         fbt = netdev_priv(itn->fb_tunnel_dev);
406         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
407         if (IS_ERR(dev))
408                 return NULL;
409
410         dev->mtu = ip_tunnel_bind_dev(dev);
411
412         nt = netdev_priv(dev);
413         ip_tunnel_add(itn, nt);
414         return nt;
415 }
416
/* Common receive path for IPv4 tunnels, called after the encapsulation
 * header has been parsed into @tpi and pulled.
 *
 * Validates checksum/sequence expectations against the tunnel config,
 * decapsulates ECN, updates rx stats and hands the inner packet to GRO.
 * Always consumes @skb; returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Checksum presence must match the tunnel's TUNNEL_CSUM setting
	 * in both directions (present-but-unexpected is also an error).
	 */
	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* Enforce monotonically advancing sequence numbers (serial-number
	 * arithmetic via the s32 cast) when TUNNEL_SEQ is configured.
	 */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	/* Propagate outer-header ECN into the inner packet; err > 1 means
	 * the combination is invalid and the packet must be dropped.
	 */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* Scrub packet state when crossing a netns boundary. */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
485
/* Path-MTU bookkeeping for one tunnelled packet.
 *
 * Computes the effective MTU toward the tunnel route, records it on the
 * inner dst, and sends ICMP(v6) "fragmentation needed / packet too big"
 * back to the sender when a non-GSO packet exceeds it, returning -E2BIG
 * so the caller drops the packet.  Returns 0 when it may proceed.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		/* DF set: bounded by the tunnel route minus all overhead. */
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Stamp the reduced MTU on host routes (plen == 128) or on
		 * tunnels with a fixed unicast remote, so later lookups
		 * see it.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
533
/* Common transmit path: route the packet toward the tunnel remote,
 * apply TOS/TTL/DF inheritance and PMTU handling, then hand off to
 * iptunnel_xmit() for encapsulation.  Consumes @skb on every path.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	/* Only "connected" tunnels (fixed remote) may use the per-cpu
	 * dst cache; any per-packet routing below clears this.
	 */
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel: derive the outer destination per packet
		 * from the inner route / neighbour entry.
		 */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only v4-compatible v6 addresses carry a usable
			 * IPv4 next hop in their low 32 bits.
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		/* Low bit set means "inherit TOS from the inner packet",
		 * which also makes the route per-packet (not cacheable).
		 */
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
	}

	/* Routing back out of the tunnel device itself would loop. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* While recent ICMP errors are being rate-limited (err_count),
	 * also signal link failure toward the sender.
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		/* ttl 0 means inherit from the inner packet. */
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
694
/* Apply new parameters @p to existing tunnel @t.
 *
 * The tunnel is unhashed and re-hashed because endpoints/key determine
 * its bucket.  A link change rebinds the underlying device and may
 * update the MTU; the per-cpu route cache is always invalidated.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels expose their endpoints as the
		 * device hardware/broadcast addresses.
		 */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	/* Endpoints/link may have changed; drop all cached routes. */
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
727
/* Legacy ioctl configuration (SIOC{GET,ADD,CHG,DEL}TUNNEL).
 *
 * @p is the decoded ip_tunnel_parm from userspace; for SIOCGETTUNNEL it
 * is overwritten with the tunnel's current parameters.  Returns 0 or a
 * negative errno.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		/* On the fallback device the request selects a tunnel by
		 * parameters; otherwise it refers to the device itself.
		 */
		if (dev == itn->fb_tunnel_dev)
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* A fixed TTL forces DF so PMTU discovery can work. */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		/* Ignore key values whose KEY flag is absent. */
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL))
			t = ip_tunnel_create(net, itn, p);

		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Parameters already belong to another device. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* Changing this device in place: the new
				 * parameters must not flip its
				 * point-to-point/broadcast nature.
				 */
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself may not be deleted. */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
821
822 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
823 {
824         struct ip_tunnel *tunnel = netdev_priv(dev);
825         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
826
827         if (new_mtu < 68 ||
828             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
829                 return -EINVAL;
830         dev->mtu = new_mtu;
831         return 0;
832 }
833 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
834
/* netdev destructor: tear down per-device state (GRO cells, per-cpu
 * dst cache and stats) before freeing the netdev itself.
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
844
845 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
846 {
847         struct ip_tunnel *tunnel = netdev_priv(dev);
848         struct ip_tunnel_net *itn;
849
850         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
851
852         if (itn->fb_tunnel_dev != dev) {
853                 ip_tunnel_del(netdev_priv(dev));
854                 unregister_netdevice_queue(dev, head);
855         }
856 }
857 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
858
/* Per-netns init for a tunnel type: set up the hash table and, when
 * @ops is given, create the netns fallback device named @devname.
 *
 * Returns 0 on success or the PTR_ERR from fallback-device creation.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		/* No rtnl_link_ops: this tunnel type keeps no fallback
		 * device in the netns.
		 */
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
893
/* Queue every device of this tunnel type for unregistration on @head.
 * Caller holds rtnl and performs the batched unregistration afterwards.
 *
 * Two passes are needed: devices registered in this netns are found by
 * walking the netns device list, while tunnels that were moved to a
 * different netns (but remain hashed in @itn) are found by walking the
 * hash table.
 */
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	/* Pass 1: all devices in this netns created through @ops
	 * (this includes the fallback device). */
	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	/* Pass 2: hashed tunnels living in a foreign netns. */
	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}
918
919 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
920 {
921         LIST_HEAD(list);
922
923         rtnl_lock();
924         ip_tunnel_destroy(itn, &list, ops);
925         unregister_netdevice_many(&list);
926         rtnl_unlock();
927 }
928 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
929
930 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
931                       struct ip_tunnel_parm *p)
932 {
933         struct ip_tunnel *nt;
934         struct net *net = dev_net(dev);
935         struct ip_tunnel_net *itn;
936         int mtu;
937         int err;
938
939         nt = netdev_priv(dev);
940         itn = net_generic(net, nt->ip_tnl_net_id);
941
942         if (ip_tunnel_find(itn, p, dev->type))
943                 return -EEXIST;
944
945         nt->net = net;
946         nt->parms = *p;
947         err = register_netdevice(dev);
948         if (err)
949                 goto out;
950
951         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
952                 eth_hw_addr_random(dev);
953
954         mtu = ip_tunnel_bind_dev(dev);
955         if (!tb[IFLA_MTU])
956                 dev->mtu = mtu;
957
958         ip_tunnel_add(itn, nt);
959
960 out:
961         return err;
962 }
963 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
964
/* rtnl changelink handler: update an existing tunnel's parameters to @p.
 *
 * Returns 0 on success, -EINVAL for the fallback device or a flag
 * mismatch, -EEXIST if @p collides with a different tunnel.
 */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	/* The per-netns fallback device may not be reconfigured. */
	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		/* @p matches a tunnel; only allowed if it is this one. */
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			/* The link flags implied by the new destination
			 * address must agree with the device's current
			 * IFF_POINTOPOINT/IFF_BROADCAST flags. */
			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1002
1003 int ip_tunnel_init(struct net_device *dev)
1004 {
1005         struct ip_tunnel *tunnel = netdev_priv(dev);
1006         struct iphdr *iph = &tunnel->parms.iph;
1007         int i, err;
1008
1009         dev->destructor = ip_tunnel_dev_free;
1010         dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
1011         if (!dev->tstats)
1012                 return -ENOMEM;
1013
1014         for_each_possible_cpu(i) {
1015                 struct pcpu_sw_netstats *ipt_stats;
1016                 ipt_stats = per_cpu_ptr(dev->tstats, i);
1017                 u64_stats_init(&ipt_stats->syncp);
1018         }
1019
1020         tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1021         if (!tunnel->dst_cache) {
1022                 free_percpu(dev->tstats);
1023                 return -ENOMEM;
1024         }
1025
1026         err = gro_cells_init(&tunnel->gro_cells, dev);
1027         if (err) {
1028                 free_percpu(tunnel->dst_cache);
1029                 free_percpu(dev->tstats);
1030                 return err;
1031         }
1032
1033         tunnel->dev = dev;
1034         tunnel->net = dev_net(dev);
1035         strcpy(tunnel->parms.name, dev->name);
1036         iph->version            = 4;
1037         iph->ihl                = 5;
1038
1039         return 0;
1040 }
1041 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1042
1043 void ip_tunnel_uninit(struct net_device *dev)
1044 {
1045         struct ip_tunnel *tunnel = netdev_priv(dev);
1046         struct net *net = tunnel->net;
1047         struct ip_tunnel_net *itn;
1048
1049         itn = net_generic(net, tunnel->ip_tnl_net_id);
1050         /* fb_tunnel_dev will be unregisted in net-exit call. */
1051         if (itn->fb_tunnel_dev != dev)
1052                 ip_tunnel_del(netdev_priv(dev));
1053
1054         ip_tunnel_dst_reset_all(tunnel);
1055 }
1056 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1057
1058 /* Do least required initialization, rest of init is done in tunnel_init call */
1059 void ip_tunnel_setup(struct net_device *dev, int net_id)
1060 {
1061         struct ip_tunnel *tunnel = netdev_priv(dev);
1062         tunnel->ip_tnl_net_id = net_id;
1063 }
1064 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1065
1066 MODULE_LICENSE("GPL");