// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

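/* Hash on both the tunnel key and the remote address, so keyed and
 * keyless tunnels towards the same destination land in separate buckets.
 */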
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
        return hash_32((__force u32)key ^ (__force u32)remote,
                         IP_TNL_HASH_BITS);
}

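/* A tunnel configured with TUNNEL_KEY matches only packets that carry
 * the same key; a keyless tunnel matches only packets without one.
 */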
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
                                __be16 flags, __be32 key)
{
        if (p->i_flags & TUNNEL_KEY) {
                if (flags & TUNNEL_KEY)
                        return key == p->i_key;
                else
                        /* key expected, none present */
                        return false;
        } else
                return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless
   tunnel, will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for the input packet.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
                                   int link, __be16 flags,
                                   __be32 remote, __be32 local,
                                   __be32 key)
{
        struct ip_tunnel *t, *cand = NULL;
        struct hlist_head *head;
        struct net_device *ndev;
        unsigned int hash;

        hash = ip_tunnel_hash(key, remote);
        head = &itn->tunnels[hash];

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (local != t->parms.iph.saddr ||
                    remote != t->parms.iph.daddr ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (!ip_tunnel_key_match(&t->parms, flags, key))
                        continue;

                if (t->parms.link == link)
                        return t;
                else
                        cand = t;
        }

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (remote != t->parms.iph.daddr ||
                    t->parms.iph.saddr != 0 ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (!ip_tunnel_key_match(&t->parms, flags, key))
                        continue;

                if (t->parms.link == link)
                        return t;
                else if (!cand)
                        cand = t;
        }

        hash = ip_tunnel_hash(key, 0);
        head = &itn->tunnels[hash];

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
                    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
                        continue;

                if (!(t->dev->flags & IFF_UP))
                        continue;

                if (!ip_tunnel_key_match(&t->parms, flags, key))
                        continue;

                if (t->parms.link == link)
                        return t;
                else if (!cand)
                        cand = t;
        }

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
                    t->parms.iph.saddr != 0 ||
                    t->parms.iph.daddr != 0 ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (t->parms.link == link)
                        return t;
                else if (!cand)
                        cand = t;
        }

        if (cand)
                return cand;

        t = rcu_dereference(itn->collect_md_tun);
        if (t && t->dev->flags & IFF_UP)
                return t;

        ndev = READ_ONCE(itn->fb_tunnel_dev);
        if (ndev && ndev->flags & IFF_UP)
                return netdev_priv(ndev);

        return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

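/* Pick the hash bucket for the tunnel described by @parms.  Multicast
 * and unset destinations hash with remote == 0, matching the wildcard
 * passes in ip_tunnel_lookup(); VTI tunnels hash with i_key == 0 unless
 * TUNNEL_KEY is set.
 */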
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
                                    struct ip_tunnel_parm *parms)
{
        unsigned int h;
        __be32 remote;
        __be32 i_key = parms->i_key;

        if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
                remote = parms->iph.daddr;
        else
                remote = 0;

        if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
                i_key = 0;

        h = ip_tunnel_hash(i_key, remote);
        return &itn->tunnels[h];
}

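/* Insert @t into the per-netns hash table; a collect_md tunnel is also
 * published through itn->collect_md_tun so lookup can fall back to it.
 */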
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
        struct hlist_head *head = ip_bucket(itn, &t->parms);

        if (t->collect_md)
                rcu_assign_pointer(itn->collect_md_tun, t);
        hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
        if (t->collect_md)
                rcu_assign_pointer(itn->collect_md_tun, NULL);
        hlist_del_init_rcu(&t->hash_node);
}

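/* Exact-match lookup used by the configuration paths: unlike
 * ip_tunnel_lookup(), this requires saddr, daddr, link, device type and
 * key to all match, and does not care whether the device is up.
 */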
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
                                        struct ip_tunnel_parm *parms,
                                        int type)
{
        __be32 remote = parms->iph.daddr;
        __be32 local = parms->iph.saddr;
        __be32 key = parms->i_key;
        __be16 flags = parms->i_flags;
        int link = parms->link;
        struct ip_tunnel *t = NULL;
        struct hlist_head *head = ip_bucket(itn, parms);

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (local == t->parms.iph.saddr &&
                    remote == t->parms.iph.daddr &&
                    link == t->parms.link &&
                    type == t->dev->type &&
                    ip_tunnel_key_match(&t->parms, flags, key))
                        break;
        }
        return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
                                             const struct rtnl_link_ops *ops,
                                             struct ip_tunnel_parm *parms)
{
        int err;
        struct ip_tunnel *tunnel;
        struct net_device *dev;
        char name[IFNAMSIZ];

        err = -E2BIG;
        if (parms->name[0]) {
                if (!dev_valid_name(parms->name))
                        goto failed;
                strscpy(name, parms->name, IFNAMSIZ);
        } else {
                if (strlen(ops->kind) > (IFNAMSIZ - 3))
                        goto failed;
                strcpy(name, ops->kind);
                strcat(name, "%d");
        }

        ASSERT_RTNL();
        dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
        if (!dev) {
                err = -ENOMEM;
                goto failed;
        }
        dev_net_set(dev, net);

        dev->rtnl_link_ops = ops;

        tunnel = netdev_priv(dev);
        tunnel->parms = *parms;
        tunnel->net = net;

        err = register_netdevice(dev);
        if (err)
                goto failed_free;

        return dev;

failed_free:
        free_netdev(dev);
failed:
        return ERR_PTR(err);
}

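/* Resolve the underlying output device for the tunnel's destination and
 * derive needed_headroom and the tunnel MTU from it.  Returns the MTU
 * the tunnel device should use, clamped to at least IPV4_MIN_MTU.
 */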
static int ip_tunnel_bind_dev(struct net_device *dev)
{
        struct net_device *tdev = NULL;
        struct ip_tunnel *tunnel = netdev_priv(dev);
        const struct iphdr *iph;
        int hlen = LL_MAX_HEADER;
        int mtu = ETH_DATA_LEN;
        int t_hlen = tunnel->hlen + sizeof(struct iphdr);

        iph = &tunnel->parms.iph;

        /* Guess output device to choose reasonable mtu and needed_headroom */
        if (iph->daddr) {
                struct flowi4 fl4;
                struct rtable *rt;

                ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
                                    iph->saddr, tunnel->parms.o_key,
                                    RT_TOS(iph->tos), dev_net(dev),
                                    tunnel->parms.link, tunnel->fwmark, 0, 0);
                rt = ip_route_output_key(tunnel->net, &fl4);

                if (!IS_ERR(rt)) {
                        tdev = rt->dst.dev;
                        ip_rt_put(rt);
                }
                if (dev->type != ARPHRD_ETHER)
                        dev->flags |= IFF_POINTOPOINT;

                dst_cache_reset(&tunnel->dst_cache);
        }

        if (!tdev && tunnel->parms.link)
                tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

        if (tdev) {
                hlen = tdev->hard_header_len + tdev->needed_headroom;
                mtu = min(tdev->mtu, IP_MAX_MTU);
        }

        dev->needed_headroom = t_hlen + hlen;
        mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);

        if (mtu < IPV4_MIN_MTU)
                mtu = IPV4_MIN_MTU;

        return mtu;
}

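/* Create and register a new tunnel device for SIOCADDTUNNEL, set its
 * MTU limits and hash it into the per-netns table.
 */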
static struct ip_tunnel *ip_tunnel_create(struct net *net,
                                          struct ip_tunnel_net *itn,
                                          struct ip_tunnel_parm *parms)
{
        struct ip_tunnel *nt;
        struct net_device *dev;
        int t_hlen;
        int mtu;
        int err;

        dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
        if (IS_ERR(dev))
                return ERR_CAST(dev);

        mtu = ip_tunnel_bind_dev(dev);
        err = dev_set_mtu(dev, mtu);
        if (err)
                goto err_dev_set_mtu;

        nt = netdev_priv(dev);
        t_hlen = nt->hlen + sizeof(struct iphdr);
        dev->min_mtu = ETH_MIN_MTU;
        dev->max_mtu = IP_MAX_MTU - t_hlen;
        if (dev->type == ARPHRD_ETHER)
                dev->max_mtu -= dev->hard_header_len;

        ip_tunnel_add(itn, nt);
        return nt;

err_dev_set_mtu:
        unregister_netdevice(dev);
        return ERR_PTR(err);
}

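/* Common receive path for IPv4 tunnels: validate checksum/sequence
 * flags against the tunnel configuration, decapsulate ECN, update
 * stats, scrub the packet on a netns crossing and hand the skb to GRO.
 * Consumes @skb (and @tun_dst on the drop path) in all cases.
 *
 * A typical call site, sketched after the GRE driver (illustrative;
 * the exact lookup arguments are driver-specific):
 *
 *	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 *				  iph->saddr, iph->daddr, tpi->key);
 *	if (tunnel)
 *		return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
 */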
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
                  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
                  bool log_ecn_error)
{
        const struct iphdr *iph = ip_hdr(skb);
        int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
        if (ipv4_is_multicast(iph->daddr)) {
                tunnel->dev->stats.multicast++;
                skb->pkt_type = PACKET_BROADCAST;
        }
#endif

        if ((!(tpi->flags & TUNNEL_CSUM) && (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
            ((tpi->flags & TUNNEL_CSUM) && !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
                tunnel->dev->stats.rx_crc_errors++;
                tunnel->dev->stats.rx_errors++;
                goto drop;
        }

        if (tunnel->parms.i_flags & TUNNEL_SEQ) {
                if (!(tpi->flags & TUNNEL_SEQ) ||
                    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
                        tunnel->dev->stats.rx_fifo_errors++;
                        tunnel->dev->stats.rx_errors++;
                        goto drop;
                }
                tunnel->i_seqno = ntohl(tpi->seq) + 1;
        }

        skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);

        err = IP_ECN_decapsulate(iph, skb);
        if (unlikely(err)) {
                if (log_ecn_error)
                        net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
                                        &iph->saddr, iph->tos);
                if (err > 1) {
                        ++tunnel->dev->stats.rx_frame_errors;
                        ++tunnel->dev->stats.rx_errors;
                        goto drop;
                }
        }

        dev_sw_netstats_rx_add(tunnel->dev, skb->len);
        skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

        if (tunnel->dev->type == ARPHRD_ETHER) {
                skb->protocol = eth_type_trans(skb, tunnel->dev);
                skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
        } else {
                skb->dev = tunnel->dev;
        }

        if (tun_dst)
                skb_dst_set(skb, (struct dst_entry *)tun_dst);

        gro_cells_receive(&tunnel->gro_cells, skb);
        return 0;

drop:
        if (tun_dst)
                dst_release((struct dst_entry *)tun_dst);
        kfree_skb(skb);
        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

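/* Register a UDP-style encapsulation handler (e.g. FOU/GUE) in the
 * iptun_encaps table; cmpxchg() keeps the slot update atomic, and -1 is
 * returned when the slot is already taken (or, on removal, does not
 * hold @ops).
 */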
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
                            unsigned int num)
{
        if (num >= MAX_IPTUN_ENCAP_OPS)
                return -ERANGE;

        return !cmpxchg((const struct ip_tunnel_encap_ops **)
                        &iptun_encaps[num],
                        NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
                            unsigned int num)
{
        int ret;

        if (num >= MAX_IPTUN_ENCAP_OPS)
                return -ERANGE;

        ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
                       &iptun_encaps[num],
                       ops, NULL) == ops) ? 0 : -1;

        synchronize_net();

        return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);

int ip_tunnel_encap_setup(struct ip_tunnel *t,
                          struct ip_tunnel_encap *ipencap)
{
        int hlen;

        memset(&t->encap, 0, sizeof(t->encap));

        hlen = ip_encap_hlen(ipencap);
        if (hlen < 0)
                return hlen;

        t->encap.type = ipencap->type;
        t->encap.sport = ipencap->sport;
        t->encap.dport = ipencap->dport;
        t->encap.flags = ipencap->flags;

        t->encap_hlen = hlen;
        t->hlen = t->encap_hlen + t->tun_hlen;

        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

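/* Check the packet against the path MTU and, when it does not fit and
 * DF semantics apply, send the matching ICMP/ICMPv6 "packet too big"
 * error back through the tunnel.  Returns -E2BIG in that case.
 */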
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
                            struct rtable *rt, __be16 df,
                            const struct iphdr *inner_iph,
                            int tunnel_hlen, __be32 dst, bool md)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        int pkt_size;
        int mtu;

        tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
        pkt_size = skb->len - tunnel_hlen;
        pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;

        if (df) {
                mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
                mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
        } else {
                mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
        }

        if (skb_valid_dst(skb))
                skb_dst_update_pmtu_no_confirm(skb, mtu);

        if (skb->protocol == htons(ETH_P_IP)) {
                if (!skb_is_gso(skb) &&
                    (inner_iph->frag_off & htons(IP_DF)) &&
                    mtu < pkt_size) {
                        icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                        return -E2BIG;
                }
        }
#if IS_ENABLED(CONFIG_IPV6)
        else if (skb->protocol == htons(ETH_P_IPV6)) {
                struct rt6_info *rt6;
                __be32 daddr;

                rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
                                           NULL;
                daddr = md ? dst : tunnel->parms.iph.daddr;

                if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
                           mtu >= IPV6_MIN_MTU) {
                        if ((daddr && !ipv4_is_multicast(daddr)) ||
                            rt6->rt6i_dst.plen == 128) {
                                rt6->rt6i_flags |= RTF_MODIFIED;
                                dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
                        }
                }

                if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
                                        mtu < pkt_size) {
                        icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                        return -E2BIG;
                }
        }
#endif
        return 0;
}

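/* Transmit path for collect-metadata (external mode) tunnels: all
 * addressing comes from the per-skb tunnel info instead of the device
 * configuration.  Tunnels configured with an extra encapsulation
 * (encap.type != TUNNEL_ENCAP_NONE) are rejected here.
 */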
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
                       u8 proto, int tunnel_hlen)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        u32 headroom = sizeof(struct iphdr);
        struct ip_tunnel_info *tun_info;
        const struct ip_tunnel_key *key;
        const struct iphdr *inner_iph;
        struct rtable *rt = NULL;
        struct flowi4 fl4;
        __be16 df = 0;
        u8 tos, ttl;
        bool use_cache;

        tun_info = skb_tunnel_info(skb);
        if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
                     ip_tunnel_info_af(tun_info) != AF_INET))
                goto tx_error;
        key = &tun_info->key;
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
        tos = key->tos;
        if (tos == 1) {
                if (skb->protocol == htons(ETH_P_IP))
                        tos = inner_iph->tos;
                else if (skb->protocol == htons(ETH_P_IPV6))
                        tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
        }
        ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
                            tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
                            dev_net(dev), 0, skb->mark, skb_get_hash(skb),
                            key->flow_flags);
        if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
                goto tx_error;

        use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
        if (use_cache)
                rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
        if (!rt) {
                rt = ip_route_output_key(tunnel->net, &fl4);
                if (IS_ERR(rt)) {
                        dev->stats.tx_carrier_errors++;
                        goto tx_error;
                }
                if (use_cache)
                        dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
                                          fl4.saddr);
        }
        if (rt->dst.dev == dev) {
                ip_rt_put(rt);
                dev->stats.collisions++;
                goto tx_error;
        }

        if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
                df = htons(IP_DF);
        if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
                            key->u.ipv4.dst, true)) {
                ip_rt_put(rt);
                goto tx_error;
        }

        tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
        ttl = key->ttl;
        if (ttl == 0) {
                if (skb->protocol == htons(ETH_P_IP))
                        ttl = inner_iph->ttl;
                else if (skb->protocol == htons(ETH_P_IPV6))
                        ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
                else
                        ttl = ip4_dst_hoplimit(&rt->dst);
        }

        headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
        if (headroom > READ_ONCE(dev->needed_headroom))
                WRITE_ONCE(dev->needed_headroom, headroom);

        if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
                ip_rt_put(rt);
                goto tx_dropped;
        }
        iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
                      df, !net_eq(tunnel->net, dev_net(dev)));
        return;
tx_error:
        dev->stats.tx_errors++;
        goto kfree;
tx_dropped:
        dev->stats.tx_dropped++;
kfree:
        kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);

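/* Main transmit path for classical (non-metadata) tunnels.  Handles
 * NBMA mode (daddr == 0, where the destination is taken from the inner
 * packet or attached metadata), inherited TOS/TTL, routing with the
 * per-tunnel dst cache, PMTU updates and the final encapsulation via
 * iptunnel_xmit().
 */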
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
                    const struct iphdr *tnl_params, u8 protocol)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct ip_tunnel_info *tun_info = NULL;
        const struct iphdr *inner_iph;
        unsigned int max_headroom;      /* The extra header space needed */
        struct rtable *rt = NULL;               /* Route to the other host */
        __be16 payload_protocol;
        bool use_cache = false;
        struct flowi4 fl4;
        bool md = false;
        bool connected;
        u8 tos, ttl;
        __be32 dst;
        __be16 df;

        inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
        connected = (tunnel->parms.iph.daddr != 0);
        payload_protocol = skb_protocol(skb, true);

        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

        dst = tnl_params->daddr;
        if (dst == 0) {
                /* NBMA tunnel */

                if (!skb_dst(skb)) {
                        dev->stats.tx_fifo_errors++;
                        goto tx_error;
                }

                tun_info = skb_tunnel_info(skb);
                if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
                    ip_tunnel_info_af(tun_info) == AF_INET &&
                    tun_info->key.u.ipv4.dst) {
                        dst = tun_info->key.u.ipv4.dst;
                        md = true;
                        connected = true;
                } else if (payload_protocol == htons(ETH_P_IP)) {
                        rt = skb_rtable(skb);
                        dst = rt_nexthop(rt, inner_iph->daddr);
                }
#if IS_ENABLED(CONFIG_IPV6)
                else if (payload_protocol == htons(ETH_P_IPV6)) {
                        const struct in6_addr *addr6;
                        struct neighbour *neigh;
                        bool do_tx_error_icmp;
                        int addr_type;

                        neigh = dst_neigh_lookup(skb_dst(skb),
                                                 &ipv6_hdr(skb)->daddr);
                        if (!neigh)
                                goto tx_error;

                        addr6 = (const struct in6_addr *)&neigh->primary_key;
                        addr_type = ipv6_addr_type(addr6);

                        if (addr_type == IPV6_ADDR_ANY) {
                                addr6 = &ipv6_hdr(skb)->daddr;
                                addr_type = ipv6_addr_type(addr6);
                        }

                        if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
                                do_tx_error_icmp = true;
                        else {
                                do_tx_error_icmp = false;
                                dst = addr6->s6_addr32[3];
                        }
                        neigh_release(neigh);
                        if (do_tx_error_icmp)
                                goto tx_error_icmp;
                }
#endif
                else
                        goto tx_error;

                if (!md)
                        connected = false;
        }

        tos = tnl_params->tos;
        if (tos & 0x1) {
                tos &= ~0x1;
                if (payload_protocol == htons(ETH_P_IP)) {
                        tos = inner_iph->tos;
                        connected = false;
                } else if (payload_protocol == htons(ETH_P_IPV6)) {
                        tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
                        connected = false;
                }
        }

        ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
                            tunnel->parms.o_key, RT_TOS(tos),
                            dev_net(dev), tunnel->parms.link,
                            tunnel->fwmark, skb_get_hash(skb), 0);

        if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
                goto tx_error;

        if (connected && md) {
                use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
                if (use_cache)
                        rt = dst_cache_get_ip4(&tun_info->dst_cache,
                                               &fl4.saddr);
        } else {
                rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
                                                &fl4.saddr) : NULL;
        }

        if (!rt) {
                rt = ip_route_output_key(tunnel->net, &fl4);

                if (IS_ERR(rt)) {
                        dev->stats.tx_carrier_errors++;
                        goto tx_error;
                }
                if (use_cache)
                        dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
                                          fl4.saddr);
                else if (!md && connected)
                        dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
                                          fl4.saddr);
        }

        if (rt->dst.dev == dev) {
                ip_rt_put(rt);
                dev->stats.collisions++;
                goto tx_error;
        }

        df = tnl_params->frag_off;
        if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
                df |= (inner_iph->frag_off & htons(IP_DF));

        if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
                ip_rt_put(rt);
                goto tx_error;
        }

        if (tunnel->err_count > 0) {
                if (time_before(jiffies,
                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
                        tunnel->err_count--;

                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
        ttl = tnl_params->ttl;
        if (ttl == 0) {
                if (payload_protocol == htons(ETH_P_IP))
                        ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
                else if (payload_protocol == htons(ETH_P_IPV6))
                        ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
                else
                        ttl = ip4_dst_hoplimit(&rt->dst);
        }

        max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
                        + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
        if (max_headroom > READ_ONCE(dev->needed_headroom))
                WRITE_ONCE(dev->needed_headroom, max_headroom);

        if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
                ip_rt_put(rt);
                dev->stats.tx_dropped++;
                kfree_skb(skb);
                return;
        }

        iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
                      df, !net_eq(tunnel->net, dev_net(dev)));
        return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
        dst_link_failure(skb);
#endif
tx_error:
        dev->stats.tx_errors++;
        kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

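/* Apply new parameters to an existing tunnel: re-hash it under the new
 * addresses/keys, refresh the device and broadcast addresses, and
 * re-bind to the underlying device if the link or fwmark changed.
 */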
static void ip_tunnel_update(struct ip_tunnel_net *itn,
                             struct ip_tunnel *t,
                             struct net_device *dev,
                             struct ip_tunnel_parm *p,
                             bool set_mtu,
                             __u32 fwmark)
{
        ip_tunnel_del(itn, t);
        t->parms.iph.saddr = p->iph.saddr;
        t->parms.iph.daddr = p->iph.daddr;
        t->parms.i_key = p->i_key;
        t->parms.o_key = p->o_key;
        if (dev->type != ARPHRD_ETHER) {
                __dev_addr_set(dev, &p->iph.saddr, 4);
                memcpy(dev->broadcast, &p->iph.daddr, 4);
        }
        ip_tunnel_add(itn, t);

        t->parms.iph.ttl = p->iph.ttl;
        t->parms.iph.tos = p->iph.tos;
        t->parms.iph.frag_off = p->iph.frag_off;

        if (t->parms.link != p->link || t->fwmark != fwmark) {
                int mtu;

                t->parms.link = p->link;
                t->fwmark = fwmark;
                mtu = ip_tunnel_bind_dev(dev);
                if (set_mtu)
                        dev->mtu = mtu;
        }
        dst_cache_reset(&t->dst_cache);
        netdev_state_change(dev);
}

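/* Back end for the SIOC{GET,ADD,CHG,DEL}TUNNEL ioctls.  ADD, CHG and
 * DEL require CAP_NET_ADMIN in the tunnel's user namespace; the
 * fallback device can be queried but never deleted.
 */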
int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
        int err = 0;
        struct ip_tunnel *t = netdev_priv(dev);
        struct net *net = t->net;
        struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

        switch (cmd) {
        case SIOCGETTUNNEL:
                if (dev == itn->fb_tunnel_dev) {
                        t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
                        if (!t)
                                t = netdev_priv(dev);
                }
                memcpy(p, &t->parms, sizeof(*p));
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                err = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto done;
                if (p->iph.ttl)
                        p->iph.frag_off |= htons(IP_DF);
                if (!(p->i_flags & VTI_ISVTI)) {
                        if (!(p->i_flags & TUNNEL_KEY))
                                p->i_key = 0;
                        if (!(p->o_flags & TUNNEL_KEY))
                                p->o_key = 0;
                }

                t = ip_tunnel_find(itn, p, itn->type);

                if (cmd == SIOCADDTUNNEL) {
                        if (!t) {
                                t = ip_tunnel_create(net, itn, p);
                                err = PTR_ERR_OR_ZERO(t);
                                break;
                        }

                        err = -EEXIST;
                        break;
                }
                if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
                        if (t) {
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                unsigned int nflags = 0;

                                if (ipv4_is_multicast(p->iph.daddr))
                                        nflags = IFF_BROADCAST;
                                else if (p->iph.daddr)
                                        nflags = IFF_POINTOPOINT;

                                if ((dev->flags ^ nflags) &
                                    (IFF_POINTOPOINT | IFF_BROADCAST)) {
                                        err = -EINVAL;
                                        break;
                                }

                                t = netdev_priv(dev);
                        }
                }

                if (t) {
                        err = 0;
                        ip_tunnel_update(itn, t, dev, p, true, 0);
                } else {
                        err = -ENOENT;
                }
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto done;

                if (dev == itn->fb_tunnel_dev) {
                        err = -ENOENT;
                        t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
                        if (!t)
                                goto done;
                        err = -EPERM;
                        if (t == netdev_priv(itn->fb_tunnel_dev))
                                goto done;
                        dev = t->dev;
                }
                unregister_netdevice(dev);
                err = 0;
                break;

        default:
                err = -EINVAL;
        }

done:
        return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ctl);

int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
                             void __user *data, int cmd)
{
        struct ip_tunnel_parm p;
        int err;

        if (copy_from_user(&p, data, sizeof(p)))
                return -EFAULT;
        err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
        if (!err && copy_to_user(data, &p, sizeof(p)))
                return -EFAULT;
        return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);

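/* Validate and set a new MTU.  The upper bound leaves room for the
 * outer IP header plus the tunnel headers; with @strict false an
 * oversized request is clamped instead of rejected.
 */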
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        int t_hlen = tunnel->hlen + sizeof(struct iphdr);
        int max_mtu = IP_MAX_MTU - t_hlen;

        if (dev->type == ARPHRD_ETHER)
                max_mtu -= dev->hard_header_len;

        if (new_mtu < ETH_MIN_MTU)
                return -EINVAL;

        if (new_mtu > max_mtu) {
                if (strict)
                        return -EINVAL;

                new_mtu = max_mtu;
        }

        dev->mtu = new_mtu;
        return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
        return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        gro_cells_destroy(&tunnel->gro_cells);
        dst_cache_destroy(&tunnel->dst_cache);
        free_percpu(dev->tstats);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct ip_tunnel_net *itn;

        itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

        if (itn->fb_tunnel_dev != dev) {
                ip_tunnel_del(itn, netdev_priv(dev));
                unregister_netdevice_queue(dev, head);
        }
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

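/* Per-netns initialization: set up the hash table and, unless fallback
 * tunnels are disabled for this netns, create the fallback device.
 */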
int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
                                  struct rtnl_link_ops *ops, char *devname)
{
        struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
        struct ip_tunnel_parm parms;
        unsigned int i;

        itn->rtnl_link_ops = ops;
        for (i = 0; i < IP_TNL_HASH_SIZE; i++)
                INIT_HLIST_HEAD(&itn->tunnels[i]);

        if (!ops || !net_has_fallback_tunnels(net)) {
                struct ip_tunnel_net *it_init_net;

                it_init_net = net_generic(&init_net, ip_tnl_net_id);
                itn->type = it_init_net->type;
                itn->fb_tunnel_dev = NULL;
                return 0;
        }

        memset(&parms, 0, sizeof(parms));
        if (devname)
                strscpy(parms.name, devname, IFNAMSIZ);

        rtnl_lock();
        itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
        /* FB netdevice is special: we have one, and only one per netns.
         * Allowing it to be moved to another netns is clearly unsafe.
         */
        if (!IS_ERR(itn->fb_tunnel_dev)) {
                itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
                itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
                ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
                itn->type = itn->fb_tunnel_dev->type;
        }
        rtnl_unlock();

        return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
                              struct list_head *head,
                              struct rtnl_link_ops *ops)
{
        struct net_device *dev, *aux;
        int h;

        for_each_netdev_safe(net, dev, aux)
                if (dev->rtnl_link_ops == ops)
                        unregister_netdevice_queue(dev, head);

        for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
                struct ip_tunnel *t;
                struct hlist_node *n;
                struct hlist_head *thead = &itn->tunnels[h];

                hlist_for_each_entry_safe(t, n, thead, hash_node)
                        /* If dev is in the same netns, it has already
                         * been added to the list by the previous loop.
                         */
                        if (!net_eq(dev_net(t->dev), net))
                                unregister_netdevice_queue(t->dev, head);
        }
}

void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
                           struct rtnl_link_ops *ops)
{
        struct ip_tunnel_net *itn;
        struct net *net;
        LIST_HEAD(list);

        rtnl_lock();
        list_for_each_entry(net, net_list, exit_list) {
                itn = net_generic(net, id);
                ip_tunnel_destroy(net, itn, &list, ops);
        }
        unregister_netdevice_many(&list);
        rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);

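/* rtnl_link "newlink" helper shared by the tunnel drivers: rejects
 * duplicates (and allows only one collect_md tunnel per netns),
 * registers the device, binds it to the underlying device and sets the
 * MTU.
 */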
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
                      struct ip_tunnel_parm *p, __u32 fwmark)
{
        struct ip_tunnel *nt;
        struct net *net = dev_net(dev);
        struct ip_tunnel_net *itn;
        int mtu;
        int err;

        nt = netdev_priv(dev);
        itn = net_generic(net, nt->ip_tnl_net_id);

        if (nt->collect_md) {
                if (rtnl_dereference(itn->collect_md_tun))
                        return -EEXIST;
        } else {
                if (ip_tunnel_find(itn, p, dev->type))
                        return -EEXIST;
        }

        nt->net = net;
        nt->parms = *p;
        nt->fwmark = fwmark;
        err = register_netdevice(dev);
        if (err)
                goto err_register_netdevice;

        if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
                eth_hw_addr_random(dev);

        mtu = ip_tunnel_bind_dev(dev);
        if (tb[IFLA_MTU]) {
                unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));

                if (dev->type == ARPHRD_ETHER)
                        max -= dev->hard_header_len;

                mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
        }

        err = dev_set_mtu(dev, mtu);
        if (err)
                goto err_dev_set_mtu;

        ip_tunnel_add(itn, nt);
        return 0;

err_dev_set_mtu:
        unregister_netdevice(dev);
err_register_netdevice:
        return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

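/* rtnl_link "changelink" helper: the fallback device cannot be
 * reconfigured, and the new parameters must not collide with another
 * existing tunnel.
 */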
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
                         struct ip_tunnel_parm *p, __u32 fwmark)
{
        struct ip_tunnel *t;
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct net *net = tunnel->net;
        struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

        if (dev == itn->fb_tunnel_dev)
                return -EINVAL;

        t = ip_tunnel_find(itn, p, dev->type);

        if (t) {
                if (t->dev != dev)
                        return -EEXIST;
        } else {
                t = tunnel;

                if (dev->type != ARPHRD_ETHER) {
                        unsigned int nflags = 0;

                        if (ipv4_is_multicast(p->iph.daddr))
                                nflags = IFF_BROADCAST;
                        else if (p->iph.daddr)
                                nflags = IFF_POINTOPOINT;

                        if ((dev->flags ^ nflags) &
                            (IFF_POINTOPOINT | IFF_BROADCAST))
                                return -EINVAL;
                }
        }

        ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

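/* ndo_init for tunnel devices: allocate per-cpu stats, the dst cache
 * and GRO cells, and prime the outer IP header template.
 */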
int ip_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct iphdr *iph = &tunnel->parms.iph;
        int err;

        dev->needs_free_netdev = true;
        dev->priv_destructor = ip_tunnel_dev_free;
        dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
        if (!dev->tstats)
                return -ENOMEM;

        err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
        if (err) {
                free_percpu(dev->tstats);
                return err;
        }

        err = gro_cells_init(&tunnel->gro_cells, dev);
        if (err) {
                dst_cache_destroy(&tunnel->dst_cache);
                free_percpu(dev->tstats);
                return err;
        }

        tunnel->dev = dev;
        tunnel->net = dev_net(dev);
        strcpy(tunnel->parms.name, dev->name);
        iph->version            = 4;
        iph->ihl                = 5;

        if (tunnel->collect_md)
                netif_keep_dst(dev);
        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct net *net = tunnel->net;
        struct ip_tunnel_net *itn;

        itn = net_generic(net, tunnel->ip_tnl_net_id);
        ip_tunnel_del(itn, netdev_priv(dev));
        if (itn->fb_tunnel_dev == dev)
                WRITE_ONCE(itn->fb_tunnel_dev, NULL);

        dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the minimum required initialization; the rest is done in the
 * tunnel_init call.
 */
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");