/*
 * Linux NET3: GRE over IP protocol decoder.
 *
 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/protocol.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif
/*
   Problems &amp; solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause a complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter, since when we enter the first ndo_xmit(), cpu migration is
   forbidden. We force an exit if this counter reaches RECURSION_LIMIT.
*/
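/* Illustrative sketch (not part of this driver) of the xmit_recursion
 * guard described above. The real counter lives in the core transmit
 * path, not here; the function name and the limit value below are
 * assumptions made up for the example:
 *
 *	static DEFINE_PER_CPU(int, xmit_recursion);
 *	#define RECURSION_LIMIT 4
 *
 *	static int guarded_xmit(struct sk_buff *skb, struct net_device *dev)
 *	{
 *		int ret;
 *
 *		if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT) {
 *			kfree_skb(skb);
 *			return -ELOOP;
 *		}
 *		__this_cpu_inc(xmit_recursion);
 *		ret = dev->netdev_ops->ndo_start_xmit(skb, dev);
 *		__this_cpu_dec(xmit_recursion);
 *		return ret;
 *	}
 *
 * Because cpu migration is forbidden inside ndo_start_xmit(), the percpu
 * counter is safe without locking, and a self-encapsulating route can
 * recurse at most RECURSION_LIMIT times before packets are dropped.
 */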
/*
   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion" in
   this case, if we copy it from the packet being encapsulated to the upper
   header. It is a very good solution, but it introduces two problems:

   - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least, in the neighbourhood of
     mine) return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking fragmentation into account. To be short, ttl is not a solution
   at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed the pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when the encapsulated packets have DF set.
   But it is not our problem! Nobody could accuse us: we made
   all that we could make. Even if it was your gated that injected
   the fatal route into the network, even if it was you who configured
   the fatal static route: you are innocent. :-)
*/
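/* Concretely (a sketch mirroring the SIOCADDTUNNEL/SIOCCHGTUNNEL path in
 * ipgre_tunnel_ioctl() further down): a preconfigured hop limit forces DF
 * on the outer header, so a loop shrinks the path MTU instead of
 * multiplying traffic:
 *
 *	if (p.iph.ttl)
 *		p.iph.frag_off |= htons(IP_DF);
 */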
/*
   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident how to make them modular.
   sit is an integral part of IPv6, ipip and gre are naturally modular.
   We could extract the common parts (hash table, ioctl etc.)
   to a separate module (ip_tunnel.c).
*/
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_bind_dev(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

static int ipgre_net_id __read_mostly;

struct ipgre_net {
	struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];

	struct net_device *fb_tunnel_dev;
};
/* Tunnel hash table */

/*
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless tunnel,
   will match the fallback tunnel.
*/
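/* Worked example of the matching rules (using ipgre_key_match() below):
 *
 *   tunnel with i_flags = GRE_KEY, i_key = htonl(42):
 *	packet carrying GRE_KEY, key htonl(42)	-> matches
 *	packet carrying GRE_KEY, key htonl(7)	-> does not match
 *	packet without GRE_KEY			-> does not match
 *
 *   keyless tunnel (i_flags without GRE_KEY):
 *	packet without GRE_KEY			-> matches
 *	packet carrying any GRE_KEY		-> does not match
 */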
#define HASH(addr) (((__force u32)addr ^ ((__force u32)addr >> 4)) & 0xF)
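/* Worked example: for addr = 0x12345678 (viewed as a plain u32),
 * addr ^ (addr >> 4) = 0x12345678 ^ 0x01234567 = 0x1317131f, and
 * masking with 0xF keeps the low nibble, so the bucket is 0xf.
 */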
#define tunnels_r_l	tunnels[3]
#define tunnels_r	tunnels[2]
#define tunnels_l	tunnels[1]
#define tunnels_wc	tunnels[0]
/*
 * Locking: hash tables are protected by RCU and RTNL
 */

#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
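/* Reader-side usage sketch (illustrative): the macro expects a local
 * "struct ip_tunnel *t" in scope and walks one hash chain under the RCU
 * read lock, e.g.
 *
 *	struct ip_tunnel *t;
 *
 *	rcu_read_lock();
 *	for_each_ip_tunnel_rcu(ign->tunnels_wc[h]) {
 *		if (t->dev->flags & IFF_UP)
 *			break;
 *	}
 *	rcu_read_unlock();
 *
 * while writers (ipgre_tunnel_link()/ipgre_tunnel_unlink() below) publish
 * with rcu_assign_pointer() under RTNL.
 */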
/* often-modified stats are per-cpu, others are shared (netdev->stats) */
struct pcpu_tstats {
	u64			rx_packets;
	u64			rx_bytes;
	u64			tx_packets;
	u64			tx_bytes;
	struct u64_stats_sync	syncp;
};

static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
						   struct rtnl_link_stats64 *tot)
{
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes = tstats->rx_bytes;
			tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes += rx_bytes;
		tot->tx_bytes += tx_bytes;
	}

	tot->multicast = dev->stats.multicast;
	tot->rx_crc_errors = dev->stats.rx_crc_errors;
	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
	tot->rx_length_errors = dev->stats.rx_length_errors;
	tot->rx_frame_errors = dev->stats.rx_frame_errors;
	tot->rx_errors = dev->stats.rx_errors;

	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
	tot->tx_errors = dev->stats.tx_errors;

	return tot;
}
/* Does the key in the tunnel parameters match the packet? */
static bool ipgre_key_match(const struct ip_tunnel_parm *p,
			    __be16 flags, __be32 key)
{
	if (p->i_flags & GRE_KEY) {
		if (flags & GRE_KEY)
			return key == p->i_key;
		return false;	/* key expected, none present */
	} else
		return !(flags & GRE_KEY);
}
/* Given src, dst and key, find the appropriate tunnel for input. */

static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
					     __be32 remote, __be32 local,
					     __be16 flags, __be32 key,
					     __be16 gre_proto)
{
	struct net *net = dev_net(dev);
	int link = dev->ifindex;
	unsigned int h0 = HASH(remote);
	unsigned int h1 = HASH(key);
	struct ip_tunnel *t, *cand = NULL;
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
		       ARPHRD_ETHER : ARPHRD_IPGRE;
	int score, cand_score = 4;

	for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;
		if (!ipgre_key_match(&t->parms, flags, key))
			continue;
		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;
		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
		if (remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;
		if (!ipgre_key_match(&t->parms, flags, key))
			continue;
		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;
		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    !(t->dev->flags & IFF_UP))
			continue;
		if (!ipgre_key_match(&t->parms, flags, key))
			continue;
		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;
		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;
		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;
		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	if (cand != NULL)
		return cand;

	dev = ign->fb_tunnel_dev;
	if (dev->flags & IFF_UP)
		return netdev_priv(dev);

	return NULL;
}
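/* Scoring recap: score 0 means both the link and the device type match
 * exactly and wins immediately; bit 0 flags a link mismatch and bit 1 a
 * device-type mismatch, so closer matches (lower scores) are preferred,
 * and the initial cand_score of 4 means "no candidate yet".
 */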
static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
					       struct ip_tunnel_parm *parms)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	unsigned int h = HASH(key);
	int prio = 0;

	if (local)
		prio |= 1;
	if (remote && !ipv4_is_multicast(remote)) {
		prio |= 2;
		h ^= HASH(remote);
	}

	return &ign->tunnels[prio][h];
}

static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
						    struct ip_tunnel *t)
{
	return __ipgre_bucket(ign, &t->parms);
}

static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);

	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
}

static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp;
	struct ip_tunnel *iter;

	for (tp = ipgre_bucket(ign, t);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
			break;
		}
	}
}

static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
					   struct ip_tunnel_parm *parms,
					   int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	int link = parms->link;
	struct ip_tunnel *t;
	struct ip_tunnel __rcu **tp;
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	for (tp = __ipgre_bucket(ign, parms);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next)
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    key == t->parms.i_key &&
		    link == t->parms.link &&
		    type == t->dev->type)
			break;

	return t;
}
static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
					     struct ip_tunnel_parm *parms, int create)
{
	struct ip_tunnel *t, *nt;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
	if (t || !create)
		return t;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		strcpy(name, "gre%d");

	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	nt = netdev_priv(dev);
	nt->parms = *parms;
	dev->rtnl_link_ops = &ipgre_link_ops;

	dev->mtu = ipgre_tunnel_bind_dev(dev);

	if (register_netdevice(dev) < 0)
		goto failed_free;

	/* Can use a lockless transmit, unless we generate output sequences */
	if (!(nt->parms.o_flags & GRE_SEQ))
		dev->features |= NETIF_F_LLTX;

	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);
	return nt;

failed_free:
	free_netdev(dev);
	return NULL;
}

static void ipgre_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	ipgre_tunnel_unlink(ign, netdev_priv(dev));
	dev_put(dev);
}
static void ipgre_err(struct sk_buff *skb, u32 info)
{
/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, Cisco "wise men" put the GRE key in the third word
   of the GRE header. That makes it impossible to maintain even soft
   state for keyed GRE tunnels with checksum enabled. Tell them
   "thank you".

   Well, I wonder: rfc1812 was written by a Cisco employee, so
   what the hell makes these idiots break the standards established
   by themselves???
 */
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	__be16 *p = (__be16 *)(skb->data + (iph->ihl << 2));
	int grehlen = (iph->ihl << 2) + 4;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	__be16 flags;
	__be32 key = 0;

	flags = p[0];
	if (flags & (GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags & (GRE_VERSION|GRE_ROUTING))
			return;
		if (flags & GRE_KEY) {
			grehlen += 4;
			if (flags & GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes are returned, a keyed message will be dropped here */
	if (skb_headlen(skb) < grehlen)
		return;

	if (flags & GRE_KEY)
		key = *(((__be32 *)p) + (grehlen / 4) - 1);

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH;
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;

	case ICMP_REDIRECT:
		break;
	}

	t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
				flags, key, p[1]);
	if (t == NULL)
		return;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 t->parms.link, 0, IPPROTO_GRE, 0);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
			      IPPROTO_GRE, 0);
		return;
	}
	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
}
static inline u8
ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
{
	u8 inner = 0;

	if (skb->protocol == htons(ETH_P_IP))
		inner = old_iph->tos;
	else if (skb->protocol == htons(ETH_P_IPV6))
		inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
	return INET_ECN_encapsulate(tos, inner);
}
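/* Note on INET_ECN_encapsulate() (defined in include/net/inet_ecn.h): the
 * result keeps the DSCP bits of "tos" and takes its ECN bits from the
 * inner header, with inner CE downgraded to ECT(0) so that CE can be
 * restored on decapsulation. Illustrative values:
 *
 *	tos = 0x20 (CS1), inner = 0x02 (ECT(0))  ->  outer tos 0x22
 *	tos = 0x20 (CS1), inner = 0x03 (CE)      ->  outer tos 0x22
 */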
static int ipgre_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	u8     *h;
	__be16 flags;
	__sum16 csum = 0;
	__be32 key = 0;
	u32    seqno = 0;
	struct ip_tunnel *tunnel;
	int    offset = 4;
	__be16 gre_proto;
	int    err;

	if (!pskb_may_pull(skb, 16))
		goto drop;

	iph = ip_hdr(skb);
	h = skb->data;
	flags = *(__be16 *)h;

	if (flags & (GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags & (GRE_VERSION|GRE_ROUTING))
			goto drop;

		if (flags & GRE_CSUM) {
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags & GRE_KEY) {
			key = *(__be32 *)(h + offset);
			offset += 4;
		}
		if (flags & GRE_SEQ) {
			seqno = ntohl(*(__be32 *)(h + offset));
			offset += 4;
		}
	}

	gre_proto = *(__be16 *)(h + 2);
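	/* GRE header layout parsed above (RFC 2784/2890):
	 *
	 *	bytes 0-1: flags (C = GRE_CSUM, K = GRE_KEY, S = GRE_SEQ)
	 *		   plus version
	 *	bytes 2-3: protocol type of the inner packet
	 *	then, 4 bytes each and in this order when the flag is set:
	 *	checksum(+reserved), key, sequence number
	 *
	 * which is why "offset" starts at 4 and grows by 4 per option.
	 */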
	tunnel = ipgre_tunnel_lookup(skb->dev,
				     iph->saddr, iph->daddr, flags, key,
				     gre_proto);
	if (tunnel) {
		struct pcpu_tstats *tstats;

		secpath_reset(skb);

		skb->protocol = gre_proto;
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, skip the extra 4 bytes in the GRE header
		 */
		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		skb->mac_header = skb->network_header;
		__pskb_pull(skb, offset);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Looped back packet, drop it! */
			if (rt_is_output_route(skb_rtable(skb)))
				goto drop;
			tunnel->dev->stats.multicast++;
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		if (((flags & GRE_CSUM) && csum) ||
		    (!(flags & GRE_CSUM) && (tunnel->parms.i_flags & GRE_CSUM))) {
			tunnel->dev->stats.rx_crc_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		if (tunnel->parms.i_flags & GRE_SEQ) {
			if (!(flags & GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
				tunnel->dev->stats.rx_fifo_errors++;
				tunnel->dev->stats.rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}
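		/* The (s32) cast gives serial-number arithmetic, so the
		 * in-order test survives wraparound. Worked example: with
		 * i_seqno = 0xfffffff0, a new seqno of 5 yields
		 * (s32)(5 - 0xfffffff0) = 21 > 0 and is accepted, while a
		 * stale seqno of 0xffffffef yields -1 < 0 and is dropped.
		 */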
		/* Warning: All skb pointers will be invalidated! */
		if (tunnel->dev->type == ARPHRD_ETHER) {
			if (!pskb_may_pull(skb, ETH_HLEN)) {
				tunnel->dev->stats.rx_length_errors++;
				tunnel->dev->stats.rx_errors++;
				goto drop;
			}

			iph = ip_hdr(skb);
			skb->protocol = eth_type_trans(skb, tunnel->dev);
			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
		}

		__skb_tunnel_rx(skb, tunnel->dev);

		skb_reset_network_header(skb);
		err = IP_ECN_decapsulate(iph, skb);
		if (unlikely(err)) {
			if (log_ecn_error)
				net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
						     &iph->saddr, iph->tos);
			if (err > 1) {
				++tunnel->dev->stats.rx_frame_errors;
				++tunnel->dev->stats.rx_errors;
				goto drop;
			}
		}

		tstats = this_cpu_ptr(tunnel->dev->tstats);
		u64_stats_update_begin(&tstats->syncp);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;
		u64_stats_update_end(&tstats->syncp);

		gro_cells_receive(&tunnel->gro_cells, skb);

		return 0;
	}
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
	kfree_skb(skb);
	return 0;
}
static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct pcpu_tstats *tstats;
	const struct iphdr *old_iph = ip_hdr(skb);
	const struct iphdr *tiph;
	struct flowi4 fl4;
	u8     tos;
	__be16 df;
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int    gre_hlen;
	__be32 dst;
	int    mtu;

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    skb_checksum_help(skb))
		goto tx_error;

	if (dev->type == ARPHRD_ETHER)
		IPCB(skb)->flags = 0;

	if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
		gre_hlen = 0;
		tiph = (const struct iphdr *)skb->data;
	} else {
		gre_hlen = tunnel->hlen;
		tiph = &tunnel->parms.iph;
	}

	if ((dst = tiph->daddr) == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, old_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;
	}

	tos = tiph->tos;
	if (tos == 1) {
		tos = 0;
		if (skb->protocol == htons(ETH_P_IP))
			tos = old_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
	}

	rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
				 tunnel->parms.o_key, RT_TOS(tos),
				 tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error;
	}
	tdev = rt->dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	df = tiph->frag_off;
	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		df |= (old_iph->frag_off & htons(IP_DF));

		if ((old_iph->frag_off & htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#endif

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (max_headroom > dev->needed_headroom)
			dev->needed_headroom = max_headroom;
		if (!new_skb) {
			ip_rt_put(rt);
			dev->stats.tx_dropped++;
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
		/* Warning: tiph value might point to freed memory */
	}
	skb_reset_transport_header(skb);
	skb_push(skb, gre_hlen);
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 *	Push down and install the IPIP header.
	 */

	iph			= ip_hdr(skb);
	iph->version		= 4;
	iph->ihl		= sizeof(struct iphdr) >> 2;
	iph->frag_off		= df;
	iph->protocol		= IPPROTO_GRE;
	iph->tos		= ipgre_ecn_encapsulate(tos, old_iph, skb);
	iph->daddr		= fl4.daddr;
	iph->saddr		= fl4.saddr;

	if ((iph->ttl = tiph->ttl) == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			iph->ttl = old_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
#endif
		else
			iph->ttl = ip4_dst_hoplimit(&rt->dst);
	}

	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
	((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
				   htons(ETH_P_TEB) : skb->protocol;

	if (tunnel->parms.o_flags & (GRE_KEY|GRE_CSUM|GRE_SEQ)) {
		__be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);

		if (tunnel->parms.o_flags & GRE_SEQ) {
			++tunnel->o_seqno;
			*ptr = htonl(tunnel->o_seqno);
			ptr--;
		}
		if (tunnel->parms.o_flags & GRE_KEY) {
			*ptr = tunnel->parms.o_key;
			ptr--;
		}
		if (tunnel->parms.o_flags & GRE_CSUM) {
			*ptr = 0;
			*(__sum16 *)ptr = ip_compute_csum((void *)(iph + 1),
							  skb->len - sizeof(struct iphdr));
		}
	}
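	/* Resulting outer packet layout: the options are written tail-first
	 * above, so on the wire they still appear in the RFC 2890 order
	 *
	 *	[ IP header | GRE flags+proto | csum | key | seq | payload ]
	 *
	 * with each optional word present only when the matching o_flag
	 * is set.
	 */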
	nf_reset(skb);

	tstats = this_cpu_ptr(dev->tstats);
	__IPTUNNEL_XMIT(tstats, &dev->stats);
	return NETDEV_TX_OK;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
static int ipgre_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int addend = sizeof(struct iphdr) + 4;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */

	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(dev_net(dev), &fl4,
					 iph->daddr, iph->saddr,
					 tunnel->parms.o_key,
					 RT_TOS(iph->tos),
					 tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}

		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Precalculate GRE options length */
	if (tunnel->parms.o_flags & (GRE_CSUM|GRE_KEY|GRE_SEQ)) {
		if (tunnel->parms.o_flags & GRE_CSUM)
			addend += 4;
		if (tunnel->parms.o_flags & GRE_KEY)
			addend += 4;
		if (tunnel->parms.o_flags & GRE_SEQ)
			addend += 4;
	}
	dev->needed_headroom = addend + hlen;
	mtu -= dev->hard_header_len + addend;

	if (mtu < 68)
		mtu = 68;

	tunnel->hlen = addend;

	return mtu;
}
static int
ipgre_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ign->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags) & (GRE_VERSION|GRE_ROUTING)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		if (!(p.i_flags & GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags & GRE_KEY))
			p.o_key = 0;

		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				t = netdev_priv(dev);

				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags ^ nflags) & (IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				ipgre_tunnel_unlink(ign, t);
				synchronize_net();
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(ign, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					dev->mtu = ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ign->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(ign->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have an impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone,
   play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp fec0:6666:6666::193.233.7.65
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
	__be16 *p = (__be16 *)(iph + 1);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
	p[0] = t->parms.o_flags;
	p[1] = htons(type);

	/*
	 *	Set the source hardware address.
	 */

	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen;

	return -(t->hlen);
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *)skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};
#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(dev_net(dev), &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif
static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
	.ndo_get_stats64	= ipgre_get_stats64,
};

static void ipgre_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(dev->tstats);
	free_netdev(dev);
}

#define GRE_FEATURES	(NETIF_F_SG |		\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_HW_CSUM)
static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->destructor		= ipgre_dev_free;

	dev->type		= ARPHRD_IPGRE;
	dev->needed_headroom	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
	dev->flags		= IFF_NOARP;
	dev->features		|= NETIF_F_NETNS_LOCAL;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	struct iphdr *iph;
	int err;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else
		dev->header_ops = &ipgre_header_ops;

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	return 0;
}

static void ipgre_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->protocol		= IPPROTO_GRE;
	tunnel->hlen		= sizeof(struct iphdr) + 4;

	dev_hold(dev);
}
static const struct gre_protocol ipgre_protocol = {
	.handler	= ipgre_rcv,
	.err_handler	= ipgre_err,
};

static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
{
	int prio;

	for (prio = 0; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;

			t = rtnl_dereference(ign->tunnels[prio][h]);
			while (t != NULL) {
				unregister_netdevice_queue(t->dev, head);
				t = rtnl_dereference(t->next);
			}
		}
	}
}

static int __net_init ipgre_init_net(struct net *net)
{
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int err;

	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
					  ipgre_tunnel_setup);
	if (!ign->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ign->fb_tunnel_dev, net);

	ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;

	if ((err = register_netdev(ign->fb_tunnel_dev)))
		goto err_reg_dev;

	rcu_assign_pointer(ign->tunnels_wc[0],
			   netdev_priv(ign->fb_tunnel_dev));
	return 0;

err_reg_dev:
	ipgre_dev_free(ign->fb_tunnel_dev);
err_alloc_dev:
	return err;
}

static void __net_exit ipgre_exit_net(struct net *net)
{
	struct ipgre_net *ign;
	LIST_HEAD(list);

	ign = net_generic(net, ipgre_net_id);
	rtnl_lock();
	ipgre_destroy_tunnels(ign, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ipgre_net),
};
static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION|GRE_ROUTING))
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data);
}
static void ipgre_netlink_parms(struct nlattr *data[],
				struct ip_tunnel_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
		parms->iph.frag_off = htons(IP_DF);
}
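/* Userspace view (illustrative; the IFLA_GRE_* attributes above map onto
 * "ip link" keywords in iproute2):
 *
 *	ip link add gre1 type gre local 10.0.0.1 remote 10.0.0.2 ttl 64 key 42
 *
 * When IFLA_GRE_PMTUDISC is absent or nonzero, the last branch above
 * forces DF on the outer header; "nopmtudisc" is the way to turn that off.
 */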
static int ipgre_tap_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;

	tunnel = netdev_priv(dev);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	ipgre_tunnel_bind_dev(dev);

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}

static const struct net_device_ops ipgre_tap_netdev_ops = {
	.ndo_init		= ipgre_tap_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
	.ndo_get_stats64	= ipgre_get_stats64,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);

	dev->netdev_ops		= &ipgre_tap_netdev_ops;
	dev->destructor		= ipgre_dev_free;

	dev->features		|= NETIF_F_NETNS_LOCAL;
}
static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
			 struct nlattr *data[])
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int mtu;
	int err;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &nt->parms);

	if (ipgre_tunnel_find(net, &nt->parms, dev->type))
		return -EEXIST;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ipgre_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	/* Can use a lockless transmit, unless we generate output sequences */
	if (!(nt->parms.o_flags & GRE_SEQ))
		dev->features |= NETIF_F_LLTX;

	err = register_netdevice(dev);
	if (err)
		goto out;

	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);

out:
	return err;
}
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[])
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	struct ip_tunnel_parm p;
	int mtu;

	if (dev == ign->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &p);

	t = ipgre_tunnel_locate(net, &p, 0);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = nt;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p.iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p.iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}

		ipgre_tunnel_unlink(ign, t);
		t->parms.iph.saddr = p.iph.saddr;
		t->parms.iph.daddr = p.iph.daddr;
		t->parms.i_key = p.i_key;
		if (dev->type != ARPHRD_ETHER) {
			memcpy(dev->dev_addr, &p.iph.saddr, 4);
			memcpy(dev->broadcast, &p.iph.daddr, 4);
		}
		ipgre_tunnel_link(ign, t);
		netdev_state_change(dev);
	}

	t->parms.o_key = p.o_key;
	t->parms.iph.ttl = p.iph.ttl;
	t->parms.iph.tos = p.iph.tos;
	t->parms.iph.frag_off = p.iph.frag_off;

	if (t->parms.link != p.link) {
		t->parms.link = p.link;
		mtu = ipgre_tunnel_bind_dev(dev);
		if (!tb[IFLA_MTU])
			dev->mtu = mtu;
		netdev_state_change(dev);
	}

	return 0;
}
static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))))
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};
/*
 *	And now the modules code and kernel interface.
 */

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

out:
	return err;

tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&ipgre_net_ops);
	goto out;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
		pr_info("%s: can't remove protocol\n", __func__);
	unregister_pernet_device(&ipgre_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_NETDEV("gre0");