// SPDX-License-Identifier: GPL-2.0

/* In-place tunneling */

#include <stdbool.h>
#include <string.h>

#include <linux/stddef.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/mpls.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/pkt_cls.h>
#include <linux/types.h>

#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>

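/* Config: the programs below only rewrite TCP flows to destination port
 * cfg_port; UDP-based tunnels use cfg_udp_src as their outer source port.
 */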
static const int cfg_port = 8000;

static const int cfg_udp_src = 20000;

#define L2_PAD_SZ	(sizeof(struct vxlanhdr) + ETH_HLEN)

#define UDP_PORT		5555
#define MPLS_OVER_UDP_PORT	6635
#define ETH_OVER_UDP_PORT	7777
#define VXLAN_UDP_PORT		8472

#define EXTPROTO_VXLAN	0x1

#define VXLAN_N_VID	(1u << 24)
#define VXLAN_VNI_MASK	bpf_htonl((VXLAN_N_VID - 1) << 8)
#define VXLAN_FLAGS	0x8
#define VXLAN_VNI	1

#ifndef NEXTHDR_DEST
#define NEXTHDR_DEST	60
#endif

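/* An MPLS label stack entry packs label (20 bits), TC (3 bits),
 * S (bottom-of-stack, 1 bit) and TTL (8 bits) into one 32-bit word.
 */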
/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
						     MPLS_LS_S_MASK | 0xff);

struct vxlanhdr {
	__be32 vx_flags;
	__be32 vx_vni;
} __attribute__((packed));

struct gre_hdr {
	__be16 flags;
	__be16 protocol;
} __attribute__((packed));

union l4hdr {
	struct udphdr udp;
	struct gre_hdr gre;
};

struct v4hdr {
	struct iphdr ip;
	union l4hdr l4hdr;
	__u8 pad[L2_PAD_SZ];	/* space for L2 header / vxlan header ... */
} __attribute__((packed));

struct v6hdr {
	struct ipv6hdr ip;
	union l4hdr l4hdr;
	__u8 pad[L2_PAD_SZ];	/* space for L2 header / vxlan header ... */
} __attribute__((packed));

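/* Compute the IPv4 header checksum in place: sum the header as 16-bit
 * words, fold the carry back in once, and store the one's complement.
 * The loop is fully unrolled so the program loads on kernels without
 * bounded-loop support in the verifier.
 */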
static __always_inline void set_ipv4_csum(struct iphdr *iph)
{
	__u16 *iph16 = (__u16 *)iph;
	__u32 csum;
	int i;

	iph->check = 0;

#pragma clang loop unroll(full)
	for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
		csum += *iph16++;

	iph->check = ~((csum & 0xffff) + (csum >> 16));
}

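/* Encapsulate the inner packet behind a new outer IPv4 header: grow
 * headroom at the mac/network boundary with bpf_skb_adjust_room(), then
 * write the outer IPv4 plus optional GRE/UDP and MPLS/Ethernet/VXLAN
 * headers in front of the inner packet.
 */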
static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
					__u16 l2_proto, __u16 ext_proto)
{
	__u16 udp_dst = UDP_PORT;
	struct iphdr iph_inner;
	struct v4hdr h_outer;
	struct tcphdr tcph;
	int olen, l2_len;
	__u8 *l2_hdr = NULL;
	int tcp_off;
	__u64 flags;

	/* Most tests encapsulate a packet into a tunnel with the same
	 * network protocol, and derive the outer header fields from
	 * the inner header.
	 *
	 * The 6in4 case tests different inner and outer protocols. As
	 * the inner is ipv6, but the outer expects an ipv4 header as
	 * input, manually build a struct iphdr based on the ipv6hdr.
	 */
	if (encap_proto == IPPROTO_IPV6) {
		const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
		const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
		struct ipv6hdr iph6_inner;

		/* Read the IPv6 header */
		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
				       sizeof(iph6_inner)) < 0)
			return TC_ACT_OK;

		/* Derive the IPv4 header fields from the IPv6 header */
		memset(&iph_inner, 0, sizeof(iph_inner));
		iph_inner.version = 4;
		iph_inner.ihl = 5;
		iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
				    bpf_ntohs(iph6_inner.payload_len));
		iph_inner.ttl = iph6_inner.hop_limit - 1;
		iph_inner.protocol = iph6_inner.nexthdr;
		iph_inner.saddr = __bpf_constant_htonl(saddr);
		iph_inner.daddr = __bpf_constant_htonl(daddr);

		tcp_off = sizeof(iph6_inner);
	} else {
		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
				       sizeof(iph_inner)) < 0)
			return TC_ACT_OK;

		tcp_off = sizeof(iph_inner);
	}

	/* filter only packets we want */
	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
		return TC_ACT_OK;

	if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

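	/* Size the outer headers: start with the outer IPv4 header, then
	 * grow by the L4 (GRE/UDP) and L2 (MPLS/Ethernet/VXLAN) pieces
	 * selected below.
	 */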
	olen = sizeof(h_outer.ip);
	l2_len = 0;

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;

	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		l2_len = sizeof(mpls_label);
		udp_dst = MPLS_OVER_UDP_PORT;
		break;
	case ETH_P_TEB:
		l2_len = ETH_HLEN;
		if (ext_proto & EXTPROTO_VXLAN) {
			udp_dst = VXLAN_UDP_PORT;
			l2_len += sizeof(struct vxlanhdr);
		} else
			udp_dst = ETH_OVER_UDP_PORT;
		break;
	}
	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);

	switch (encap_proto) {
	case IPPROTO_GRE:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
		olen += sizeof(h_outer.l4hdr.gre);
		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
		h_outer.l4hdr.gre.flags = 0;
		break;
	case IPPROTO_UDP:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
		olen += sizeof(h_outer.l4hdr.udp);
		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
		h_outer.l4hdr.udp.check = 0;
		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
						  sizeof(h_outer.l4hdr.udp) +
						  l2_len);
		break;
	case IPPROTO_IPIP:
	case IPPROTO_IPV6:
		break;
	default:
		return TC_ACT_OK;
	}

	/* add L2 encap (if specified) */
	l2_hdr = (__u8 *)&h_outer + olen;
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		*(__u32 *)l2_hdr = mpls_label;
		break;
	case ETH_P_TEB:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;

		if (ext_proto & EXTPROTO_VXLAN) {
			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;

			vxlan_hdr->vx_flags = VXLAN_FLAGS;
			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);

			l2_hdr += sizeof(struct vxlanhdr);
		}

		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
			return TC_ACT_SHOT;

		break;
	}
	olen += l2_len;

	/* add room between mac and network header */
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/* prepare new outer network header */
	h_outer.ip = iph_inner;
	h_outer.ip.tot_len = bpf_htons(olen +
				       bpf_ntohs(h_outer.ip.tot_len));
	h_outer.ip.protocol = encap_proto;

	set_ipv4_csum((void *)&h_outer.ip);

	/* store new outer network header */
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	/* if changing outer proto type, update eth->h_proto */
	if (encap_proto == IPPROTO_IPV6) {
		struct ethhdr eth;

		if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
			return TC_ACT_SHOT;
		eth.h_proto = bpf_htons(ETH_P_IP);
		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
			return TC_ACT_SHOT;
	}

	return TC_ACT_OK;
}

static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
				      __u16 l2_proto)
{
	return __encap_ipv4(skb, encap_proto, l2_proto, 0);
}

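/* Same flow as __encap_ipv4, but with an outer IPv6 header: adjust
 * payload_len instead of tot_len, and skip the checksum, which IPv6
 * does not have.
 */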
static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
					__u16 l2_proto, __u16 ext_proto)
{
	__u16 udp_dst = UDP_PORT;
	struct ipv6hdr iph_inner;
	struct v6hdr h_outer;
	struct tcphdr tcph;
	int olen, l2_len;
	__u16 tot_len;
	__u8 *l2_hdr = NULL;
	__u64 flags;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
			       sizeof(iph_inner)) < 0)
		return TC_ACT_OK;

	/* filter only packets we want */
	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	olen = sizeof(h_outer.ip);
	l2_len = 0;

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;

	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		l2_len = sizeof(mpls_label);
		udp_dst = MPLS_OVER_UDP_PORT;
		break;
	case ETH_P_TEB:
		l2_len = ETH_HLEN;
		if (ext_proto & EXTPROTO_VXLAN) {
			udp_dst = VXLAN_UDP_PORT;
			l2_len += sizeof(struct vxlanhdr);
		} else
			udp_dst = ETH_OVER_UDP_PORT;
		break;
	}
	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);

	switch (encap_proto) {
	case IPPROTO_GRE:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
		olen += sizeof(h_outer.l4hdr.gre);
		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
		h_outer.l4hdr.gre.flags = 0;
		break;
	case IPPROTO_UDP:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
		olen += sizeof(h_outer.l4hdr.udp);
		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
			  sizeof(h_outer.l4hdr.udp) + l2_len;
		h_outer.l4hdr.udp.check = 0;
		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
		break;
	case IPPROTO_IPV6:
		break;
	default:
		return TC_ACT_OK;
	}

	/* add L2 encap (if specified) */
	l2_hdr = (__u8 *)&h_outer + olen;
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		*(__u32 *)l2_hdr = mpls_label;
		break;
	case ETH_P_TEB:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;

		if (ext_proto & EXTPROTO_VXLAN) {
			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;

			vxlan_hdr->vx_flags = VXLAN_FLAGS;
			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);

			l2_hdr += sizeof(struct vxlanhdr);
		}

		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
			return TC_ACT_SHOT;
		break;
	}
	olen += l2_len;

	/* add room between mac and network header */
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/* prepare new outer network header */
	h_outer.ip = iph_inner;
	h_outer.ip.payload_len = bpf_htons(olen +
					   bpf_ntohs(h_outer.ip.payload_len));

	h_outer.ip.nexthdr = encap_proto;

	/* store new outer network header */
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}

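/* 4in6: unlike the generic helpers above, the outer IPv6 header cannot
 * be derived from the inner IPv4 header, so build it from scratch with
 * fixed addresses (fd::1 -> fd::2).
 */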
static int encap_ipv6_ipip6(struct __sk_buff *skb)
{
	struct iphdr iph_inner;
	struct v6hdr h_outer;
	struct tcphdr tcph;
	struct ethhdr eth;
	__u64 flags;
	int olen;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
			       sizeof(iph_inner)) < 0)
		return TC_ACT_OK;

	/* filter only packets we want */
	if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2),
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	olen = sizeof(h_outer.ip);

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;

	/* add room between mac and network header */
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/* prepare new outer network header */
	memset(&h_outer.ip, 0, sizeof(h_outer.ip));
	h_outer.ip.version = 6;
	h_outer.ip.hop_limit = iph_inner.ttl;
	h_outer.ip.saddr.s6_addr[1] = 0xfd;
	h_outer.ip.saddr.s6_addr[15] = 1;
	h_outer.ip.daddr.s6_addr[1] = 0xfd;
	h_outer.ip.daddr.s6_addr[15] = 2;
	h_outer.ip.payload_len = iph_inner.tot_len;
	h_outer.ip.nexthdr = IPPROTO_IPIP;

	/* store new outer network header */
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	/* update eth->h_proto */
	if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
		return TC_ACT_SHOT;
	eth.h_proto = bpf_htons(ETH_P_IPV6);
	if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}

static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
				      __u16 l2_proto)
{
	return __encap_ipv6(skb, encap_proto, l2_proto, 0);
}

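/* tc entry points, one section per tunnel variant. Each program only
 * rewrites packets of its expected protocol family and passes everything
 * else through unmodified. Attach with tc, e.g. (object path hypothetical):
 *
 *   tc filter add dev veth1 egress bpf direct-action \
 *           object-file test_tc_tunnel.bpf.o section encap_ipip_none
 */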
SEC("encap_ipip_none")
int __encap_ipip_none(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
	else
		return TC_ACT_OK;
}

SEC("encap_gre_none")
int __encap_gre_none(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
	else
		return TC_ACT_OK;
}

SEC("encap_gre_mpls")
int __encap_gre_mpls(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
	else
		return TC_ACT_OK;
}

SEC("encap_gre_eth")
int __encap_gre_eth(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
	else
		return TC_ACT_OK;
}

SEC("encap_udp_none")
int __encap_udp_none(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
	else
		return TC_ACT_OK;
}

SEC("encap_udp_mpls")
int __encap_udp_mpls(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
	else
		return TC_ACT_OK;
}

SEC("encap_udp_eth")
int __encap_udp_eth(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
	else
		return TC_ACT_OK;
}

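/* The VXLAN variants call __encap_ipv4/__encap_ipv6 directly with
 * EXTPROTO_VXLAN so that a VXLAN header is written in front of the
 * copied inner Ethernet header.
 */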
SEC("encap_vxlan_eth")
int __encap_vxlan_eth(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return __encap_ipv4(skb, IPPROTO_UDP,
				    ETH_P_TEB,
				    EXTPROTO_VXLAN);
	else
		return TC_ACT_OK;
}

SEC("encap_sit_none")
int __encap_sit_none(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
	else
		return TC_ACT_OK;
}

SEC("encap_ip6tnl_none")
int __encap_ip6tnl_none(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
	else
		return TC_ACT_OK;
}

SEC("encap_ipip6_none")
int __encap_ipip6_none(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv6_ipip6(skb);
	else
		return TC_ACT_OK;
}

SEC("encap_ip6gre_none")
int __encap_ip6gre_none(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
	else
		return TC_ACT_OK;
}

SEC("encap_ip6gre_mpls")
int __encap_ip6gre_mpls(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
	else
		return TC_ACT_OK;
}

SEC("encap_ip6gre_eth")
int __encap_ip6gre_eth(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
	else
		return TC_ACT_OK;
}

SEC("encap_ip6udp_none")
int __encap_ip6udp_none(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
	else
		return TC_ACT_OK;
}

SEC("encap_ip6udp_mpls")
int __encap_ip6udp_mpls(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
	else
		return TC_ACT_OK;
}

SEC("encap_ip6udp_eth")
int __encap_ip6udp_eth(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
	else
		return TC_ACT_OK;
}

SEC("encap_ip6vxlan_eth")
int __encap_ip6vxlan_eth(struct __sk_buff *skb)
{
	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return __encap_ipv6(skb, IPPROTO_UDP,
				    ETH_P_TEB,
				    EXTPROTO_VXLAN);
	else
		return TC_ACT_OK;
}

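/* Decap path: work out how many outer bytes to strip (outer L3 plus any
 * GRE/UDP and MPLS/Ethernet/VXLAN encap) and shrink the packet with a
 * negative bpf_skb_adjust_room().
 */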
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
	__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
	struct ipv6_opt_hdr ip6_opt_hdr;
	struct gre_hdr greh;
	struct udphdr udph;
	int olen = len;

	switch (proto) {
	case IPPROTO_IPIP:
		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
		break;
	case IPPROTO_IPV6:
		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
		break;
	case NEXTHDR_DEST:
		if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
				       sizeof(ip6_opt_hdr)) < 0)
			return TC_ACT_OK;
		switch (ip6_opt_hdr.nexthdr) {
		case IPPROTO_IPIP:
			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
			break;
		case IPPROTO_IPV6:
			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
			break;
		default:
			return TC_ACT_OK;
		}
		break;
	case IPPROTO_GRE:
		olen += sizeof(struct gre_hdr);
		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
			return TC_ACT_OK;
		switch (bpf_ntohs(greh.protocol)) {
		case ETH_P_MPLS_UC:
			olen += sizeof(mpls_label);
			break;
		case ETH_P_TEB:
			olen += ETH_HLEN;
			break;
		}
		break;
	case IPPROTO_UDP:
		olen += sizeof(struct udphdr);
		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
			return TC_ACT_OK;
		switch (bpf_ntohs(udph.dest)) {
		case MPLS_OVER_UDP_PORT:
			olen += sizeof(mpls_label);
			break;
		case ETH_OVER_UDP_PORT:
			olen += ETH_HLEN;
			break;
		case VXLAN_UDP_PORT:
			olen += ETH_HLEN + sizeof(struct vxlanhdr);
			break;
		}
		break;
	default:
		return TC_ACT_OK;
	}

	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}

static int decap_ipv4(struct __sk_buff *skb)
{
	struct iphdr iph_outer;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
			       sizeof(iph_outer)) < 0)
		return TC_ACT_OK;

	if (iph_outer.ihl != 5)
		return TC_ACT_OK;

	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
			      iph_outer.protocol);
}

static int decap_ipv6(struct __sk_buff *skb)
{
	struct ipv6hdr iph_outer;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
			       sizeof(iph_outer)) < 0)
		return TC_ACT_OK;

	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
			      iph_outer.nexthdr);
}

SEC("decap")
int decap_f(struct __sk_buff *skb)
{
	switch (skb->protocol) {
	case __bpf_constant_htons(ETH_P_IP):
		return decap_ipv4(skb);
	case __bpf_constant_htons(ETH_P_IPV6):
		return decap_ipv6(skb);
	default:
		/* does not match, ignore */
		return TC_ACT_OK;
	}
}

char __license[] SEC("license") = "GPL";