1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
4 * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
5 * between src and dst. The netns fwd has veth links to each src and dst. The
6 * client is in src and server in dst. The test installs a TC BPF program to each
7 * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
8 * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
9 * switch from ingress side; it also installs a checker prog on the egress side
10 * to drop unexpected traffic.
13 #include <arpa/inet.h>
14 #include <linux/if_tun.h>
15 #include <linux/limits.h>
16 #include <linux/sysctl.h>
17 #include <linux/time_types.h>
18 #include <linux/net_tstamp.h>
25 #include "test_progs.h"
26 #include "network_helpers.h"
27 #include "test_tc_neigh_fib.skel.h"
28 #include "test_tc_neigh.skel.h"
29 #include "test_tc_peer.skel.h"
30 #include "test_tc_dtime.skel.h"
/* Socket option number passed to setsockopt(SOL_TCP, ...) in test_inet_dtime() */
33 #define TCP_TX_DELAY 37
/* Names of the three network namespaces (also listed in namespaces[]) */
36 #define NS_SRC "ns_src"
37 #define NS_FWD "ns_fwd"
38 #define NS_DST "ns_dst"
/* IPv4 addresses of the src/dst endpoints and of the TUN devices used by the L3 peer test */
40 #define IP4_SRC "172.16.1.100"
41 #define IP4_DST "172.16.2.100"
42 #define IP4_TUN_SRC "172.17.1.100"
43 #define IP4_TUN_FWD "172.17.1.200"
/* IPv6 counterparts of the addresses above */
46 #define IP6_SRC "0::1:dead:beef:cafe"
47 #define IP6_DST "0::2:dead:beef:cafe"
48 #define IP6_TUN_SRC "1::1:dead:beef:cafe"
49 #define IP6_TUN_FWD "1::2:dead:beef:cafe"
/* IPv4 addresses put on veth_src_fwd (SLL) and veth_dst_fwd (DLL), and the /16 network routed at the endpoints */
52 #define IP4_SLL "169.254.0.1"
53 #define IP4_DLL "169.254.0.2"
54 #define IP4_NET "169.254.0.0"
/* Fixed MAC addresses set on veth_dst_fwd and veth_dst */
56 #define MAC_DST_FWD "00:11:22:33:44:55"
57 #define MAC_DST "00:22:33:44:55:66"
/* Number of bytes get_ifaddr() reads from /sys/class/net/<if>/address */
59 #define IFADDR_STR_LEN 18
/* Options placed between the ping command and the target address in test_ping() */
60 #define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
/* Paths at which the src, dst and checker TC programs are pinned */
62 #define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
63 #define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
64 #define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk"
/* Timeout in milliseconds passed to connect_to_fd() and settimeo() */
66 #define TIMEOUT_MILLIS 10000
/* Nanoseconds per second, used to convert tv_sec/tv_nsec pairs to nanoseconds */
67 #define NSEC_PER_SEC 1000000000ULL
/*
 * Print MSG (a printf-style format string literal, with optional arguments)
 * to stderr, prefixed with the source file, line number and strerror(errno),
 * and followed by a newline.
 */
69 #define log_err(MSG, ...) \
70 fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
71 __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
/*
 * NULL-terminated list of the namespace names; netns_setup_namespaces() and
 * netns_setup_namespaces_nofail() access it through their ns pointer.
 */
73 static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
/*
 * Write the string newval to the file at path.
 * The file is opened with fopen(path, "r+") and the whole string is handed to
 * a single fwrite() call; if fwrite() does not report one item written, the
 * failure is logged with log_err().
 */
75 static int write_file(const char *path, const char *newval)
79 f = fopen(path, "r+");
82 if (fwrite(newval, strlen(newval), 1, f) != 1) {
83 log_err("writing to %s failed", path);
/*
 * Run "ip netns <verb> <name>" through system() for entries of namespaces[]
 * (reached through the ns pointer). Each command is checked with ASSERT_OK(),
 * using the command text as the assertion label.
 */
91 static int netns_setup_namespaces(const char *verb)
93 const char * const *ns = namespaces;
97 snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns);
98 if (!ASSERT_OK(system(cmd), cmd))
/*
 * Variant of netns_setup_namespaces() that returns void and builds a command
 * whose stdout and stderr are redirected to /dev/null.
 */
105 static void netns_setup_namespaces_nofail(const char *verb)
107 const char * const *ns = namespaces;
111 snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, *ns);
/*
 * Interface indexes of the four veth devices, filled in by
 * netns_setup_links_and_routes() with if_nametoindex().
 */
117 struct netns_setup_result {
118 int ifindex_veth_src;
119 int ifindex_veth_src_fwd;
120 int ifindex_veth_dst;
121 int ifindex_veth_dst_fwd;
/*
 * Read the address of interface name from /sys/class/net/<name>/address
 * into ifaddr.
 * IFADDR_STR_LEN bytes are requested with fread() and the call is asserted
 * to return exactly that many. The caller shown in this file passes a
 * zero-initialised buffer of IFADDR_STR_LEN+1 bytes.
 */
124 static int get_ifaddr(const char *name, char *ifaddr)
130 snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
131 f = fopen(path, "r");
132 if (!ASSERT_OK_PTR(f, path))
135 ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
136 if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
/*
 * Format a shell command into cmd with snprintf() and run it via system().
 * The result is checked with ASSERT_OK(), using the command text as the
 * assertion label.
 */
144 #define SYS(fmt, ...) \
147 snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
148 if (!ASSERT_OK(system(cmd), cmd)) \
/*
 * Build the test topology with "ip" commands run through SYS():
 *  - create the veth pairs veth_src/veth_src_fwd and veth_dst/veth_dst_fwd,
 *    set fixed MACs on veth_dst_fwd and veth_dst, and read the MAC of
 *    veth_src_fwd with get_ifaddr();
 *  - store the four interface indexes in *result;
 *  - move the devices into NS_SRC, NS_FWD and NS_DST;
 *  - inside each namespace add addresses, bring the links up and add routes,
 *    plus static neighbour entries in the src and dst namespaces.
 */
152 static int netns_setup_links_and_routes(struct netns_setup_result *result)
154 struct nstoken *nstoken = NULL;
155 char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
157 SYS("ip link add veth_src type veth peer name veth_src_fwd");
158 SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
160 SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
161 SYS("ip link set veth_dst address " MAC_DST);
163 if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
/* look up the ifindexes before the devices are moved into other namespaces */
166 result->ifindex_veth_src = if_nametoindex("veth_src");
167 if (!ASSERT_GT(result->ifindex_veth_src, 0, "ifindex_veth_src"))
170 result->ifindex_veth_src_fwd = if_nametoindex("veth_src_fwd");
171 if (!ASSERT_GT(result->ifindex_veth_src_fwd, 0, "ifindex_veth_src_fwd"))
174 result->ifindex_veth_dst = if_nametoindex("veth_dst");
175 if (!ASSERT_GT(result->ifindex_veth_dst, 0, "ifindex_veth_dst"))
178 result->ifindex_veth_dst_fwd = if_nametoindex("veth_dst_fwd");
179 if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd"))
182 SYS("ip link set veth_src netns " NS_SRC);
183 SYS("ip link set veth_src_fwd netns " NS_FWD);
184 SYS("ip link set veth_dst_fwd netns " NS_FWD);
185 SYS("ip link set veth_dst netns " NS_DST);
187 /** setup in 'src' namespace */
188 nstoken = open_netns(NS_SRC);
189 if (!ASSERT_OK_PTR(nstoken, "setns src"))
192 SYS("ip addr add " IP4_SRC "/32 dev veth_src");
193 SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad");
194 SYS("ip link set dev veth_src up");
196 SYS("ip route add " IP4_DST "/32 dev veth_src scope global");
197 SYS("ip route add " IP4_NET "/16 dev veth_src scope global");
198 SYS("ip route add " IP6_DST "/128 dev veth_src scope global");
200 SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s",
202 SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
205 close_netns(nstoken);
207 /** setup in 'fwd' namespace */
208 nstoken = open_netns(NS_FWD);
209 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
212 /* The fwd netns automatically gets a v6 LL address / routes, but also
213 * needs v4 one in order to start ARP probing. IP4_NET route is added
214 * to the endpoints so that the ARP processing will reply.
216 SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd");
217 SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
218 SYS("ip link set dev veth_src_fwd up");
219 SYS("ip link set dev veth_dst_fwd up");
221 SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
222 SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
223 SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
224 SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
226 close_netns(nstoken);
228 /** setup in 'dst' namespace */
229 nstoken = open_netns(NS_DST);
230 if (!ASSERT_OK_PTR(nstoken, "setns dst"))
233 SYS("ip addr add " IP4_DST "/32 dev veth_dst");
234 SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad");
235 SYS("ip link set dev veth_dst up");
237 SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global");
238 SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
239 SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
/* static neighbour entries: the src addresses resolve to the MAC set on veth_dst_fwd */
241 SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
242 SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
244 close_netns(nstoken);
249 close_netns(nstoken);
/*
 * Create a TC hook on ifindex for both ingress and egress with
 * bpf_tc_hook_create() and assert that it succeeded.
 * The assertion label is rendered as "qdisc add dev <ifname> clsact";
 * "<unknown_iface>" is used when if_indextoname() returns NULL.
 */
253 static int qdisc_clsact_create(struct bpf_tc_hook *qdisc_hook, int ifindex)
255 char err_str[128], ifname[16];
258 qdisc_hook->ifindex = ifindex;
259 qdisc_hook->attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
260 err = bpf_tc_hook_create(qdisc_hook);
261 snprintf(err_str, sizeof(err_str),
262 "qdisc add dev %s clsact",
263 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>");
264 err_str[sizeof(err_str) - 1] = 0;
265 ASSERT_OK(err, err_str);
/*
 * Attach prog to qdisc_hook at the given attach point (xgress) and priority
 * with bpf_tc_attach(), and assert that it succeeded.
 * Note that qdisc_hook->attach_point is overwritten with xgress.
 * The assertion label is rendered as
 * "filter add dev <ifname> <ingress|egress> prio <n> bpf da <prog name>".
 */
270 static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook,
271 enum bpf_tc_attach_point xgress,
272 const struct bpf_program *prog, int priority)
274 LIBBPF_OPTS(bpf_tc_opts, tc_attach);
275 char err_str[128], ifname[16];
278 qdisc_hook->attach_point = xgress;
279 tc_attach.prog_fd = bpf_program__fd(prog);
280 tc_attach.priority = priority;
281 err = bpf_tc_attach(qdisc_hook, &tc_attach);
282 snprintf(err_str, sizeof(err_str),
283 "filter add dev %s %s prio %d bpf da %s",
284 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>",
285 xgress == BPF_TC_INGRESS ? "ingress" : "egress",
286 priority, bpf_program__name(prog));
287 err_str[sizeof(err_str) - 1] = 0;
288 ASSERT_OK(err, err_str);
/*
 * Statement-expression wrappers around qdisc_clsact_create() and
 * xgress_filter_add(). Both store the return value in a variable named err
 * that must exist in the calling scope, and branch on a non-zero result.
 */
293 #define QDISC_CLSACT_CREATE(qdisc_hook, ifindex) ({ \
294 if ((err = qdisc_clsact_create(qdisc_hook, ifindex))) \
298 #define XGRESS_FILTER_ADD(qdisc_hook, xgress, prog, priority) ({ \
299 if ((err = xgress_filter_add(qdisc_hook, xgress, prog, priority))) \
/*
 * Attach pinned BPF programs with the tc command line tool: add a clsact
 * qdisc to veth_src_fwd and veth_dst_fwd, then add one ingress and one egress
 * "bpf da object-pinned" filter on each device. Every command goes through
 * SYS().
 */
303 static int netns_load_bpf(void)
305 SYS("tc qdisc add dev veth_src_fwd clsact");
306 SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
308 SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned "
311 SYS("tc qdisc add dev veth_dst_fwd clsact");
312 SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
314 SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
/*
 * TCP connectivity check between the namespaces.
 * A SOCK_STREAM server is started on addr:port inside NS_DST, a client
 * connects to it from NS_SRC, the connection is accepted, and buf is written
 * on the client socket and read back on the accepted socket. Both transfers
 * are asserted to move sizeof(buf) bytes. A TIMEOUT_MILLIS timeout is set on
 * the accepted socket before reading.
 */
322 static void test_tcp(int family, const char *addr, __u16 port)
324 int listen_fd = -1, accept_fd = -1, client_fd = -1;
325 char buf[] = "testing testing";
327 struct nstoken *nstoken;
329 nstoken = open_netns(NS_DST);
330 if (!ASSERT_OK_PTR(nstoken, "setns dst"))
333 listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
334 if (!ASSERT_GE(listen_fd, 0, "listen"))
337 close_netns(nstoken);
338 nstoken = open_netns(NS_SRC);
339 if (!ASSERT_OK_PTR(nstoken, "setns src"))
342 client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
343 if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
346 accept_fd = accept(listen_fd, NULL, NULL);
347 if (!ASSERT_GE(accept_fd, 0, "accept"))
350 if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo"))
353 n = write(client_fd, buf, sizeof(buf));
354 if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
357 n = read(accept_fd, buf, sizeof(buf));
358 ASSERT_EQ(n, sizeof(buf), "recv from server");
362 close_netns(nstoken);
/*
 * Ping addr from inside NS_SRC using "ip netns exec". The command is
 * ping_command(family) followed by PING_ARGS, with stdout sent to /dev/null;
 * it runs through SYS().
 */
371 static int test_ping(int family, const char *addr)
373 SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
/*
 * Run the TCP and ping checks towards the dst endpoint, first over IPv4 and
 * then over IPv6. The return value of test_ping() is not examined here.
 */
379 static void test_connectivity(void)
381 test_tcp(AF_INET, IP4_DST, IP4_PORT);
382 test_ping(AF_INET, IP4_DST);
383 test_tcp(AF_INET6, IP6_DST, IP6_PORT);
384 test_ping(AF_INET6, IP6_DST);
/*
 * Write "1" (enable) or "0" (disable) to /proc/sys/net/ipv4/ip_forward and
 * /proc/sys/net/ipv6/conf/all/forwarding via write_file().
 * NOTE(review): both assertion labels read "...=0" even when enable is true,
 * so a failure while enabling is reported with a misleading label.
 */
387 static int set_forwarding(bool enable)
391 err = write_file("/proc/sys/net/ipv4/ip_forward", enable ? "1" : "0");
392 if (!ASSERT_OK(err, "set ipv4.ip_forward=0"))
395 err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", enable ? "1" : "0");
396 if (!ASSERT_OK(err, "set ipv6.forwarding=0"))
/*
 * Receive one message from fd with recvmsg() and validate its payload and
 * receive timestamp.
 * The call must return s bytes and the data must match expected over s bytes.
 * The timestamp is taken from the first control message when it is
 * SOL_SOCKET / SO_TIMESTAMPNS_NEW. Converted to nanoseconds, it must be
 * non-zero, not later than the current CLOCK_REALTIME time, and less than
 * 5 seconds older than it.
 */
402 static void rcv_tstamp(int fd, const char *expected, size_t s)
404 struct __kernel_timespec pkt_ts = {};
405 char ctl[CMSG_SPACE(sizeof(pkt_ts))];
406 struct timespec now_ts;
407 struct msghdr msg = {};
408 __u64 now_ns, pkt_ns;
409 struct cmsghdr *cmsg;
415 iov.iov_len = sizeof(data);
418 msg.msg_control = &ctl;
419 msg.msg_controllen = sizeof(ctl);
421 ret = recvmsg(fd, &msg, 0);
422 if (!ASSERT_EQ(ret, s, "recvmsg"))
424 ASSERT_STRNEQ(data, expected, s, "expected rcv data");
426 cmsg = CMSG_FIRSTHDR(&msg);
427 if (cmsg && cmsg->cmsg_level == SOL_SOCKET &&
428 cmsg->cmsg_type == SO_TIMESTAMPNS_NEW)
429 memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts));
431 pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec;
432 ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp");
434 ret = clock_gettime(CLOCK_REALTIME, &now_ts);
435 ASSERT_OK(ret, "clock_gettime");
436 now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
438 if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp"))
439 ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC,
/*
 * Send a message on fd with sendmsg(), carrying a transmit time.
 * The current CLOCK_TAI time, in nanoseconds, is placed in an
 * SOL_SOCKET / SCM_TXTIME control message. SO_TXTIME is enabled on fd with
 * clockid CLOCK_TAI before sending. sendmsg() is asserted to return s.
 */
443 static void snd_tstamp(int fd, char *b, size_t s)
445 struct sock_txtime opt = { .clockid = CLOCK_TAI };
446 char ctl[CMSG_SPACE(sizeof(__u64))];
447 struct timespec now_ts;
448 struct msghdr msg = {};
449 struct cmsghdr *cmsg;
454 ret = clock_gettime(CLOCK_TAI, &now_ts);
455 ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)");
456 now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
462 msg.msg_control = &ctl;
463 msg.msg_controllen = sizeof(ctl);
465 cmsg = CMSG_FIRSTHDR(&msg);
466 cmsg->cmsg_level = SOL_SOCKET;
467 cmsg->cmsg_type = SCM_TXTIME;
468 cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns));
469 *(__u64 *)CMSG_DATA(cmsg) = now_ns;
471 ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt));
472 ASSERT_OK(ret, "setsockopt(SO_TXTIME)");
474 ret = sendmsg(fd, &msg, 0);
475 ASSERT_EQ(ret, s, "sendmsg");
/*
 * Send one message from NS_SRC to a server in NS_DST and check the receive
 * timestamp on the server side.
 * The server of the given family and type is started on addr:port inside
 * NS_DST with SO_TIMESTAMPNS_NEW enabled; for SOCK_STREAM, TCP_TX_DELAY is
 * set on it as well. The client connects from inside NS_SRC.
 * For SOCK_STREAM the connection is accepted, buf is written on the client
 * socket and checked with rcv_tstamp() on the accepted socket. The
 * snd_tstamp() / rcv_tstamp(listen_fd) pair presumably forms the non-stream
 * branch (confirm against the full function).
 */
478 static void test_inet_dtime(int family, int type, const char *addr, __u16 port)
480 int opt = 1, accept_fd = -1, client_fd = -1, listen_fd, err;
481 char buf[] = "testing testing";
482 struct nstoken *nstoken;
484 nstoken = open_netns(NS_DST);
485 if (!ASSERT_OK_PTR(nstoken, "setns dst"))
487 listen_fd = start_server(family, type, addr, port, 0);
488 close_netns(nstoken);
490 if (!ASSERT_GE(listen_fd, 0, "listen"))
493 /* Ensure the kernel puts the (rcv) timestamp for all skb */
494 err = setsockopt(listen_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
496 if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)"))
499 if (type == SOCK_STREAM) {
500 /* Ensure the kernel set EDT when sending out rst/ack
501 * from the kernel's ctl_sk.
503 err = setsockopt(listen_fd, SOL_TCP, TCP_TX_DELAY, &opt,
505 if (!ASSERT_OK(err, "setsockopt(TCP_TX_DELAY)"))
509 nstoken = open_netns(NS_SRC);
510 if (!ASSERT_OK_PTR(nstoken, "setns src"))
512 client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
513 close_netns(nstoken);
515 if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
518 if (type == SOCK_STREAM) {
521 accept_fd = accept(listen_fd, NULL, NULL);
522 if (!ASSERT_GE(accept_fd, 0, "accept"))
525 n = write(client_fd, buf, sizeof(buf));
526 if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
528 rcv_tstamp(accept_fd, buf, sizeof(buf));
530 snd_tstamp(client_fd, buf, sizeof(buf));
531 rcv_tstamp(listen_fd, buf, sizeof(buf));
/*
 * Attach the test_tc_dtime programs through libbpf TC hooks.
 *  - In NS_SRC and NS_DST: a clsact hook on veth_src / veth_dst with
 *    ingress_host on ingress and egress_host on egress (priority 0).
 *  - In NS_FWD: a clsact hook on veth_dst_fwd and on veth_src_fwd, each with
 *    the prio100 and prio101 fwdns programs on both ingress and egress.
 * Devices are addressed by the interface indexes in setup_result.
 */
542 static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
543 const struct netns_setup_result *setup_result)
545 LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd);
546 LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
547 LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src);
548 LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst);
549 struct nstoken *nstoken;
552 /* setup ns_src tc progs */
553 nstoken = open_netns(NS_SRC);
554 if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
556 /* tc qdisc add dev veth_src clsact */
557 QDISC_CLSACT_CREATE(&qdisc_veth_src, setup_result->ifindex_veth_src);
558 /* tc filter add dev veth_src ingress bpf da ingress_host */
559 XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
560 /* tc filter add dev veth_src egress bpf da egress_host */
561 XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
562 close_netns(nstoken);
564 /* setup ns_dst tc progs */
565 nstoken = open_netns(NS_DST);
566 if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
568 /* tc qdisc add dev veth_dst clsact */
569 QDISC_CLSACT_CREATE(&qdisc_veth_dst, setup_result->ifindex_veth_dst);
570 /* tc filter add dev veth_dst ingress bpf da ingress_host */
571 XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
572 /* tc filter add dev veth_dst egress bpf da egress_host */
573 XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
574 close_netns(nstoken);
576 /* setup ns_fwd tc progs */
577 nstoken = open_netns(NS_FWD);
578 if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
580 /* tc qdisc add dev veth_dst_fwd clsact */
581 QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
582 /* tc filter add dev veth_dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
583 XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS,
584 skel->progs.ingress_fwdns_prio100, 100);
585 /* tc filter add dev veth_dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
586 XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS,
587 skel->progs.ingress_fwdns_prio101, 101);
588 /* tc filter add dev veth_dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
589 XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS,
590 skel->progs.egress_fwdns_prio100, 100);
591 /* tc filter add dev veth_dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
592 XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS,
593 skel->progs.egress_fwdns_prio101, 101);
595 /* tc qdisc add dev veth_src_fwd clsact */
596 QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd);
597 /* tc filter add dev veth_src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
598 XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS,
599 skel->progs.ingress_fwdns_prio100, 100);
600 /* tc filter add dev veth_src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
601 XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS,
602 skel->progs.ingress_fwdns_prio101, 101);
603 /* tc filter add dev veth_src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
604 XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS,
605 skel->progs.egress_fwdns_prio100, 100);
606 /* tc filter add dev veth_src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
607 XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS,
608 skel->progs.egress_fwdns_prio101, 101);
609 close_netns(nstoken);
613 close_netns(nstoken);
/* Printable counter names; indexed by the cnt argument of dtime_cnt_str() */
628 const char *cnt_names[] = {
629 "ingress_fwdns_p100",
630 "ingress_fwdns_p101",
/* Printable test-case names; indexed by the test argument of dtime_cnt_str() and dtime_err_str() */
652 const char *test_names[] = {
653 "tcp ip6 clear dtime",
/*
 * Format "<test name> <counter name>" for use as an assertion label.
 * The text is built in a function-local static 64-byte buffer, so each call
 * overwrites the result of the previous one.
 */
664 static const char *dtime_cnt_str(int test, int cnt)
666 static char name[64];
668 snprintf(name, sizeof(name), "%s %s", test_names[test], cnt_names[cnt]);
/*
 * Like dtime_cnt_str(), but formats the label with a trailing " errs".
 * Uses its own function-local static 64-byte buffer.
 */
673 static const char *dtime_err_str(int test, int cnt)
675 static char name[64];
677 snprintf(name, sizeof(name), "%s %s errs", test_names[test],
/*
 * Run the TCP_IP6_CLEAR_DTIME case (IPv6 stream to IP6_DST, port 50000 + t)
 * and check the counters in skel->bss->dtimes[t]:
 *  - zero at both fwd-ns ingress hooks and at fwd-ns egress prio 101;
 *  - greater than zero at fwd-ns egress prio 100 and at both end-host hooks.
 * Every entry of skel->bss->errs[t] must be zero.
 */
683 static void test_tcp_clear_dtime(struct test_tc_dtime *skel)
685 int i, t = TCP_IP6_CLEAR_DTIME;
686 __u32 *dtimes = skel->bss->dtimes[t];
687 __u32 *errs = skel->bss->errs[t];
690 test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t);
692 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
693 dtime_cnt_str(t, INGRESS_FWDNS_P100));
694 ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
695 dtime_cnt_str(t, INGRESS_FWDNS_P101));
696 ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0,
697 dtime_cnt_str(t, EGRESS_FWDNS_P100));
698 ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 0,
699 dtime_cnt_str(t, EGRESS_FWDNS_P101));
700 ASSERT_GT(dtimes[EGRESS_ENDHOST], 0,
701 dtime_cnt_str(t, EGRESS_ENDHOST));
702 ASSERT_GT(dtimes[INGRESS_ENDHOST], 0,
703 dtime_cnt_str(t, INGRESS_ENDHOST));
705 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
706 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
/*
 * Run one TCP dtime case and check its counters.
 * The case index t is chosen from family and bpf_fwd (TCP_IP4 / TCP_IP6 when
 * bpf_fwd is true, the matching RT_FWD case otherwise); the port is
 * 50000 + t. dtimes[INGRESS_FWDNS_P100] must be zero, every counter from
 * INGRESS_FWDNS_P101 up to (not including) SET_DTIME must be greater than
 * zero, and all errs[] entries must be zero.
 */
709 static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
711 __u32 *dtimes, *errs;
715 if (family == AF_INET) {
716 t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD;
719 t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD;
723 dtimes = skel->bss->dtimes[t];
724 errs = skel->bss->errs[t];
727 test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t);
729 /* fwdns_prio100 prog does not read delivery_time_type, so
730 * kernel puts the (rcv) timestamp in __sk_buff->tstamp
732 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
733 dtime_cnt_str(t, INGRESS_FWDNS_P100));
734 for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++)
735 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
737 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
738 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
/*
 * Run one UDP dtime case and check its counters.
 * The case index t is chosen from family and bpf_fwd (UDP_IP4 / UDP_IP6 when
 * bpf_fwd is true, the matching RT_FWD case otherwise); the port is
 * 50000 + t. Both fwd-ns ingress counters must be zero, every counter from
 * EGRESS_FWDNS_P100 up to (not including) SET_DTIME must be greater than
 * zero, and all errs[] entries must be zero.
 */
741 static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
743 __u32 *dtimes, *errs;
747 if (family == AF_INET) {
748 t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD;
751 t = bpf_fwd ? UDP_IP6 : UDP_IP6_RT_FWD;
755 dtimes = skel->bss->dtimes[t];
756 errs = skel->bss->errs[t];
759 test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t);
761 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
762 dtime_cnt_str(t, INGRESS_FWDNS_P100));
763 /* non mono delivery time is not forwarded */
764 ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
765 dtime_cnt_str(t, INGRESS_FWDNS_P101));
766 for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
767 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
769 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
770 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
/*
 * Subtest: delivery-time handling across the three namespaces.
 * Opens the test_tc_dtime skeleton, sets IFINDEX_SRC / IFINDEX_DST to the
 * fwd-side veth indexes, loads it and attaches its programs with
 * netns_load_dtime_bpf(). The dtime cases run in two rounds:
 *  1. forwarding disabled in NS_FWD: the clear-dtime case, then TCP and UDP
 *     over IPv4 and IPv6 with bpf_fwd = true;
 *  2. forwarding enabled in NS_FWD: the same TCP and UDP cases with
 *     bpf_fwd = false.
 * The skeleton is destroyed at the end.
 */
773 static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
775 struct test_tc_dtime *skel;
776 struct nstoken *nstoken;
779 skel = test_tc_dtime__open();
780 if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
783 skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
784 skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
786 err = test_tc_dtime__load(skel);
787 if (!ASSERT_OK(err, "test_tc_dtime__load"))
790 if (netns_load_dtime_bpf(skel, setup_result))
793 nstoken = open_netns(NS_FWD);
794 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
796 err = set_forwarding(false);
797 close_netns(nstoken);
798 if (!ASSERT_OK(err, "disable forwarding"))
801 test_tcp_clear_dtime(skel);
803 test_tcp_dtime(skel, AF_INET, true);
804 test_tcp_dtime(skel, AF_INET6, true);
805 test_udp_dtime(skel, AF_INET, true);
806 test_udp_dtime(skel, AF_INET6, true);
808 /* Test the kernel ip[6]_forward path instead
809 * of bpf_redirect_neigh().
811 nstoken = open_netns(NS_FWD);
812 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
814 err = set_forwarding(true);
815 close_netns(nstoken);
816 if (!ASSERT_OK(err, "enable forwarding"))
819 test_tcp_dtime(skel, AF_INET, false);
820 test_tcp_dtime(skel, AF_INET6, false);
821 test_udp_dtime(skel, AF_INET, false);
822 test_udp_dtime(skel, AF_INET6, false);
825 test_tc_dtime__destroy(skel);
/*
 * Subtest using the test_tc_neigh_fib programs.
 * Inside NS_FWD: open and load the skeleton, pin tc_src, tc_chk and tc_dst
 * at SRC/CHK/DST_PROG_PIN_FILE, attach them with netns_load_bpf(), and enable
 * forwarding. The skeleton is destroyed and the namespace token closed at the
 * end.
 */
828 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
830 struct nstoken *nstoken = NULL;
831 struct test_tc_neigh_fib *skel = NULL;
834 nstoken = open_netns(NS_FWD);
835 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
838 skel = test_tc_neigh_fib__open();
839 if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
842 if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
845 err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
846 if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
849 err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
850 if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
853 err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
854 if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
857 if (netns_load_bpf())
860 /* bpf_fib_lookup() checks if forwarding is enabled */
861 if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
868 test_tc_neigh_fib__destroy(skel);
869 close_netns(nstoken);
/*
 * Subtest using the test_tc_neigh programs.
 * Inside NS_FWD: open the skeleton, set IFINDEX_SRC / IFINDEX_DST to the
 * fwd-side veth indexes, load it, pin tc_src, tc_chk and tc_dst at
 * SRC/CHK/DST_PROG_PIN_FILE, attach them with netns_load_bpf(), and disable
 * forwarding. The skeleton is destroyed and the namespace token closed at the
 * end.
 */
872 static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
874 struct nstoken *nstoken = NULL;
875 struct test_tc_neigh *skel = NULL;
878 nstoken = open_netns(NS_FWD);
879 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
882 skel = test_tc_neigh__open();
883 if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
886 skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
887 skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
889 err = test_tc_neigh__load(skel);
890 if (!ASSERT_OK(err, "test_tc_neigh__load"))
893 err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
894 if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
897 err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
898 if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
901 err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
902 if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
905 if (netns_load_bpf())
908 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
915 test_tc_neigh__destroy(skel);
916 close_netns(nstoken);
/*
 * Subtest using the test_tc_peer programs.
 * Inside NS_FWD: open the skeleton, set IFINDEX_SRC / IFINDEX_DST to the
 * fwd-side veth indexes, load it, pin tc_src, tc_chk and tc_dst at
 * SRC/CHK/DST_PROG_PIN_FILE, attach them with netns_load_bpf(), and disable
 * forwarding. The skeleton is destroyed and the namespace token closed at the
 * end.
 */
919 static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
921 struct nstoken *nstoken;
922 struct test_tc_peer *skel;
925 nstoken = open_netns(NS_FWD);
926 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
929 skel = test_tc_peer__open();
930 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
933 skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
934 skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
936 err = test_tc_peer__load(skel);
937 if (!ASSERT_OK(err, "test_tc_peer__load"))
940 err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
941 if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
944 err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
945 if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
948 err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
949 if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
952 if (netns_load_bpf())
955 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
962 test_tc_peer__destroy(skel);
963 close_netns(nstoken);
/*
 * Open /dev/net/tun and bind the descriptor to a TUN interface called name
 * (flags IFF_TUN | IFF_NO_PI, set through the TUNSETIFF ioctl), then bring
 * the link up with "ip link set dev <name> up".
 * NOTE(review): strncpy() with IFNAMSIZ leaves ifr.ifr_name without a
 * terminating NUL when name is IFNAMSIZ bytes or longer; the names passed in
 * this file are short literals.
 */
966 static int tun_open(char *name)
971 fd = open("/dev/net/tun", O_RDWR);
972 if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
975 memset(&ifr, 0, sizeof(ifr));
977 ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
979 strncpy(ifr.ifr_name, name, IFNAMSIZ);
981 err = ioctl(fd, TUNSETIFF, &ifr);
982 if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
985 SYS("ip link set dev %s up", name);
/*
 * Relay data between src_fd and target_fd.
 * select() waits until either descriptor is readable. Data is read from
 * src_fd when it is set in the result (it takes precedence when both are
 * readable), otherwise from target_fd, and the bytes read are written to the
 * other descriptor. select(), read() and short-write failures are logged with
 * log_err().
 */
998 static int tun_relay_loop(int src_fd, int target_fd)
1007 int direction, nread, nwrite;
1009 FD_SET(src_fd, &rfds);
1010 FD_SET(target_fd, &rfds);
1012 if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
1013 log_err("select failed");
1017 direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
1019 nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
1021 log_err("read failed");
1025 nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
1026 if (nwrite != nread) {
1027 log_err("write failed");
/*
 * Subtest: peer redirect with an L3 (TUN) device on the source side.
 *  - Open tun_src in NS_SRC and tun_fwd in NS_FWD, then fork a child that
 *    runs tun_relay_loop() between the two descriptors.
 *  - Open test_tc_peer with IFINDEX_SRC set to the tun_fwd index and
 *    IFINDEX_DST set to the veth_dst_fwd index; load it and pin tc_src_l3,
 *    tc_dst_l3 and tc_chk at SRC/DST/CHK_PROG_PIN_FILE.
 *  - Attach filters with tc on tun_fwd and veth_dst_fwd, add the tunnel
 *    addresses, re-point the NS_SRC routes for the dst addresses via tun_src,
 *    add return routes and neighbour entries in NS_DST, disable forwarding
 *    and run test_connectivity().
 *  - Terminate the relay child with SIGTERM and reap it, destroy the skeleton
 *    and close the namespace token.
 * NOTE(review): the assertion label "if_indextoname tun_fwd" accompanies an
 * if_nametoindex() call.
 */
1033 static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
1035 struct test_tc_peer *skel = NULL;
1036 struct nstoken *nstoken = NULL;
1038 int tunnel_pid = -1;
1039 int src_fd, target_fd = -1;
1042 /* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
1043 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
1044 * expose the L2 headers encapsulating the IP packet to BPF and hence
1045 * don't have skb in suitable state for this test. Alternative to TUN/TAP
1046 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
1047 * but that requires much more complicated setup.
1049 nstoken = open_netns(NS_SRC);
1050 if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
1053 src_fd = tun_open("tun_src");
1054 if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
1057 close_netns(nstoken);
1059 nstoken = open_netns(NS_FWD);
1060 if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
1063 target_fd = tun_open("tun_fwd");
1064 if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
1067 tunnel_pid = fork();
1068 if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
1071 if (tunnel_pid == 0)
1072 exit(tun_relay_loop(src_fd, target_fd));
1074 skel = test_tc_peer__open();
1075 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
1078 ifindex = if_nametoindex("tun_fwd");
1079 if (!ASSERT_GT(ifindex, 0, "if_indextoname tun_fwd"))
1082 skel->rodata->IFINDEX_SRC = ifindex;
1083 skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
1085 err = test_tc_peer__load(skel);
1086 if (!ASSERT_OK(err, "test_tc_peer__load"))
1089 err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
1090 if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
1093 err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE);
1094 if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
1097 err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
1098 if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
1101 /* Load "tc_src_l3" to the tun_fwd interface to redirect packets
1102 * towards dst, and "tc_dst" to redirect packets
1103 * and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
1105 SYS("tc qdisc add dev tun_fwd clsact");
1106 SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
1109 SYS("tc qdisc add dev veth_dst_fwd clsact");
1110 SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
1112 SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
1115 /* Setup route and neigh tables */
1116 SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
1117 SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
1119 SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
1120 SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
1122 SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
1123 SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
1124 " dev tun_src scope global");
1125 SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
1126 SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
1127 SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
1128 " dev tun_src scope global");
1129 SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
1131 SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
1132 SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
1134 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1137 test_connectivity();
1140 if (tunnel_pid > 0) {
1141 kill(tunnel_pid, SIGTERM);
1142 waitpid(tunnel_pid, NULL, 0);
1149 test_tc_peer__destroy(skel);
1151 close_netns(nstoken);
/*
 * Run subtest `name` when test__start_subtest() selects it: create the
 * namespaces, set up links and routes, call test_<name>(&setup_result) if
 * that setup succeeded, then delete the namespaces again. Namespace deletion
 * happens whenever namespace creation succeeded.
 */
1154 #define RUN_TEST(name) \
1156 struct netns_setup_result setup_result; \
1157 if (test__start_subtest(#name)) \
1158 if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
1159 if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
1160 "setup links and routes")) \
1161 test_ ## name(&setup_result); \
1162 netns_setup_namespaces("delete"); \
/*
 * Thread entry point used by serial_test_tc_redirect().
 * First issues "ip netns delete" for the test namespaces through the _nofail
 * helper, then runs each subtest through RUN_TEST().
 */
1166 static void *test_tc_redirect_run_tests(void *arg)
1168 netns_setup_namespaces_nofail("delete");
1170 RUN_TEST(tc_redirect_peer);
1171 RUN_TEST(tc_redirect_peer_l3);
1172 RUN_TEST(tc_redirect_neigh);
1173 RUN_TEST(tc_redirect_neigh_fib);
1174 RUN_TEST(tc_redirect_dtime);
/*
 * Test entry point: runs test_tc_redirect_run_tests() on a dedicated pthread
 * and joins it, asserting that both thread creation and the join succeed.
 */
1178 void serial_test_tc_redirect(void)
1180 pthread_t test_thread;
1183 /* Run the tests in their own thread to isolate the namespace changes
1184 * so they do not affect the environment of other tests.
1185 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
1187 err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
1188 if (ASSERT_OK(err, "pthread_create"))
1189 ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");