1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
4 * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
5 * between src and dst. The netns fwd has veth links to each of src and dst. The
6 * client is in src and server in dst. The test installs a TC BPF program to each
7 * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
8 * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
9 * switch from ingress side; it also installs a checker prog on the egress side
10 * to drop unexpected traffic.
13 #include <arpa/inet.h>
14 #include <linux/if_tun.h>
15 #include <linux/limits.h>
16 #include <linux/sysctl.h>
17 #include <linux/time_types.h>
18 #include <linux/net_tstamp.h>
25 #include "test_progs.h"
26 #include "network_helpers.h"
27 #include "test_tc_neigh_fib.skel.h"
28 #include "test_tc_neigh.skel.h"
29 #include "test_tc_peer.skel.h"
30 #include "test_tc_dtime.skel.h"
/* TCP socket option number used with setsockopt(SOL_TCP, ...) in
 * test_inet_dtime().
 */
33 #define TCP_TX_DELAY 37
/* Names of the three network namespaces (src <-> fwd <-> dst). */
36 #define NS_SRC "ns_src"
37 #define NS_FWD "ns_fwd"
38 #define NS_DST "ns_dst"
/* IPv4 addresses: IP4_SRC and IP4_DST are assigned to veth_src and veth_dst;
 * the TUN addresses are assigned to tun_src and tun_fwd by the peer_l3 test.
 */
40 #define IP4_SRC "172.16.1.100"
41 #define IP4_DST "172.16.2.100"
42 #define IP4_TUN_SRC "172.17.1.100"
43 #define IP4_TUN_FWD "172.17.1.200"
/* IPv6 counterparts of the addresses above. */
46 #define IP6_SRC "0::1:dead:beef:cafe"
47 #define IP6_DST "0::2:dead:beef:cafe"
48 #define IP6_TUN_SRC "1::1:dead:beef:cafe"
49 #define IP6_TUN_FWD "1::2:dead:beef:cafe"
/* IPv4 addresses given to veth_src_fwd (SLL) and veth_dst_fwd (DLL) inside
 * ns_fwd; IP4_NET is routed as a /16 from both endpoints.
 */
52 #define IP4_SLL "169.254.0.1"
53 #define IP4_DLL "169.254.0.2"
54 #define IP4_NET "169.254.0.0"
/* Fixed MAC addresses set on veth_dst_fwd and veth_dst respectively. */
56 #define MAC_DST_FWD "00:11:22:33:44:55"
57 #define MAC_DST "00:22:33:44:55:66"
/* Number of bytes get_ifaddr() reads from the sysfs address file. */
59 #define IFADDR_STR_LEN 18
/* Arguments appended to the ping command in test_ping(). */
60 #define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
/* Timeout passed to connect_to_fd() and settimeo(). */
62 #define TIMEOUT_MILLIS 10000
/* Multiplier used to turn a seconds field into nanoseconds. */
63 #define NSEC_PER_SEC 1000000000ULL
/* Print a printf-style message to stderr, prefixed with the source file,
 * the line number and strerror(errno), and followed by a newline.
 */
65 #define log_err(MSG, ...) \
66 fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
67 __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
/* NULL-terminated list of the namespace names, walked through a pointer by
 * netns_setup_namespaces() and netns_setup_namespaces_nofail().
 */
69 static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
/* Write the string newval to the file at path.
 *
 * The file is opened with mode "r+" and the string is written with a single
 * fwrite() of strlen(newval) bytes (no terminating NUL). A failed write is
 * reported through log_err().
 *
 * NOTE(review): the declarations, the fopen() check, the fclose() and the
 * return statements are not visible in this excerpt. set_forwarding() checks
 * the result with ASSERT_OK(), so 0 presumably means success.
 */
71 static int write_file(const char *path, const char *newval)
75 f = fopen(path, "r+");
78 if (fwrite(newval, strlen(newval), 1, f) != 1) {
79 log_err("writing to %s failed", path);
/* Run "ip netns <verb> <name>" through system() for entries of namespaces[],
 * asserting that the command succeeds; the command string itself is used as
 * the assertion label.
 *
 * NOTE(review): the loop construct and the return statements are not visible
 * in this excerpt. RUN_TEST checks the result with ASSERT_OK().
 */
87 static int netns_setup_namespaces(const char *verb)
89 const char * const *ns = namespaces;
93 snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns);
94 if (!ASSERT_OK(system(cmd), cmd))
/* Variant of netns_setup_namespaces() that returns nothing: it builds the
 * same "ip netns <verb> <name>" command with stdout and stderr redirected to
 * /dev/null.
 *
 * NOTE(review): the line that runs the command is not visible in this
 * excerpt; presumably its result is ignored.
 */
101 static void netns_setup_namespaces_nofail(const char *verb)
103 const char * const *ns = namespaces;
107 snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, *ns);
/* Interface indexes of the four veth devices. They are filled in by
 * netns_setup_links_and_routes() with if_nametoindex() and consumed when
 * creating qdisc hooks and when setting the BPF programs' IFINDEX_SRC and
 * IFINDEX_DST values.
 */
113 struct netns_setup_result {
114 int ifindex_veth_src;
115 int ifindex_veth_src_fwd;
116 int ifindex_veth_dst;
117 int ifindex_veth_dst_fwd;
/* Read the link-layer address of interface name from
 * /sys/class/net/<name>/address into ifaddr.
 *
 * Exactly IFADDR_STR_LEN bytes are requested and the read is asserted to
 * return that many. Nothing visible here NUL-terminates ifaddr; the caller in
 * this file passes a zero-initialised buffer of IFADDR_STR_LEN + 1 bytes.
 *
 * NOTE(review): declarations, fclose() and return statements are not visible
 * in this excerpt; the caller treats a non-zero result as failure.
 */
120 static int get_ifaddr(const char *name, char *ifaddr)
126 snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
127 f = fopen(path, "r");
128 if (!ASSERT_OK_PTR(f, path))
131 ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
132 if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
/* Build the test topology and record the veth ifindexes in result.
 *
 * Steps, in order:
 *  - create the veth pairs veth_src/veth_src_fwd and veth_dst/veth_dst_fwd,
 *    and give veth_dst_fwd and veth_dst fixed MAC addresses;
 *  - read the MAC address of veth_src_fwd (used for the static neighbour
 *    entries added in ns_src);
 *  - look up all four ifindexes before the devices are moved;
 *  - move each device into its namespace;
 *  - in ns_src, ns_fwd and ns_dst: add addresses, bring the links up, add
 *    host routes, and (in ns_src and ns_dst) static neighbour entries.
 *
 * NOTE(review): SYS() is defined outside this excerpt; presumably it runs the
 * command and jumps to the fail label on error. The labels, the return
 * statements and a few continuation lines are not visible here.
 */
140 static int netns_setup_links_and_routes(struct netns_setup_result *result)
142 struct nstoken *nstoken = NULL;
143 char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
145 SYS(fail, "ip link add veth_src type veth peer name veth_src_fwd");
146 SYS(fail, "ip link add veth_dst type veth peer name veth_dst_fwd");
148 SYS(fail, "ip link set veth_dst_fwd address " MAC_DST_FWD);
149 SYS(fail, "ip link set veth_dst address " MAC_DST);
151 if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
154 result->ifindex_veth_src = if_nametoindex("veth_src");
155 if (!ASSERT_GT(result->ifindex_veth_src, 0, "ifindex_veth_src"))
158 result->ifindex_veth_src_fwd = if_nametoindex("veth_src_fwd");
159 if (!ASSERT_GT(result->ifindex_veth_src_fwd, 0, "ifindex_veth_src_fwd"))
162 result->ifindex_veth_dst = if_nametoindex("veth_dst");
163 if (!ASSERT_GT(result->ifindex_veth_dst, 0, "ifindex_veth_dst"))
166 result->ifindex_veth_dst_fwd = if_nametoindex("veth_dst_fwd");
167 if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd"))
170 SYS(fail, "ip link set veth_src netns " NS_SRC);
171 SYS(fail, "ip link set veth_src_fwd netns " NS_FWD);
172 SYS(fail, "ip link set veth_dst_fwd netns " NS_FWD);
173 SYS(fail, "ip link set veth_dst netns " NS_DST);
175 /** setup in 'src' namespace */
176 nstoken = open_netns(NS_SRC);
177 if (!ASSERT_OK_PTR(nstoken, "setns src"))
180 SYS(fail, "ip addr add " IP4_SRC "/32 dev veth_src");
181 SYS(fail, "ip addr add " IP6_SRC "/128 dev veth_src nodad");
182 SYS(fail, "ip link set dev veth_src up");
184 SYS(fail, "ip route add " IP4_DST "/32 dev veth_src scope global");
185 SYS(fail, "ip route add " IP4_NET "/16 dev veth_src scope global");
186 SYS(fail, "ip route add " IP6_DST "/128 dev veth_src scope global");
188 SYS(fail, "ip neigh add " IP4_DST " dev veth_src lladdr %s",
190 SYS(fail, "ip neigh add " IP6_DST " dev veth_src lladdr %s",
193 close_netns(nstoken);
195 /** setup in 'fwd' namespace */
196 nstoken = open_netns(NS_FWD);
197 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
200 /* The fwd netns automatically gets a v6 LL address / routes, but also
201 * needs v4 one in order to start ARP probing. IP4_NET route is added
202 * to the endpoints so that the ARP processing will reply.
204 SYS(fail, "ip addr add " IP4_SLL "/32 dev veth_src_fwd");
205 SYS(fail, "ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
206 SYS(fail, "ip link set dev veth_src_fwd up");
207 SYS(fail, "ip link set dev veth_dst_fwd up");
209 SYS(fail, "ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
210 SYS(fail, "ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
211 SYS(fail, "ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
212 SYS(fail, "ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
214 close_netns(nstoken);
216 /** setup in 'dst' namespace */
217 nstoken = open_netns(NS_DST);
218 if (!ASSERT_OK_PTR(nstoken, "setns dst"))
221 SYS(fail, "ip addr add " IP4_DST "/32 dev veth_dst");
222 SYS(fail, "ip addr add " IP6_DST "/128 dev veth_dst nodad");
223 SYS(fail, "ip link set dev veth_dst up");
225 SYS(fail, "ip route add " IP4_SRC "/32 dev veth_dst scope global");
226 SYS(fail, "ip route add " IP4_NET "/16 dev veth_dst scope global");
227 SYS(fail, "ip route add " IP6_SRC "/128 dev veth_dst scope global");
229 SYS(fail, "ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
230 SYS(fail, "ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
232 close_netns(nstoken);
237 close_netns(nstoken);
/* Create a tc hook on ifindex through bpf_tc_hook_create(), with the attach
 * point set to BPF_TC_INGRESS | BPF_TC_EGRESS.
 *
 * The assertion label is formatted like the equivalent tc command
 * ("qdisc add dev <ifname> clsact"); "<unknown_iface>" is substituted when
 * if_indextoname() cannot resolve the index.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * QDISC_CLSACT_CREATE treats a non-zero result as failure.
 */
241 static int qdisc_clsact_create(struct bpf_tc_hook *qdisc_hook, int ifindex)
243 char err_str[128], ifname[16];
246 qdisc_hook->ifindex = ifindex;
247 qdisc_hook->attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
248 err = bpf_tc_hook_create(qdisc_hook);
249 snprintf(err_str, sizeof(err_str),
250 "qdisc add dev %s clsact",
251 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>");
252 err_str[sizeof(err_str) - 1] = 0;
253 ASSERT_OK(err, err_str);
/* Attach prog to qdisc_hook at the given attach point (xgress) and priority
 * through bpf_tc_attach().
 *
 * qdisc_hook->attach_point is overwritten with xgress before attaching. The
 * assertion label is formatted like the equivalent tc command
 * ("filter add dev <ifname> ingress|egress prio <n> bpf da <prog name>").
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * XGRESS_FILTER_ADD treats a non-zero result as failure.
 */
258 static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook,
259 enum bpf_tc_attach_point xgress,
260 const struct bpf_program *prog, int priority)
262 LIBBPF_OPTS(bpf_tc_opts, tc_attach);
263 char err_str[128], ifname[16];
266 qdisc_hook->attach_point = xgress;
267 tc_attach.prog_fd = bpf_program__fd(prog);
268 tc_attach.priority = priority;
269 err = bpf_tc_attach(qdisc_hook, &tc_attach);
270 snprintf(err_str, sizeof(err_str),
271 "filter add dev %s %s prio %d bpf da %s",
272 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>",
273 xgress == BPF_TC_INGRESS ? "ingress" : "egress",
274 priority, bpf_program__name(prog));
275 err_str[sizeof(err_str) - 1] = 0;
276 ASSERT_OK(err, err_str);
/* Call qdisc_clsact_create() and store its result in the caller's local
 * variable err, which must exist at the point of use.
 *
 * NOTE(review): the action taken on a non-zero result is in the part of the
 * macro that is not visible in this excerpt.
 */
281 #define QDISC_CLSACT_CREATE(qdisc_hook, ifindex) ({ \
282 if ((err = qdisc_clsact_create(qdisc_hook, ifindex))) \
/* Call xgress_filter_add() and store its result in the caller's local
 * variable err, which must exist at the point of use.
 *
 * NOTE(review): the action taken on a non-zero result is in the part of the
 * macro that is not visible in this excerpt.
 */
286 #define XGRESS_FILTER_ADD(qdisc_hook, xgress, prog, priority) ({ \
287 if ((err = xgress_filter_add(qdisc_hook, xgress, prog, priority))) \
/* Attach the three programs to the forwarding-side veth devices.
 *
 * A clsact hook is created on veth_src_fwd and on veth_dst_fwd (by ifindex
 * from setup_result). src_prog goes on veth_src_fwd ingress, dst_prog on
 * veth_dst_fwd ingress, and chk_prog on the egress of both devices. Every
 * filter is added with priority 0.
 *
 * NOTE(review): the err declaration, labels and return statements are not
 * visible in this excerpt; callers treat a non-zero result as failure.
 */
291 static int netns_load_bpf(const struct bpf_program *src_prog,
292 const struct bpf_program *dst_prog,
293 const struct bpf_program *chk_prog,
294 const struct netns_setup_result *setup_result)
296 LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd);
297 LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
300 /* tc qdisc add dev veth_src_fwd clsact */
301 QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd);
302 /* tc filter add dev veth_src_fwd ingress bpf da src_prog */
303 XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, src_prog, 0);
304 /* tc filter add dev veth_src_fwd egress bpf da chk_prog */
305 XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
307 /* tc qdisc add dev veth_dst_fwd clsact */
308 QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
309 /* tc filter add dev veth_dst_fwd ingress bpf da dst_prog */
310 XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
311 /* tc filter add dev veth_dst_fwd egress bpf da chk_prog */
312 XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
/* Check TCP connectivity from ns_src to ns_dst.
 *
 * A server is started on addr:port inside ns_dst, then the thread switches to
 * ns_src, connects to it and accepts the connection. The client writes
 * sizeof(buf) bytes ("testing testing" including its NUL) and the same number
 * of bytes is expected to be read from the accepted socket, which has a
 * TIMEOUT_MILLIS timeout set.
 *
 * NOTE(review): the cleanup of the three descriptors is not visible in this
 * excerpt.
 */
319 static void test_tcp(int family, const char *addr, __u16 port)
321 int listen_fd = -1, accept_fd = -1, client_fd = -1;
322 char buf[] = "testing testing";
324 struct nstoken *nstoken;
326 nstoken = open_netns(NS_DST);
327 if (!ASSERT_OK_PTR(nstoken, "setns dst"))
330 listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
331 if (!ASSERT_GE(listen_fd, 0, "listen"))
334 close_netns(nstoken);
335 nstoken = open_netns(NS_SRC);
336 if (!ASSERT_OK_PTR(nstoken, "setns src"))
339 client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
340 if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
343 accept_fd = accept(listen_fd, NULL, NULL);
344 if (!ASSERT_GE(accept_fd, 0, "accept"))
347 if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo"))
350 n = write(client_fd, buf, sizeof(buf));
351 if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
354 n = read(accept_fd, buf, sizeof(buf));
355 ASSERT_EQ(n, sizeof(buf), "recv from server");
359 close_netns(nstoken);
/* Ping addr from inside ns_src using "ip netns exec", with PING_ARGS and the
 * command chosen by ping_command(family); ping output is discarded.
 *
 * NOTE(review): ping_command() and SYS() are defined outside this excerpt,
 * and the return statements are not visible here.
 */
368 static int test_ping(int family, const char *addr)
370 SYS(fail, "ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
/* Run the TCP and ping checks towards the dst addresses, IPv4 first and then
 * IPv6. The return value of test_ping() is not inspected here.
 */
376 static void test_connectivity(void)
378 test_tcp(AF_INET, IP4_DST, IP4_PORT);
379 test_ping(AF_INET, IP4_DST);
380 test_tcp(AF_INET6, IP6_DST, IP6_PORT);
381 test_ping(AF_INET6, IP6_DST);
/* Enable or disable IPv4 and IPv6 forwarding by writing "1" or "0" to
 * /proc/sys/net/ipv4/ip_forward and /proc/sys/net/ipv6/conf/all/forwarding.
 * The sysctls are written from whichever network namespace the calling
 * thread is in.
 *
 * The assertion labels reflect the value actually being written, so a
 * failure while enabling forwarding is no longer reported as "=0".
 *
 * NOTE(review): the err declaration and the return statements are not
 * visible in this excerpt; callers check the result with ASSERT_OK().
 */
384 static int set_forwarding(bool enable)
388 err = write_file("/proc/sys/net/ipv4/ip_forward", enable ? "1" : "0");
389 if (!ASSERT_OK(err, enable ? "set ipv4.ip_forward=1" : "set ipv4.ip_forward=0"))
392 err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", enable ? "1" : "0");
393 if (!ASSERT_OK(err, enable ? "set ipv6.forwarding=1" : "set ipv6.forwarding=0"))
/* Receive one message on fd and validate both payload and receive timestamp.
 *
 * recvmsg() must return s bytes whose first s bytes match expected. The first
 * control message is expected to be SOL_SOCKET/SO_TIMESTAMPNS_NEW; its
 * timestamp must be non-zero, not later than the current CLOCK_REALTIME and
 * less than 5 seconds old. If no such control message is present pkt_ts stays
 * zero and the non-zero assertion fails.
 *
 * NOTE(review): the declarations of data and iov and the rest of the msghdr
 * setup are not visible in this excerpt.
 */
399 static void rcv_tstamp(int fd, const char *expected, size_t s)
401 struct __kernel_timespec pkt_ts = {};
402 char ctl[CMSG_SPACE(sizeof(pkt_ts))];
403 struct timespec now_ts;
404 struct msghdr msg = {};
405 __u64 now_ns, pkt_ns;
406 struct cmsghdr *cmsg;
412 iov.iov_len = sizeof(data);
415 msg.msg_control = &ctl;
416 msg.msg_controllen = sizeof(ctl);
418 ret = recvmsg(fd, &msg, 0);
419 if (!ASSERT_EQ(ret, s, "recvmsg"))
421 ASSERT_STRNEQ(data, expected, s, "expected rcv data");
423 cmsg = CMSG_FIRSTHDR(&msg);
424 if (cmsg && cmsg->cmsg_level == SOL_SOCKET &&
425 cmsg->cmsg_type == SO_TIMESTAMPNS_NEW)
426 memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts));
428 pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec;
429 ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp");
431 ret = clock_gettime(CLOCK_REALTIME, &now_ts);
432 ASSERT_OK(ret, "clock_gettime");
433 now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
435 if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp"))
436 ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC,
/* Send one message on fd with a transmit time attached.
 *
 * The current CLOCK_TAI time, converted to nanoseconds, is placed in a
 * SOL_SOCKET/SCM_TXTIME control message. SO_TXTIME is enabled on the socket
 * with clockid CLOCK_TAI just before sending, and sendmsg() is expected to
 * return s.
 *
 * NOTE(review): the iovec setup is not visible in this excerpt; presumably b
 * and s describe the payload.
 */
440 static void snd_tstamp(int fd, char *b, size_t s)
442 struct sock_txtime opt = { .clockid = CLOCK_TAI };
443 char ctl[CMSG_SPACE(sizeof(__u64))];
444 struct timespec now_ts;
445 struct msghdr msg = {};
446 struct cmsghdr *cmsg;
451 ret = clock_gettime(CLOCK_TAI, &now_ts);
452 ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)");
453 now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
459 msg.msg_control = &ctl;
460 msg.msg_controllen = sizeof(ctl);
462 cmsg = CMSG_FIRSTHDR(&msg);
463 cmsg->cmsg_level = SOL_SOCKET;
464 cmsg->cmsg_type = SCM_TXTIME;
465 cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns));
466 *(__u64 *)CMSG_DATA(cmsg) = now_ns;
468 ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt));
469 ASSERT_OK(ret, "setsockopt(SO_TXTIME)");
471 ret = sendmsg(fd, &msg, 0);
472 ASSERT_EQ(ret, s, "sendmsg");
/* Drive one flow from ns_src to ns_dst for the delivery-time tests.
 *
 * A server of the given family and socket type is started on addr:port in
 * ns_dst, and SO_TIMESTAMPNS_NEW is enabled on the listening socket. For
 * SOCK_STREAM, TCP_TX_DELAY is also set on it. The client connects from
 * ns_src. For SOCK_STREAM the connection is accepted, the client writes buf
 * and rcv_tstamp() validates it on the accepted socket.
 *
 * NOTE(review): the branch structure around the final snd_tstamp() /
 * rcv_tstamp(listen_fd) pair is not visible in this excerpt; presumably that
 * pair is the non-stream path. Descriptor cleanup is not visible either.
 */
475 static void test_inet_dtime(int family, int type, const char *addr, __u16 port)
477 int opt = 1, accept_fd = -1, client_fd = -1, listen_fd, err;
478 char buf[] = "testing testing";
479 struct nstoken *nstoken;
481 nstoken = open_netns(NS_DST);
482 if (!ASSERT_OK_PTR(nstoken, "setns dst"))
484 listen_fd = start_server(family, type, addr, port, 0);
485 close_netns(nstoken);
487 if (!ASSERT_GE(listen_fd, 0, "listen"))
490 /* Ensure the kernel puts the (rcv) timestamp for all skb */
491 err = setsockopt(listen_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
493 if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)"))
496 if (type == SOCK_STREAM) {
497 /* Ensure the kernel set EDT when sending out rst/ack
498 * from the kernel's ctl_sk.
500 err = setsockopt(listen_fd, SOL_TCP, TCP_TX_DELAY, &opt,
502 if (!ASSERT_OK(err, "setsockopt(TCP_TX_DELAY)"))
506 nstoken = open_netns(NS_SRC);
507 if (!ASSERT_OK_PTR(nstoken, "setns src"))
509 client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
510 close_netns(nstoken);
512 if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
515 if (type == SOCK_STREAM) {
518 accept_fd = accept(listen_fd, NULL, NULL);
519 if (!ASSERT_GE(accept_fd, 0, "accept"))
522 n = write(client_fd, buf, sizeof(buf));
523 if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
525 rcv_tstamp(accept_fd, buf, sizeof(buf));
527 snd_tstamp(client_fd, buf, sizeof(buf));
528 rcv_tstamp(listen_fd, buf, sizeof(buf));
/* Attach the test_tc_dtime programs in all three namespaces.
 *
 *  - ns_src: clsact on veth_src with ingress_host / egress_host at priority 0.
 *  - ns_dst: clsact on veth_dst with ingress_host / egress_host at priority 0.
 *  - ns_fwd: clsact on veth_dst_fwd and on veth_src_fwd, each with the
 *    ingress and egress fwdns programs at priorities 100 and 101.
 *
 * Each namespace is entered with open_netns() and left with close_netns().
 *
 * NOTE(review): the err declaration, labels and return statements are not
 * visible in this excerpt; the caller treats a non-zero result as failure.
 */
539 static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
540 const struct netns_setup_result *setup_result)
542 LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd);
543 LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
544 LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src);
545 LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst);
546 struct nstoken *nstoken;
549 /* setup ns_src tc progs */
550 nstoken = open_netns(NS_SRC);
551 if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
553 /* tc qdisc add dev veth_src clsact */
554 QDISC_CLSACT_CREATE(&qdisc_veth_src, setup_result->ifindex_veth_src);
555 /* tc filter add dev veth_src ingress bpf da ingress_host */
556 XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
557 /* tc filter add dev veth_src egress bpf da egress_host */
558 XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
559 close_netns(nstoken);
561 /* setup ns_dst tc progs */
562 nstoken = open_netns(NS_DST);
563 if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
565 /* tc qdisc add dev veth_dst clsact */
566 QDISC_CLSACT_CREATE(&qdisc_veth_dst, setup_result->ifindex_veth_dst);
567 /* tc filter add dev veth_dst ingress bpf da ingress_host */
568 XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
569 /* tc filter add dev veth_dst egress bpf da egress_host */
570 XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
571 close_netns(nstoken);
573 /* setup ns_fwd tc progs */
574 nstoken = open_netns(NS_FWD);
575 if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
577 /* tc qdisc add dev veth_dst_fwd clsact */
578 QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
579 /* tc filter add dev veth_dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
580 XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS,
581 skel->progs.ingress_fwdns_prio100, 100);
582 /* tc filter add dev veth_dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
583 XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS,
584 skel->progs.ingress_fwdns_prio101, 101);
585 /* tc filter add dev veth_dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
586 XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS,
587 skel->progs.egress_fwdns_prio100, 100);
588 /* tc filter add dev veth_dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
589 XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS,
590 skel->progs.egress_fwdns_prio101, 101);
592 /* tc qdisc add dev veth_src_fwd clsact */
593 QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd);
594 /* tc filter add dev veth_src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
595 XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS,
596 skel->progs.ingress_fwdns_prio100, 100);
597 /* tc filter add dev veth_src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
598 XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS,
599 skel->progs.ingress_fwdns_prio101, 101);
600 /* tc filter add dev veth_src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
601 XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS,
602 skel->progs.egress_fwdns_prio100, 100);
603 /* tc filter add dev veth_src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
604 XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS,
605 skel->progs.egress_fwdns_prio101, 101);
606 close_netns(nstoken);
610 close_netns(nstoken);
/* Printable names of the per-hook counters, indexed by the cnt argument of
 * dtime_cnt_str(). Only the first entries are visible in this excerpt.
 */
625 const char *cnt_names[] = {
626 "ingress_fwdns_p100",
627 "ingress_fwdns_p101",
/* Printable names of the dtime test cases, indexed by the test argument of
 * dtime_cnt_str() and dtime_err_str(). Only the first entry is visible in
 * this excerpt.
 */
649 const char *test_names[] = {
650 "tcp ip6 clear dtime",
/* Format "<test name> <counter name>" into a function-local static 64-byte
 * buffer. Because the buffer is static, its contents are overwritten by the
 * next call.
 *
 * NOTE(review): the return statement is not visible in this excerpt; callers
 * pass the result as an assertion label.
 */
661 static const char *dtime_cnt_str(int test, int cnt)
663 static char name[64];
665 snprintf(name, sizeof(name), "%s %s", test_names[test], cnt_names[cnt]);
/* Format "<test name> <...> errs" into a function-local static 64-byte
 * buffer. Because the buffer is static, its contents are overwritten by the
 * next call.
 *
 * NOTE(review): the second format argument and the return statement are not
 * visible in this excerpt; presumably the argument is cnt_names[cnt].
 */
670 static const char *dtime_err_str(int test, int cnt)
672 static char name[64];
674 snprintf(name, sizeof(name), "%s %s errs", test_names[test],
/* Run the TCP/IPv6 flow for the TCP_IP6_CLEAR_DTIME case on port 50000 + t
 * and check the counters the BPF programs recorded in skel->bss.
 *
 * Expected: both ingress fwdns counters and EGRESS_FWDNS_P101 are 0;
 * EGRESS_FWDNS_P100, EGRESS_ENDHOST and INGRESS_ENDHOST are greater than 0;
 * every error counter is 0.
 */
680 static void test_tcp_clear_dtime(struct test_tc_dtime *skel)
682 int i, t = TCP_IP6_CLEAR_DTIME;
683 __u32 *dtimes = skel->bss->dtimes[t];
684 __u32 *errs = skel->bss->errs[t];
687 test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t);
689 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
690 dtime_cnt_str(t, INGRESS_FWDNS_P100));
691 ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
692 dtime_cnt_str(t, INGRESS_FWDNS_P101));
693 ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0,
694 dtime_cnt_str(t, EGRESS_FWDNS_P100));
695 ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 0,
696 dtime_cnt_str(t, EGRESS_FWDNS_P101));
697 ASSERT_GT(dtimes[EGRESS_ENDHOST], 0,
698 dtime_cnt_str(t, EGRESS_ENDHOST));
699 ASSERT_GT(dtimes[INGRESS_ENDHOST], 0,
700 dtime_cnt_str(t, INGRESS_ENDHOST));
702 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
703 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
/* Run one TCP delivery-time case and check the recorded counters.
 *
 * The test index t is TCP_IP4 or TCP_IP6 when bpf_fwd is true and the
 * matching _RT_FWD variant otherwise; the flow uses port 50000 + t.
 * Expected: INGRESS_FWDNS_P100 is 0, every counter from INGRESS_FWDNS_P101 up
 * to (not including) SET_DTIME is greater than 0, and every error counter
 * is 0.
 *
 * NOTE(review): the selection of addr and the else branch are not visible in
 * this excerpt.
 */
706 static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
708 __u32 *dtimes, *errs;
712 if (family == AF_INET) {
713 t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD;
716 t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD;
720 dtimes = skel->bss->dtimes[t];
721 errs = skel->bss->errs[t];
724 test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t);
726 /* fwdns_prio100 prog does not read delivery_time_type, so
727 * kernel puts the (rcv) timetamp in __sk_buff->tstamp
729 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
730 dtime_cnt_str(t, INGRESS_FWDNS_P100));
731 for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++)
732 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
734 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
735 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
/* Run one UDP delivery-time case and check the recorded counters.
 *
 * The test index t is UDP_IP4 or UDP_IP6 when bpf_fwd is true and the
 * matching _RT_FWD variant otherwise; the flow uses port 50000 + t.
 * Expected: both ingress fwdns counters are 0, every counter from
 * EGRESS_FWDNS_P100 up to (not including) SET_DTIME is greater than 0, and
 * every error counter is 0.
 *
 * NOTE(review): the selection of addr and the else branch are not visible in
 * this excerpt.
 */
738 static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
740 __u32 *dtimes, *errs;
744 if (family == AF_INET) {
745 t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD;
748 t = bpf_fwd ? UDP_IP6 : UDP_IP6_RT_FWD;
752 dtimes = skel->bss->dtimes[t];
753 errs = skel->bss->errs[t];
756 test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t);
758 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
759 dtime_cnt_str(t, INGRESS_FWDNS_P100));
760 /* non mono delivery time is not forwarded */
761 ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
762 dtime_cnt_str(t, INGRESS_FWDNS_P101));
763 for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
764 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
766 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
767 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
/* Subtest: delivery-time handling across the forwarding namespace.
 *
 * Opens the test_tc_dtime skeleton, sets IFINDEX_SRC / IFINDEX_DST to the
 * fwd-side veth ifindexes, loads it and attaches the programs with
 * netns_load_dtime_bpf(). With forwarding disabled in ns_fwd it runs the
 * clear-dtime case and the TCP/UDP IPv4/IPv6 cases with bpf_fwd = true; it
 * then enables forwarding in ns_fwd and repeats the TCP/UDP cases with
 * bpf_fwd = false. The skeleton is destroyed at the end.
 *
 * NOTE(review): the labels and the statements taken on each failed assertion
 * are not visible in this excerpt.
 */
770 static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
772 struct test_tc_dtime *skel;
773 struct nstoken *nstoken;
776 skel = test_tc_dtime__open();
777 if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
780 skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
781 skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
783 err = test_tc_dtime__load(skel);
784 if (!ASSERT_OK(err, "test_tc_dtime__load"))
787 if (netns_load_dtime_bpf(skel, setup_result))
790 nstoken = open_netns(NS_FWD);
791 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
793 err = set_forwarding(false);
794 close_netns(nstoken);
795 if (!ASSERT_OK(err, "disable forwarding"))
798 test_tcp_clear_dtime(skel);
800 test_tcp_dtime(skel, AF_INET, true);
801 test_tcp_dtime(skel, AF_INET6, true);
802 test_udp_dtime(skel, AF_INET, true);
803 test_udp_dtime(skel, AF_INET6, true);
805 /* Test the kernel ip[6]_forward path instead
806 * of bpf_redirect_neigh().
808 nstoken = open_netns(NS_FWD);
809 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
811 err = set_forwarding(true);
812 close_netns(nstoken);
813 if (!ASSERT_OK(err, "enable forwarding"))
816 test_tcp_dtime(skel, AF_INET, false);
817 test_tcp_dtime(skel, AF_INET6, false);
818 test_udp_dtime(skel, AF_INET, false);
819 test_udp_dtime(skel, AF_INET6, false);
822 test_tc_dtime__destroy(skel);
/* Subtest using the test_tc_neigh_fib programs.
 *
 * Inside ns_fwd: open and load the skeleton, attach tc_src / tc_dst / tc_chk
 * with netns_load_bpf(), and enable forwarding (the in-line comment notes
 * that bpf_fib_lookup() checks for it). The skeleton is destroyed and the
 * namespace token closed at the end.
 *
 * NOTE(review): the step that exercises connectivity and the failure paths
 * are not visible in this excerpt.
 */
825 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
827 struct nstoken *nstoken = NULL;
828 struct test_tc_neigh_fib *skel = NULL;
830 nstoken = open_netns(NS_FWD);
831 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
834 skel = test_tc_neigh_fib__open();
835 if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
838 if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
841 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
842 skel->progs.tc_chk, setup_result))
845 /* bpf_fib_lookup() checks if forwarding is enabled */
846 if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
853 test_tc_neigh_fib__destroy(skel);
854 close_netns(nstoken);
/* Subtest using the test_tc_neigh programs.
 *
 * Inside ns_fwd: open the skeleton, set IFINDEX_SRC / IFINDEX_DST to the
 * fwd-side veth ifindexes, load it, attach tc_src / tc_dst / tc_chk with
 * netns_load_bpf(), and disable forwarding. The skeleton is destroyed and the
 * namespace token closed at the end.
 *
 * NOTE(review): the step that exercises connectivity and the failure paths
 * are not visible in this excerpt.
 */
857 static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
859 struct nstoken *nstoken = NULL;
860 struct test_tc_neigh *skel = NULL;
863 nstoken = open_netns(NS_FWD);
864 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
867 skel = test_tc_neigh__open();
868 if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
871 skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
872 skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
874 err = test_tc_neigh__load(skel);
875 if (!ASSERT_OK(err, "test_tc_neigh__load"))
878 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
879 skel->progs.tc_chk, setup_result))
882 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
889 test_tc_neigh__destroy(skel);
890 close_netns(nstoken);
/* Subtest using the test_tc_peer programs.
 *
 * Inside ns_fwd: open the skeleton, set IFINDEX_SRC / IFINDEX_DST to the
 * fwd-side veth ifindexes, load it, attach tc_src / tc_dst / tc_chk with
 * netns_load_bpf(), and disable forwarding. The skeleton is destroyed and the
 * namespace token closed at the end.
 *
 * NOTE(review): the step that exercises connectivity and the failure paths
 * are not visible in this excerpt.
 */
893 static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
895 struct nstoken *nstoken;
896 struct test_tc_peer *skel;
899 nstoken = open_netns(NS_FWD);
900 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
903 skel = test_tc_peer__open();
904 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
907 skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
908 skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
910 err = test_tc_peer__load(skel);
911 if (!ASSERT_OK(err, "test_tc_peer__load"))
914 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
915 skel->progs.tc_chk, setup_result))
918 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
925 test_tc_peer__destroy(skel);
926 close_netns(nstoken);
/* Open /dev/net/tun, create or attach to the TUN device called name
 * (IFF_TUN | IFF_NO_PI) with TUNSETIFF, and bring the link up with
 * "ip link set dev <name> up".
 *
 * NOTE(review): the declarations, the failure path and the return statements
 * are not visible in this excerpt; the caller asserts the result is >= 0 and
 * uses it as a file descriptor.
 */
929 static int tun_open(char *name)
934 fd = open("/dev/net/tun", O_RDWR);
935 if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
938 memset(&ifr, 0, sizeof(ifr));
940 ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
/* Copy at most IFNAMSIZ - 1 bytes: ifr was zeroed by the memset() above, so
 * ifr_name is always NUL-terminated even when name is IFNAMSIZ or longer.
 */
942 strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
944 err = ioctl(fd, TUNSETIFF, &ifr);
945 if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
948 SYS(fail, "ip link set dev %s up", name);
/* Relay data between the two descriptors src_fd and target_fd.
 *
 * Both descriptors are added to a read set and select() blocks without a
 * timeout. If src_fd is readable the data goes from src_fd to target_fd,
 * otherwise from target_fd to src_fd; one read and one write are done per
 * select() wake-up, so when both are readable only the src-to-target
 * direction is served in that pass. A short write is treated as a failure and
 * logged.
 *
 * NOTE(review): the enclosing loop, the buffer declaration, the read error
 * check and the return statements are not visible in this excerpt.
 */
961 static int tun_relay_loop(int src_fd, int target_fd)
970 int direction, nread, nwrite;
972 FD_SET(src_fd, &rfds);
973 FD_SET(target_fd, &rfds);
975 if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
976 log_err("select failed");
980 direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
982 nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
984 log_err("read failed");
988 nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
989 if (nwrite != nread) {
990 log_err("write failed");
/* Subtest: bpf_redirect_peer() with an L3 (TUN) device on the source side.
 *
 * Steps, in order:
 *  - open tun_src in ns_src and tun_fwd in ns_fwd;
 *  - fork a child that relays packets between the two TUN descriptors with
 *    tun_relay_loop();
 *  - open the test_tc_peer skeleton, set IFINDEX_SRC to tun_fwd's ifindex and
 *    IFINDEX_DST to veth_dst_fwd's, and load it;
 *  - attach tc_src_l3 on tun_fwd ingress, tc_dst_l3 on veth_dst_fwd ingress
 *    and tc_chk on veth_dst_fwd egress;
 *  - add TUN addresses, replace the direct routes to dst in ns_src with routes
 *    via the tunnel, and add return routes and neighbour entries in ns_dst;
 *  - disable forwarding and run test_connectivity().
 *
 * On the way out the relay child is sent SIGTERM and reaped, the skeleton is
 * destroyed and the namespace token closed.
 *
 * The label of the ifindex assertion names if_nametoindex(), the function
 * that is actually called.
 *
 * NOTE(review): the labels, the statements taken on each failed assertion and
 * the closing of the TUN descriptors are not visible in this excerpt.
 */
996 static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
998 LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd);
999 LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
1000 struct test_tc_peer *skel = NULL;
1001 struct nstoken *nstoken = NULL;
1003 int tunnel_pid = -1;
1004 int src_fd, target_fd = -1;
1007 /* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
1008 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
1009 * expose the L2 headers encapsulating the IP packet to BPF and hence
1010 * don't have skb in suitable state for this test. Alternative to TUN/TAP
1011 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
1012 * but that requires much more complicated setup.
1014 nstoken = open_netns(NS_SRC);
1015 if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
1018 src_fd = tun_open("tun_src");
1019 if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
1022 close_netns(nstoken);
1024 nstoken = open_netns(NS_FWD);
1025 if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
1028 target_fd = tun_open("tun_fwd");
1029 if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
1032 tunnel_pid = fork();
1033 if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
1036 if (tunnel_pid == 0)
1037 exit(tun_relay_loop(src_fd, target_fd));
1039 skel = test_tc_peer__open();
1040 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
1043 ifindex = if_nametoindex("tun_fwd");
1044 if (!ASSERT_GT(ifindex, 0, "if_nametoindex tun_fwd"))
1047 skel->rodata->IFINDEX_SRC = ifindex;
1048 skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
1050 err = test_tc_peer__load(skel);
1051 if (!ASSERT_OK(err, "test_tc_peer__load"))
1054 /* Load "tc_src_l3" on the tun_fwd interface to redirect packets
1055 * towards dst, and "tc_dst_l3" (ingress) and "tc_chk" (egress) on
1056 * veth_dst_fwd; "tc_chk" drops non-redirected packets.
1058 /* tc qdisc add dev tun_fwd clsact */
1059 QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex);
1060 /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */
1061 XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0);
1063 /* tc qdisc add dev veth_dst_fwd clsact */
1064 QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
1065 /* tc filter add dev veth_dst_fwd ingress bpf da tc_dst_l3 */
1066 XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
1067 /* tc filter add dev veth_dst_fwd egress bpf da tc_chk */
1068 XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
1070 /* Setup route and neigh tables */
1071 SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
1072 SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
1074 SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
1075 SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
1077 SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
1078 SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
1079 " dev tun_src scope global");
1080 SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
1081 SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
1082 SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
1083 " dev tun_src scope global");
1084 SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
1086 SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
1087 SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
1089 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1092 test_connectivity();
1095 if (tunnel_pid > 0) {
1096 kill(tunnel_pid, SIGTERM);
1097 waitpid(tunnel_pid, NULL, 0);
1104 test_tc_peer__destroy(skel);
1106 close_netns(nstoken);
/* Run the subtest test_<name> if test__start_subtest() selects it.
 *
 * The namespaces are created first. Only if that succeeds are the links and
 * routes set up and, on success, test_<name>(&setup_result) called. The
 * namespaces are deleted again inside the "namespaces created" branch, i.e.
 * also when link/route setup failed.
 *
 * NOTE(review): the opening and closing lines of the macro body are not
 * visible in this excerpt.
 */
1109 #define RUN_TEST(name) \
1111 struct netns_setup_result setup_result; \
1112 if (test__start_subtest(#name)) \
1113 if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
1114 if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
1115 "setup links and routes")) \
1116 test_ ## name(&setup_result); \
1117 netns_setup_namespaces("delete"); \
/* Thread entry point used by test_tc_redirect(): first delete any namespaces
 * with the test's names using the no-fail variant, then run each subtest in
 * turn through RUN_TEST. The arg parameter is not used in the visible lines.
 */
1121 static void *test_tc_redirect_run_tests(void *arg)
1123 netns_setup_namespaces_nofail("delete");
1125 RUN_TEST(tc_redirect_peer);
1126 RUN_TEST(tc_redirect_peer_l3);
1127 RUN_TEST(tc_redirect_neigh);
1128 RUN_TEST(tc_redirect_neigh_fib);
1129 RUN_TEST(tc_redirect_dtime);
/* Test entry point: run all subtests on a dedicated thread and wait for it to
 * finish, asserting that both pthread_create() and pthread_join() succeed.
 */
1133 void test_tc_redirect(void)
1135 pthread_t test_thread;
1138 /* Run the tests in their own thread to isolate the namespace changes
1139 * so they do not affect the environment of other tests.
1140 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
1142 err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
1143 if (ASSERT_OK(err, "pthread_create"))
1144 ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");