1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (C) 2017 Cavium, Inc.
5 #include <linux/netlink.h>
6 #include <linux/rtnetlink.h>
13 #include <sys/socket.h>
16 #include <arpa/inet.h>
21 #include <sys/ioctl.h>
22 #include <sys/syscall.h>
24 #include <bpf/libbpf.h>
25 #include <sys/resource.h>
// Process-wide state shared by the functions in this file.
// sock and sock_arp receive netlink socket descriptors (see get_route_table(),
// get_arp_table() and monitor_route()); flags holds the XDP flags passed to
// bpf_set_link_xdp_fd() and bpf_get_link_xdp_id().
28 int sock, sock_arp, flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
// Number of entries in ifindex_list and prog_id_list; initialised in main().
29 static int total_ifindex;
// Interface index for each interface name taken from the command line.
30 static int *ifindex_list;
// Program id recorded per attached interface, indexed like ifindex_list.
31 static __u32 *prog_id_list;
// Descriptors of the BPF maps that main() looks up by name in the loaded object.
33 static int lpm_map_fd;
34 static int rxcnt_map_fd;
35 static int arp_table_map_fd;
36 static int exact_match_map_fd;
37 static int tx_port_map_fd;
// Forward declaration: read_route() calls get_route_table() ahead of its definition.
39 static int get_route_table(int rtm_family);
// Signal handler that main() installs for SIGINT and SIGTERM.
// For each attached interface it asks for the id of the XDP program currently
// attached and calls bpf_set_link_xdp_fd() with fd -1 only when that id still
// equals the one recorded in prog_id_list; the other visible branches only
// print a diagnostic.
// NOTE(review): this extract omits parts of the body (opening brace, local
// declarations, the conditions guarding the last two messages and everything
// after the loop), so the exact branch structure cannot be confirmed here.
40 static void int_exit(int sig)
45 for (i = 0; i < total_ifindex; i++) {
46 if (bpf_get_link_xdp_id(ifindex_list[i], &prog_id, flags)) {
47 printf("bpf_get_link_xdp_id on iface %d failed\n",
// Touch the interface only if the program recorded at attach time is still there.
51 if (prog_id_list[i] == prog_id)
52 bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
54 printf("couldn't find a prog id on iface %d\n",
57 printf("program on iface %d changed, not removing\n",
// Signal handler that monitor_route() installs for SIGINT and SIGTERM,
// replacing int_exit for the duration of the monitoring loop.
// NOTE(review): the body of this function is not part of this extract.
64 static void close_and_exit(int sig)
72 /* Get the MAC address of the interface with the given name */
// Opens an AF_INET datagram socket, issues SIOCGIFHWADDR for iface and copies
// the six hardware-address bytes, in order, into the first six bytes of mac
// as laid out in memory.
// NOTE(review): the return statements and any close(fd) are not visible in
// this extract; confirm that fd is also released on the ioctl failure path.
73 static __be64 getmac(char *iface)
// NOTE(review): the socket() result is used without a check in the visible lines.
79 fd = socket(AF_INET, SOCK_DGRAM, 0);
80 ifr.ifr_addr.sa_family = AF_INET;
// Copies at most IFNAMSIZ - 1 bytes. strncpy() adds no terminator when the
// source is at least that long, so termination depends on the last byte of
// ifr_name already being zero. TODO confirm that ifr is zero-initialised
// (its declaration is not shown).
81 strncpy(ifr.ifr_name, iface, IFNAMSIZ - 1);
82 if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) {
83 printf("ioctl failed leaving....\n");
// Byte-wise copy; sa_data is cast to __u8 before being stored.
86 for (i = 0; i < 6 ; i++)
87 *((__u8 *)&mac + i) = (__u8)ifr.ifr_hwaddr.sa_data[i];
// Receives netlink data from sock into the buffer addressed by buf_ptr.
// In the visible lines sock_addr is consulted only for its nl_groups mask.
// NOTE(review): the loop structure, the statements guarded by each test and
// the return value are not visible in this extract. Callers store the result
// in nll and hand it to read_route() or read_arp() as a byte count - confirm.
92 static int recv_msg(struct sockaddr_nl sock_addr, int sock)
// The size argument shrinks by nll, presumably the number of bytes already
// stored in buf - TODO confirm.
100 len = recv(sock, buf_ptr, sizeof(buf) - nll, 0);
104 nh = (struct nlmsghdr *)buf_ptr;
// NLMSG_DONE is the netlink message type that ends a multipart reply.
106 if (nh->nlmsg_type == NLMSG_DONE)
// Tests whether the socket subscribed to neighbour or IPv4 route notifications.
110 if ((sock_addr.nl_groups & RTMGRP_NEIGH) == RTMGRP_NEIGH)
113 if ((sock_addr.nl_groups & RTMGRP_IPV4_ROUTE) == RTMGRP_IPV4_ROUTE)
119 /* Function to parse the route entries returned by netlink.
120 * Updates the map entries related to each route entry.
*/
// Parses the route messages in the buffer starting at nh (nll bytes long) and
// mirrors each route into the BPF maps tx_port, exact_match (destination
// length 32) and lpm_map.
// NOTE(review): this extract drops many lines of the body (braces, case
// labels, some declarations and call arguments). The notes below describe
// the visible lines only; branch nesting has to be confirmed against the
// full file.
// NOTE(review): several map updates and deletes are wrapped in assert(). A
// build with NDEBUG defined would remove those calls entirely.
122 static void read_route(struct nlmsghdr *nh, int nll)
// Scratch strings that hold each attribute value in decimal text form.
124 char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24];
125 struct bpf_lpm_trie_key *prefix_key;
126 struct rtattr *rt_attr;
127 struct rtmsg *rt_msg;
132 int dst_len, iface, metric;
143 struct arp_table arp;
// Announce which kind of route message is being processed.
148 if (nh->nlmsg_type == RTM_DELROUTE)
149 printf("DELETING Route entry\n");
150 else if (nh->nlmsg_type == RTM_GETROUTE)
151 printf("READING Route entry\n");
152 else if (nh->nlmsg_type == RTM_NEWROUTE)
153 printf("NEW Route entry\n");
155 printf("%d\n", nh->nlmsg_type);
157 memset(&route, 0, sizeof(route));
158 printf("Destination Gateway Genmask Metric Iface\n");
// One iteration per netlink message in the buffer.
159 for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
160 rt_msg = (struct rtmsg *)NLMSG_DATA(nh);
161 rtm_family = rt_msg->rtm_family;
// AF_INET messages are tested against the main routing table; the statement
// guarded by this test is not visible in this extract.
162 if (rtm_family == AF_INET)
163 if (rt_msg->rtm_table != RT_TABLE_MAIN)
165 rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
166 rtl = RTM_PAYLOAD(nh);
// Walk the route attributes; each visible payload is read as a 32-bit value.
// NOTE(review): the case labels and most of the sprintf() calls are missing
// from this extract.
168 for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
169 switch (rt_attr->rta_type) {
172 (*((__be32 *)RTA_DATA(rt_attr))));
176 *((__be32 *)RTA_DATA(rt_attr)));
180 *((int *)RTA_DATA(rt_attr)));
// NOTE(review): %u is paired with an int argument here.
183 sprintf(metrics, "%u",
184 *((int *)RTA_DATA(rt_attr)));
189 sprintf(dsts_len, "%d", rt_msg->rtm_dst_len);
// The values are parsed back from their decimal strings.
// NOTE(review): atoi() yields an int. If the address strings were written
// as unsigned values, addresses above INT_MAX are out of its range - confirm
// against the missing sprintf() calls.
190 route.dst = atoi(dsts);
191 route.dst_len = atoi(dsts_len);
192 route.gw = atoi(gws);
193 route.iface = atoi(ifs);
194 route.metric = atoi(metrics);
// NOTE(review): the size is computed with sizeof(char *), so this reserves
// pointer-size times IFNAMSIZ bytes, more than a name of IFNAMSIZ chars needs.
195 route.iface_name = alloca(sizeof(char *) * IFNAMSIZ);
// NOTE(review): if_indextoname() returns NULL on failure and the result is
// passed straight to getmac() in the visible lines.
196 route.iface_name = if_indextoname(route.iface, route.iface_name);
197 route.mac = getmac(route.iface_name);
// tx_port maps the interface index to itself.
200 assert(bpf_map_update_elem(tx_port_map_fd,
201 &route.iface, &route.iface, 0) == 0);
202 if (rtm_family == AF_INET) {
210 struct in_addr dst_addr, gw_addr, mask_addr;
// NOTE(review): the loops below store data[0] through data[3]. Confirm that
// sizeof(*prefix_key) + 3 covers four data bytes (struct layout not shown).
212 prefix_key = alloca(sizeof(*prefix_key) + 3);
213 prefix_value = alloca(sizeof(*prefix_value));
// NOTE(review): the first store to prefixlen is overwritten by the next line.
215 prefix_key->prefixlen = 32;
216 prefix_key->prefixlen = route.dst_len;
// Only the low 48 bits of the value returned by getmac() are kept.
217 direct_entry.mac = route.mac & 0xffffffffffff;
218 direct_entry.ifindex = route.iface;
219 direct_entry.arp.mac = 0;
220 direct_entry.arp.dst = 0;
// Destination length 32: maintain the exact_match map keyed by route.dst.
221 if (route.dst_len == 32) {
222 if (nh->nlmsg_type == RTM_DELROUTE) {
223 assert(bpf_map_delete_elem(exact_match_map_fd,
// If the ARP table map has an entry, its MAC is read into direct_entry.arp.mac
// and the destination is recorded next to it.
226 if (bpf_map_lookup_elem(arp_table_map_fd,
228 &direct_entry.arp.mac) == 0)
229 direct_entry.arp.dst = route.dst;
230 assert(bpf_map_update_elem(exact_match_map_fd,
232 &direct_entry, 0) == 0);
// Split the destination into four key bytes, least significant byte first.
235 for (i = 0; i < 4; i++)
236 prefix_key->data[i] = (route.dst >> i * 8) & 0xff;
// Print the route as one table row (destination, gateway, mask, ...).
238 dst_addr.s_addr = route.dst;
239 printf("%-16s", inet_ntoa(dst_addr));
241 gw_addr.s_addr = route.gw;
242 printf("%-16s", inet_ntoa(gw_addr));
// Builds a mask with the top dst_len bits set, converted by htonl().
// NOTE(review): shifting a 32-bit operand by 32 is undefined in C, and
// dst_len equal to 32 is handled above - confirm whether this line can be
// reached with that value.
244 mask_addr.s_addr = htonl(~(0xffffffffU >> route.dst_len));
245 printf("%-16s%-7d%s\n", inet_ntoa(mask_addr),
// lpm_map handling: look the prefix up, then fill and store a value.
// NOTE(review): the condition completing this lookup and the branch that
// separates the lines below are not visible in this extract.
249 if (bpf_map_lookup_elem(lpm_map_fd, prefix_key,
251 for (i = 0; i < 4; i++)
252 prefix_value->prefix[i] = prefix_key->data[i];
253 prefix_value->value = route.mac & 0xffffffffffff;
254 prefix_value->ifindex = route.iface;
255 prefix_value->gw = route.gw;
256 prefix_value->metric = route.metric;
258 assert(bpf_map_update_elem(lpm_map_fd,
// Route deletion: remove the prefix from lpm_map, then re-read the whole
// route table through get_route_table(AF_INET).
// Otherwise, when the stored prefix bytes equal the key bytes and the new
// metric is not smaller than the stored one, the entry is rewritten with
// this route.
263 if (nh->nlmsg_type == RTM_DELROUTE) {
264 printf("deleting entry\n");
265 printf("prefix key=%d.%d.%d.%d/%d",
270 prefix_key->prefixlen);
271 assert(bpf_map_delete_elem(lpm_map_fd,
274 /* Rereading the route table to check if
275 * there is an entry with the same
276 * prefix but a different metric as the
279 get_route_table(AF_INET);
280 } else if (prefix_key->data[0] ==
281 prefix_value->prefix[0] &&
282 prefix_key->data[1] ==
283 prefix_value->prefix[1] &&
284 prefix_key->data[2] ==
285 prefix_value->prefix[2] &&
286 prefix_key->data[3] ==
287 prefix_value->prefix[3] &&
288 route.metric >= prefix_value->metric) {
291 for (i = 0; i < 4; i++)
292 prefix_value->prefix[i] =
294 prefix_value->value =
295 route.mac & 0xffffffffffff;
296 prefix_value->ifindex = route.iface;
297 prefix_value->gw = route.gw;
298 prefix_value->metric = route.metric;
299 assert(bpf_map_update_elem(lpm_map_fd,
// Reset the per-message scratch state.
// NOTE(review): route is cleared twice in the visible lines, and metrics is
// not among the buffers cleared - confirm a stale metric cannot carry over
// to the next message.
306 memset(&route, 0, sizeof(route));
307 memset(dsts, 0, sizeof(dsts));
308 memset(dsts_len, 0, sizeof(dsts_len));
309 memset(gws, 0, sizeof(gws));
310 memset(ifs, 0, sizeof(ifs));
311 memset(&route, 0, sizeof(route));
315 /* Function to read the existing route table when the process is launched */
// Opens a NETLINK_ROUTE raw socket into the global sock, binds it, sends an
// RTM_GETROUTE dump request for rtm_family restricted to RT_TABLE_MAIN and
// collects the reply with recv_msg().
// Called from main() at start-up and again from read_route() after a route
// has been deleted.
// NOTE(review): the error-path returns, the call that consumes nh and any
// close(sock) are not visible in this extract. Because this function is
// re-entered on each route deletion, confirm the socket is closed before
// returning.
316 static int get_route_table(int rtm_family)
318 struct sockaddr_nl sa;
332 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
334 printf("open netlink socket: %s\n", strerror(errno));
337 memset(&sa, 0, sizeof(sa));
338 sa.nl_family = AF_NETLINK;
339 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
340 printf("bind to netlink: %s\n", strerror(errno));
// Build the dump request: netlink header followed by a struct rtmsg.
344 memset(&req, 0, sizeof(req));
345 req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
346 req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
347 req.nl.nlmsg_type = RTM_GETROUTE;
349 req.rt.rtm_family = rtm_family;
350 req.rt.rtm_table = RT_TABLE_MAIN;
351 req.nl.nlmsg_pid = 0;
352 req.nl.nlmsg_seq = ++seq;
353 memset(&msg, 0, sizeof(msg));
354 iov.iov_base = (void *)&req.nl;
355 iov.iov_len = req.nl.nlmsg_len;
358 ret = sendmsg(sock, &msg, 0);
360 printf("send to netlink: %s\n", strerror(errno));
// The reply is collected in the file-scope buffer buf.
364 memset(buf, 0, sizeof(buf));
365 nll = recv_msg(sa, sock);
// NOTE(review): strerror() expects an errno value, but nll is the return
// value of recv_msg(); the other error paths in this function pass errno.
367 printf("recv from netlink: %s\n", strerror(nll));
371 nh = (struct nlmsghdr *)buf;
378 /* Function to parse the ARP entries returned by netlink.
379 * Updates the map entries related to each ARP entry.
*/
// Parses the neighbour messages in the buffer starting at nh (nll bytes long),
// prints each address / hardware-address pair and mirrors the entry into the
// arp_table map and, when the address is already present there, into the
// exact_match map.
// NOTE(review): this extract drops lines of the body (braces, case labels,
// declarations and call arguments); the notes below cover visible lines only.
// NOTE(review): the map updates and deletes are wrapped in assert(). A build
// with NDEBUG defined would remove those calls entirely.
381 static void read_arp(struct nlmsghdr *nh, int nll)
383 struct rtattr *rt_attr;
384 char dsts[24], mac[24];
385 struct ndmsg *rt_msg;
393 struct arp_table arp;
398 if (nh->nlmsg_type == RTM_GETNEIGH)
399 printf("READING arp entry\n");
400 printf("Address HwAddress\n");
// One iteration per netlink message in the buffer.
401 for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
402 struct in_addr dst_addr;
406 rt_msg = (struct ndmsg *)NLMSG_DATA(nh);
407 rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
408 ndm_family = rt_msg->ndm_family;
409 rtl = RTM_PAYLOAD(nh);
// Walk the attributes. One payload is read as a 32-bit value and one as a
// 64-bit value.
// NOTE(review): if the 64-bit load targets a 6-byte link-layer address
// attribute it reads two bytes past the payload - confirm against the
// missing case labels.
410 for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
411 switch (rt_attr->rta_type) {
414 *((__be32 *)RTA_DATA(rt_attr)));
418 *((__be64 *)RTA_DATA(rt_attr)));
// The values are parsed back from their decimal strings.
424 arp_entry.dst = atoi(dsts);
425 arp_entry.mac = atol(mac);
427 dst_addr.s_addr = arp_entry.dst;
// Format the six MAC bytes as hex, lowest-order byte first.
428 for (i = 0; i < 6; i++)
429 len += snprintf(mac_str + len, 18 - len, "%02llx%s",
430 ((arp_entry.mac >> i * 8) & 0xff),
432 printf("%-16s%s\n", inet_ntoa(dst_addr), mac_str);
434 if (ndm_family == AF_INET) {
// If exact_match already holds an entry (key argument not visible here),
// refresh its ARP fields: cleared on RTM_DELNEIGH, set on RTM_NEWNEIGH.
435 if (bpf_map_lookup_elem(exact_match_map_fd,
437 &direct_entry) == 0) {
438 if (nh->nlmsg_type == RTM_DELNEIGH) {
439 direct_entry.arp.dst = 0;
440 direct_entry.arp.mac = 0;
441 } else if (nh->nlmsg_type == RTM_NEWNEIGH) {
442 direct_entry.arp.dst = arp_entry.dst;
443 direct_entry.arp.mac = arp_entry.mac;
445 assert(bpf_map_update_elem(exact_match_map_fd,
449 memset(&direct_entry, 0, sizeof(direct_entry));
// Keep the arp_table map in step with the neighbour event.
451 if (nh->nlmsg_type == RTM_DELNEIGH) {
452 assert(bpf_map_delete_elem(arp_table_map_fd,
453 &arp_entry.dst) == 0);
454 } else if (nh->nlmsg_type == RTM_NEWNEIGH) {
455 assert(bpf_map_update_elem(arp_table_map_fd,
// Reset the per-message scratch state.
461 memset(&arp_entry, 0, sizeof(arp_entry));
462 memset(dsts, 0, sizeof(dsts));
466 /* Function to read the existing ARP table when the process is launched */
// Opens a NETLINK_ROUTE raw socket into the global sock, binds it, sends an
// RTM_GETNEIGH dump request for rtm_family with ndm_state set to
// NUD_REACHABLE and collects the reply with recv_msg().
// NOTE(review): the error-path returns, the call that consumes nh and any
// close(sock) are not visible in this extract.
467 static int get_arp_table(int rtm_family)
469 struct sockaddr_nl sa;
482 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
484 printf("open netlink socket: %s\n", strerror(errno));
487 memset(&sa, 0, sizeof(sa));
488 sa.nl_family = AF_NETLINK;
489 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
490 printf("bind to netlink: %s\n", strerror(errno));
494 memset(&req, 0, sizeof(req));
// NOTE(review): the length is computed from sizeof(struct rtmsg) while the
// fields set below are ndm_* members - confirm that the payload struct in
// req has the same size.
495 req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
496 req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
497 req.nl.nlmsg_type = RTM_GETNEIGH;
498 req.rt.ndm_state = NUD_REACHABLE;
499 req.rt.ndm_family = rtm_family;
500 req.nl.nlmsg_pid = 0;
501 req.nl.nlmsg_seq = ++seq;
502 memset(&msg, 0, sizeof(msg));
503 iov.iov_base = (void *)&req.nl;
504 iov.iov_len = req.nl.nlmsg_len;
507 ret = sendmsg(sock, &msg, 0);
509 printf("send to netlink: %s\n", strerror(errno));
// The reply is collected in the file-scope buffer buf.
513 memset(buf, 0, sizeof(buf));
514 nll = recv_msg(sa, sock);
// NOTE(review): strerror() expects an errno value, but nll is the return
// value of recv_msg(); the other error paths in this function pass errno.
516 printf("recv from netlink: %s\n", strerror(nll));
520 nh = (struct nlmsghdr *)buf;
527 /* Function to keep track of changes in the route and ARP tables and update the maps.
528 * Gives regular statistics of packets forwarded.
*/
// Monitoring loop: subscribes one netlink socket to route notifications and
// a second one to neighbour notifications, prints per-key packet rates read
// from the rxcnt map, and polls both sockets for table changes.
// NOTE(review): this extract drops lines of the body (braces, declarations,
// the enclosing loop, the calls that consume nh and the return statements).
530 static int monitor_route(void)
532 unsigned int nr_cpus = bpf_num_possible_cpus();
533 const unsigned int nr_keys = 256;
534 struct pollfd fds_route, fds_arp;
// NOTE(review): prev and values are variable-length arrays on the stack;
// prev takes 256 * nr_cpus * 8 bytes.
535 __u64 prev[nr_keys][nr_cpus];
536 struct sockaddr_nl la, lr;
537 __u64 values[nr_cpus];
// Route notification socket, kept in the global sock.
544 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
546 printf("open netlink socket: %s\n", strerror(errno));
// NOTE(review): F_SETFL is called with O_NONBLOCK alone, which replaces the
// current file status flags instead of adding to them.
550 fcntl(sock, F_SETFL, O_NONBLOCK);
551 memset(&lr, 0, sizeof(lr));
552 lr.nl_family = AF_NETLINK;
553 lr.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY;
554 if (bind(sock, (struct sockaddr *)&lr, sizeof(lr)) < 0) {
555 printf("bind to netlink: %s\n", strerror(errno));
// NOTE(review): POLL_IN is the si_code constant for SIGPOLL. The event flag
// documented for poll() is POLLIN - confirm the intended constant here and
// in the comparisons against the poll() return value below.
560 fds_route.events = POLL_IN;
// Neighbour notification socket, kept in the global sock_arp.
562 sock_arp = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
564 printf("open netlink socket: %s\n", strerror(errno));
568 fcntl(sock_arp, F_SETFL, O_NONBLOCK);
569 memset(&la, 0, sizeof(la));
570 la.nl_family = AF_NETLINK;
571 la.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY;
572 if (bind(sock_arp, (struct sockaddr *)&la, sizeof(la)) < 0) {
573 printf("bind to netlink: %s\n", strerror(errno));
577 fds_arp.fd = sock_arp;
578 fds_arp.events = POLL_IN;
580 memset(prev, 0, sizeof(prev));
// From here on SIGINT and SIGTERM are handled by close_and_exit.
582 signal(SIGINT, close_and_exit);
583 signal(SIGTERM, close_and_exit);
// Statistics: for each key, sum the per-CPU increase since the previous
// pass, print it divided by interval and remember the new values.
586 for (key = 0; key < nr_keys; key++) {
// NOTE(review): the lookup is wrapped in assert(), so a build with NDEBUG
// defined would remove it.
589 assert(bpf_map_lookup_elem(rxcnt_map_fd,
591 for (i = 0; i < nr_cpus; i++)
592 sum += (values[i] - prev[key][i]);
594 printf("proto %u: %10llu pkt/s\n",
595 key, sum / interval);
596 memcpy(prev[key], values, sizeof(values));
// Route updates: poll with a timeout argument of 3, then read through recv_msg().
// The poll() return value is compared for equality with POLL_IN.
599 memset(buf, 0, sizeof(buf));
600 if (poll(&fds_route, 1, 3) == POLL_IN) {
601 nll = recv_msg(lr, sock);
// NOTE(review): strerror() is handed the recv_msg() return value, not errno.
603 printf("recv from netlink: %s\n", strerror(nll));
608 nh = (struct nlmsghdr *)buf;
609 printf("Routing table updated.\n");
// Neighbour updates: same pattern on the second socket.
612 memset(buf, 0, sizeof(buf));
613 if (poll(&fds_arp, 1, 3) == POLL_IN) {
614 nll = recv_msg(la, sock_arp);
616 printf("recv from netlink: %s\n", strerror(nll));
621 nh = (struct nlmsghdr *)buf;
// Command-line help for prog; main() calls it with basename(argv[0]).
// NOTE(review): only two fragments of the format string are visible in this
// extract; the call that prints it and the remaining option lines are not shown.
631 static void usage(const char *prog)
634 "%s: %s [OPTS] interface name list\n\n"
637 " -F force loading prog\n",
// Entry point. Loads the XDP object whose file name is derived from argv[0],
// attaches its program to every interface named on the command line, reads
// the current route and ARP tables through get_route_table() and
// get_arp_table(), then runs monitor_route().
// NOTE(review): this extract drops lines of the body (braces, declarations,
// the option case labels, error-path returns), so option letters and exit
// codes cannot be confirmed from here.
641 int main(int ac, char **argv)
643 struct bpf_prog_load_attr prog_load_attr = {
644 .prog_type = BPF_PROG_TYPE_XDP,
646 struct bpf_prog_info info = {};
647 __u32 info_len = sizeof(info);
648 const char *optstr = "SF";
649 struct bpf_object *obj;
// The object file name is argv[0] with the suffix _kern.o appended.
655 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
656 prog_load_attr.file = filename;
// Initial guess: every argument after argv[0] is an interface name.
// NOTE(review): any adjustment for option arguments is not visible here.
658 total_ifindex = ac - 1;
659 ifname_list = (argv + 1);
661 while ((opt = getopt(ac, argv, optstr)) != -1) {
664 flags |= XDP_FLAGS_SKB_MODE;
// Clearing UPDATE_IF_NOEXIST matches the usage text for forced loading.
669 flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
674 usage(basename(argv[0]));
// Driver mode is selected unless SKB mode was requested.
679 if (!(flags & XDP_FLAGS_SKB_MODE))
680 flags |= XDP_FLAGS_DRV_MODE;
683 usage(basename(argv[0]));
687 if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
690 printf("\n******************loading bpf file*********************\n");
692 printf("bpf_prog_load_xattr: %s\n", strerror(errno));
// Resolve the five maps by name; any negative descriptor is reported as a failure.
696 lpm_map_fd = bpf_object__find_map_fd_by_name(obj, "lpm_map");
697 rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
698 arp_table_map_fd = bpf_object__find_map_fd_by_name(obj, "arp_table");
699 exact_match_map_fd = bpf_object__find_map_fd_by_name(obj,
701 tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port");
702 if (lpm_map_fd < 0 || rxcnt_map_fd < 0 || arp_table_map_fd < 0 ||
703 exact_match_map_fd < 0 || tx_port_map_fd < 0) {
704 printf("bpf_object__find_map_fd_by_name failed\n");
// NOTE(review): the element size is sizeof(int *) although the array holds
// int, so it over-allocates on platforms where pointers are wider. The
// calloc() result is used without a check in the visible lines.
708 ifindex_list = (int *)calloc(total_ifindex, sizeof(int *));
709 for (i = 0; i < total_ifindex; i++) {
710 ifindex_list[i] = if_nametoindex(ifname_list[i]);
711 if (!ifindex_list[i]) {
712 printf("Couldn't translate interface name: %s",
// NOTE(review): same pattern - sizeof(__u32 *) for an array of __u32, and no
// visible check of the result.
717 prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *));
// Attach the program to each interface. If one attach fails, the interfaces
// attached so far are passed to bpf_set_link_xdp_fd() with fd -1.
718 for (i = 0; i < total_ifindex; i++) {
719 if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) {
720 printf("link set xdp fd failed\n");
721 int recovery_index = i;
723 for (i = 0; i < recovery_index; i++)
724 bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
// Record the id of the program just attached so int_exit() can compare it later.
728 err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
730 printf("can't get prog info - %s\n", strerror(errno));
733 prog_id_list[i] = info.id;
734 memset(&info, 0, sizeof(info));
735 printf("Attached to %d\n", ifindex_list[i]);
// From here on SIGINT and SIGTERM are handled by int_exit until
// monitor_route() installs close_and_exit.
737 signal(SIGINT, int_exit);
738 signal(SIGTERM, int_exit);
740 printf("\n*******************ROUTE TABLE*************************\n");
741 get_route_table(AF_INET);
742 printf("\n*******************ARP TABLE***************************\n");
743 get_arp_table(AF_INET);
744 if (monitor_route() < 0) {
745 printf("Error in receiving route update");