1 /* SPDX-License-Identifier: MIT */
2 /* based on linux-kernel/tools/testing/selftests/net/msg_zerocopy.c */
15 #include <arpa/inet.h>
16 #include <linux/errqueue.h>
17 #include <linux/if_packet.h>
18 #include <linux/io_uring.h>
19 #include <linux/ipv6.h>
20 #include <linux/socket.h>
21 #include <linux/sockios.h>
22 #include <net/ethernet.h>
24 #include <netinet/in.h>
25 #include <netinet/ip.h>
26 #include <netinet/ip6.h>
27 #include <netinet/tcp.h>
28 #include <netinet/udp.h>
29 #include <sys/ioctl.h>
31 #include <sys/resource.h>
32 #include <sys/socket.h>
35 #include <sys/types.h>
/* cqe->user_data tags distinguishing notification, plain and zerocopy CQEs */
#define NOTIF_TAG 0xfffffffULL
#define NONZC_TAG 0
#define ZC_TAG 1

/* transmission modes selectable with -m */
enum {
	MODE_NONZC	= 0,	/* plain IORING_OP_SEND */
	MODE_ZC		= 1,	/* IORING_OP_SEND_ZC */
	MODE_ZC_FIXED	= 2,	/* IORING_OP_SEND_ZC with a registered buffer */
	MODE_MIXED	= 3,	/* random mix of the above */
};

static bool cfg_cork = false;		/* -c: wrap each batch in UDP_CORK */
static int cfg_mode = MODE_ZC_FIXED;	/* -m: transmission mode */
static int cfg_nr_reqs = 8;		/* -n: requests submitted per batch */
static int cfg_family = PF_UNSPEC;	/* -4 / -6 */
static int cfg_payload_len;		/* -s: bytes per send */
static int cfg_port = 8000;		/* -p: destination port */
static int cfg_runtime_ms = 4200;	/* -t: test duration */

static socklen_t cfg_alen;
static struct sockaddr_storage cfg_dst_addr;

/* page-aligned so the buffer is eligible for io_uring buffer registration */
static char payload[IP_MAXPACKET] __attribute__((aligned(4096)));
/* raw SQ ring view (unused legacy mirror kept for layout reference) */
struct io_sq_ring {
	unsigned *head;
	unsigned *tail;
	unsigned *ring_mask;
	unsigned *ring_entries;
	unsigned *flags;
	unsigned *array;
};

/* raw CQ ring view (unused legacy mirror kept for layout reference) */
struct io_cq_ring {
	unsigned *head;
	unsigned *tail;
	unsigned *ring_mask;
	unsigned *ring_entries;
	struct io_uring_cqe *cqes;
};

/* submission queue: kernel-mapped pointers plus local head/tail shadow */
struct io_uring_sq {
	unsigned *khead;
	unsigned *ktail;
	unsigned *kring_mask;
	unsigned *kring_entries;
	unsigned *kflags;
	unsigned *kdropped;
	unsigned *array;
	struct io_uring_sqe *sqes;

	unsigned sqe_head;	/* local copy, synced on submit */
	unsigned sqe_tail;

	size_t ring_sz;		/* mmap length, needed for munmap */
};

/* completion queue: kernel-mapped pointers */
struct io_uring_cq {
	unsigned *khead;
	unsigned *ktail;
	unsigned *kring_mask;
	unsigned *kring_entries;
	unsigned *koverflow;
	struct io_uring_cqe *cqes;

	size_t ring_sz;		/* mmap length, needed for munmap */
};

/* minimal ring handle: both queues plus the ring file descriptor */
struct io_uring {
	struct io_uring_sq sq;
	struct io_uring_cq cq;
	int ring_fd;
};
/*
 * Fallback syscall numbers for libcs whose headers predate io_uring.
 * alpha has its own numbering; every other architecture shares the
 * generic 425-427 range.
 */
#ifdef __alpha__
# ifndef __NR_io_uring_setup
# define __NR_io_uring_setup 535
# endif
# ifndef __NR_io_uring_enter
# define __NR_io_uring_enter 536
# endif
# ifndef __NR_io_uring_register
# define __NR_io_uring_register 537
# endif
#else /* !__alpha__ */
# ifndef __NR_io_uring_setup
# define __NR_io_uring_setup 425
# endif
# ifndef __NR_io_uring_enter
# define __NR_io_uring_enter 426
# endif
# ifndef __NR_io_uring_register
# define __NR_io_uring_register 427
# endif
#endif
/*
 * Ring head/tail synchronization barriers. x86's strong memory model only
 * needs a compiler barrier; other architectures get a full fence.
 */
#if defined(__x86_64) || defined(__i386__)
#define read_barrier() __asm__ __volatile__("":::"memory")
#define write_barrier() __asm__ __volatile__("":::"memory")
#else
#define read_barrier() __sync_synchronize()
#define write_barrier() __sync_synchronize()
#endif
/*
 * Create an io_uring instance with @entries SQ slots; @p is filled with the
 * ring offsets used by io_uring_mmap(). Returns the ring fd, or -1 with
 * errno set on failure.
 */
static int io_uring_setup(unsigned int entries, struct io_uring_params *p)
{
	return syscall(__NR_io_uring_setup, entries, p);
}
149 static int io_uring_enter(int fd, unsigned int to_submit,
150 unsigned int min_complete,
151 unsigned int flags, sigset_t *sig)
153 return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
154 flags, sig, _NSIG / 8);
157 static int io_uring_register_buffers(struct io_uring *ring,
158 const struct iovec *iovecs,
163 ret = syscall(__NR_io_uring_register, ring->ring_fd,
164 IORING_REGISTER_BUFFERS, iovecs, nr_iovecs);
165 return (ret < 0) ? -errno : ret;
168 static int io_uring_mmap(int fd, struct io_uring_params *p,
169 struct io_uring_sq *sq, struct io_uring_cq *cq)
175 sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned);
176 ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
177 MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
178 if (ptr == MAP_FAILED)
180 sq->khead = ptr + p->sq_off.head;
181 sq->ktail = ptr + p->sq_off.tail;
182 sq->kring_mask = ptr + p->sq_off.ring_mask;
183 sq->kring_entries = ptr + p->sq_off.ring_entries;
184 sq->kflags = ptr + p->sq_off.flags;
185 sq->kdropped = ptr + p->sq_off.dropped;
186 sq->array = ptr + p->sq_off.array;
188 size = p->sq_entries * sizeof(struct io_uring_sqe);
189 sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
190 MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
191 if (sq->sqes == MAP_FAILED) {
194 munmap(sq->khead, sq->ring_sz);
198 cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);
199 ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
200 MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
201 if (ptr == MAP_FAILED) {
203 munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe));
206 cq->khead = ptr + p->cq_off.head;
207 cq->ktail = ptr + p->cq_off.tail;
208 cq->kring_mask = ptr + p->cq_off.ring_mask;
209 cq->kring_entries = ptr + p->cq_off.ring_entries;
210 cq->koverflow = ptr + p->cq_off.overflow;
211 cq->cqes = ptr + p->cq_off.cqes;
215 static int io_uring_queue_init(unsigned entries, struct io_uring *ring,
218 struct io_uring_params p;
221 memset(ring, 0, sizeof(*ring));
222 memset(&p, 0, sizeof(p));
225 fd = io_uring_setup(entries, &p);
228 ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq);
236 static int io_uring_submit(struct io_uring *ring)
238 struct io_uring_sq *sq = &ring->sq;
239 const unsigned mask = *sq->kring_mask;
240 unsigned ktail, submitted, to_submit;
244 if (*sq->khead != *sq->ktail) {
245 submitted = *sq->kring_entries;
248 if (sq->sqe_head == sq->sqe_tail)
252 to_submit = sq->sqe_tail - sq->sqe_head;
253 for (submitted = 0; submitted < to_submit; submitted++) {
255 sq->array[ktail++ & mask] = sq->sqe_head++ & mask;
260 if (*sq->ktail != ktail) {
266 ret = io_uring_enter(ring->ring_fd, submitted, 0,
267 IORING_ENTER_GETEVENTS, NULL);
268 return ret < 0 ? -errno : ret;
/*
 * Initialize @sqe as a plain (copying) send of @len bytes at @buf on
 * @sockfd. The SQE is fully zeroed first so unused fields are defined.
 */
static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
				      const void *buf, size_t len, int flags)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = (__u8) IORING_OP_SEND;
	sqe->fd = sockfd;
	sqe->addr = (unsigned long) buf;
	sqe->len = len;
	sqe->msg_flags = (__u32) flags;
}
282 static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd,
283 const void *buf, size_t len, int flags,
286 io_uring_prep_send(sqe, sockfd, buf, len, flags);
287 sqe->opcode = (__u8) IORING_OP_SEND_ZC;
288 sqe->ioprio = zc_flags;
291 static struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
293 struct io_uring_sq *sq = &ring->sq;
295 if (sq->sqe_tail + 1 - sq->sqe_head > *sq->kring_entries)
297 return &sq->sqes[sq->sqe_tail++ & *sq->kring_mask];
300 static int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
302 struct io_uring_cq *cq = &ring->cq;
303 const unsigned mask = *cq->kring_mask;
304 unsigned head = *cq->khead;
310 if (head != *cq->ktail) {
311 *cqe_ptr = &cq->cqes[head & mask];
314 ret = io_uring_enter(ring->ring_fd, 0, 1,
315 IORING_ENTER_GETEVENTS, NULL);
323 static inline void io_uring_cqe_seen(struct io_uring *ring)
325 *(&ring->cq)->khead += 1;
/* Wall-clock time in milliseconds, used for the test-duration deadline. */
static unsigned long gettimeofday_ms(void)
{
	struct timeval tv;

	gettimeofday(&tv, NULL);
	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
}
/* setsockopt() for an int value; exits the test on any failure. */
static void do_setsockopt(int fd, int level, int optname, int val)
{
	if (setsockopt(fd, level, optname, &val, sizeof(val)))
		error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
}
343 static int do_setup_tx(int domain, int type, int protocol)
347 fd = socket(domain, type, protocol);
349 error(1, errno, "socket t");
351 do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
353 if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
354 error(1, errno, "connect");
358 static void do_tx(int domain, int type, int protocol)
360 struct io_uring_sqe *sqe;
361 struct io_uring_cqe *cqe;
362 unsigned long packets = 0, bytes = 0;
363 struct io_uring ring;
369 fd = do_setup_tx(domain, type, protocol);
371 ret = io_uring_queue_init(512, &ring, 0);
373 error(1, ret, "io_uring: queue init");
375 iov.iov_base = payload;
376 iov.iov_len = cfg_payload_len;
378 ret = io_uring_register_buffers(&ring, &iov, 1);
380 error(1, ret, "io_uring: buffer registration");
382 tstop = gettimeofday_ms() + cfg_runtime_ms;
385 do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
387 for (i = 0; i < cfg_nr_reqs; i++) {
388 unsigned zc_flags = 0;
389 unsigned buf_idx = 0;
390 unsigned mode = cfg_mode;
391 unsigned msg_flags = MSG_WAITALL;
393 if (cfg_mode == MODE_MIXED)
396 sqe = io_uring_get_sqe(&ring);
398 if (mode == MODE_NONZC) {
399 io_uring_prep_send(sqe, fd, payload,
400 cfg_payload_len, msg_flags);
401 sqe->user_data = NONZC_TAG;
404 io_uring_prep_sendzc(sqe, fd, payload,
406 msg_flags, zc_flags);
407 if (mode == MODE_ZC_FIXED) {
408 sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
409 sqe->buf_index = buf_idx;
411 sqe->user_data = ZC_TAG;
415 ret = io_uring_submit(&ring);
416 if (ret != cfg_nr_reqs)
417 error(1, ret, "submit");
420 do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
421 for (i = 0; i < cfg_nr_reqs; i++) {
422 ret = io_uring_wait_cqe(&ring, &cqe);
424 error(1, ret, "wait cqe");
426 if (cqe->user_data != NONZC_TAG &&
427 cqe->user_data != ZC_TAG)
428 error(1, -EINVAL, "invalid cqe->user_data");
430 if (cqe->flags & IORING_CQE_F_NOTIF) {
431 if (cqe->flags & IORING_CQE_F_MORE)
432 error(1, -EINVAL, "invalid notif flags");
435 } else if (cqe->res <= 0) {
436 if (cqe->flags & IORING_CQE_F_MORE)
437 error(1, cqe->res, "more with a failed send");
438 error(1, cqe->res, "send failed");
440 if (cqe->user_data == ZC_TAG &&
441 !(cqe->flags & IORING_CQE_F_MORE))
442 error(1, cqe->res, "missing more flag");
446 io_uring_cqe_seen(&ring);
448 } while (gettimeofday_ms() < tstop);
451 ret = io_uring_wait_cqe(&ring, &cqe);
453 error(1, ret, "wait cqe");
454 if (cqe->flags & IORING_CQE_F_MORE)
455 error(1, -EINVAL, "invalid notif flags");
456 if (!(cqe->flags & IORING_CQE_F_NOTIF))
457 error(1, -EINVAL, "missing notif flag");
459 io_uring_cqe_seen(&ring);
463 fprintf(stderr, "tx=%lu (MB=%lu), tx/s=%lu (MB/s=%lu)\n",
464 packets, bytes >> 20,
465 packets / (cfg_runtime_ms / 1000),
466 (bytes >> 20) / (cfg_runtime_ms / 1000));
469 error(1, errno, "close");
472 static void do_test(int domain, int type, int protocol)
476 for (i = 0; i < IP_MAXPACKET; i++)
477 payload[i] = 'a' + (i % 26);
478 do_tx(domain, type, protocol);
/* Print usage and exit(1); never returns. */
static void usage(const char *filepath)
{
	error(1, 0, "Usage: %s (-4|-6) (udp|tcp) -D<dst_ip> [-s<payload size>] "
		    "[-t<time s>] [-n<batch>] [-p<port>] [-m<mode>]", filepath);
}
487 static void parse_opts(int argc, char **argv)
489 const int max_payload_len = sizeof(payload) -
490 sizeof(struct ipv6hdr) -
491 sizeof(struct tcphdr) -
492 40 /* max tcp options */;
493 struct sockaddr_in6 *addr6 = (void *) &cfg_dst_addr;
494 struct sockaddr_in *addr4 = (void *) &cfg_dst_addr;
500 cfg_payload_len = max_payload_len;
502 while ((c = getopt(argc, argv, "46D:p:s:t:n:c:m:")) != -1) {
505 if (cfg_family != PF_UNSPEC)
506 error(1, 0, "Pass one of -4 or -6");
507 cfg_family = PF_INET;
508 cfg_alen = sizeof(struct sockaddr_in);
511 if (cfg_family != PF_UNSPEC)
512 error(1, 0, "Pass one of -4 or -6");
513 cfg_family = PF_INET6;
514 cfg_alen = sizeof(struct sockaddr_in6);
520 cfg_port = strtoul(optarg, NULL, 0);
523 cfg_payload_len = strtoul(optarg, NULL, 0);
526 cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
529 cfg_nr_reqs = strtoul(optarg, NULL, 0);
532 cfg_cork = strtol(optarg, NULL, 0);
535 cfg_mode = strtol(optarg, NULL, 0);
540 switch (cfg_family) {
542 memset(addr4, 0, sizeof(*addr4));
543 addr4->sin_family = AF_INET;
544 addr4->sin_port = htons(cfg_port);
546 inet_pton(AF_INET, daddr, &(addr4->sin_addr)) != 1)
547 error(1, 0, "ipv4 parse error: %s", daddr);
550 memset(addr6, 0, sizeof(*addr6));
551 addr6->sin6_family = AF_INET6;
552 addr6->sin6_port = htons(cfg_port);
554 inet_pton(AF_INET6, daddr, &(addr6->sin6_addr)) != 1)
555 error(1, 0, "ipv6 parse error: %s", daddr);
558 error(1, 0, "illegal domain");
561 if (cfg_payload_len > max_payload_len)
562 error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
563 if (optind != argc - 1)
567 int main(int argc, char **argv)
569 const char *cfg_test = argv[argc - 1];
571 parse_opts(argc, argv);
573 if (!strcmp(cfg_test, "tcp"))
574 do_test(cfg_family, SOCK_STREAM, 0);
575 else if (!strcmp(cfg_test, "udp"))
576 do_test(cfg_family, SOCK_DGRAM, 0);
578 error(1, 0, "unknown cfg_test %s", cfg_test);