// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"

#if defined(CONFIG_NET)
struct io_shutdown {
	struct file *file;
	int how;
};

struct io_accept {
	struct file *file;
	struct sockaddr __user *addr;
	int __user *addr_len;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_socket {
	struct file *file;
	int domain;
	int type;
	int protocol;
	int flags;
	u32 file_slot;
	unsigned long nofile;
};

struct io_connect {
	struct file *file;
	struct sockaddr __user *addr;
	int addr_len;
};

struct io_sr_msg {
	struct file *file;
	union {
		struct compat_msghdr __user *umsg_compat;
		struct user_msghdr __user *umsg;
		void __user *buf;
	};
	unsigned msg_flags;
	unsigned flags;
	size_t len;
	size_t done_io;
};

struct io_sendzc {
	struct file *file;
	void __user *buf;
	size_t len;
	u16 slot_idx;
	unsigned msg_flags;
	unsigned flags;
	unsigned addr_len;
	void __user *addr;
	size_t done_io;
};

#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
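
/*
 * IORING_OP_SHUTDOWN: thin wrapper around shutdown(2) on the request's
 * socket. Prep only validates the unused SQE fields and records 'how';
 * the issue side punts to blocking context (io-wq) by returning -EAGAIN
 * for nonblocking issue.
 */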

int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}
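
/*
 * Cached struct io_async_msghdr handling: when a request completes with the
 * ring lock held, its async msghdr is returned to ctx->netmsg_cache so the
 * next sendmsg/recvmsg can reuse it instead of going through kmalloc.
 */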

static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;

	if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED)
		return;

	/* Let normal cleanup path reap it if we fail adding to the cache */
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) {
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}

static struct io_async_msghdr *io_recvmsg_alloc_async(struct io_kiocb *req,
						      unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cache_entry *entry;

	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    (entry = io_alloc_cache_get(&ctx->netmsg_cache)) != NULL) {
		struct io_async_msghdr *hdr;

		hdr = container_of(entry, struct io_async_msghdr, cache);
		req->flags |= REQ_F_ASYNC_DATA;
		req->async_data = hdr;
		return hdr;
	}

	if (!io_alloc_async_data(req))
		return req->async_data;

	return NULL;
}
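
/*
 * Stash the on-stack msghdr into async data so the request can be retried
 * later from task work or io-wq. Returns -EAGAIN on success so the core
 * code requeues the request, or -ENOMEM if no async data could be
 * allocated.
 */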

static int io_setup_async_msg(struct io_kiocb *req,
			      struct io_async_msghdr *kmsg,
			      unsigned int issue_flags)
{
	struct io_async_msghdr *async_msg;

	if (req_has_async_data(req))
		return -EAGAIN;
	async_msg = io_recvmsg_alloc_async(req, issue_flags);
	if (!async_msg) {
		kfree(kmsg->free_iov);
		return -ENOMEM;
	}
	req->flags |= REQ_F_NEED_CLEANUP;
	memcpy(async_msg, kmsg, sizeof(*kmsg));
	async_msg->msg.msg_name = &async_msg->addr;
	/* if we were using fast_iov, set it to the new one */
	if (!async_msg->free_iov)
		async_msg->msg.msg_iter.iov = async_msg->fast_iov;

	return -EAGAIN;
}

static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->free_iov = iomsg->fast_iov;
	return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
					&iomsg->free_iov);
}

int io_sendzc_prep_async(struct io_kiocb *req)
{
	struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
	struct io_async_msghdr *io;
	int ret;

	if (!zc->addr || req_has_async_data(req))
		return 0;
	if (io_alloc_async_data(req))
		return -ENOMEM;

	io = req->async_data;
	ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr);
	return ret;
}

static int io_setup_async_addr(struct io_kiocb *req,
			       struct sockaddr_storage *addr,
			       unsigned int issue_flags)
{
	struct io_async_msghdr *io;

	if (!addr || req_has_async_data(req))
		return -EAGAIN;
	if (io_alloc_async_data(req))
		return -ENOMEM;
	io = req->async_data;
	memcpy(&io->addr, addr, sizeof(io->addr));
	return -EAGAIN;
}

int io_sendmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	ret = io_sendmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	kfree(io->free_iov);
}

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}
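
/*
 * IORING_OP_SENDMSG issue path. If IORING_RECVSEND_POLL_FIRST was requested
 * and the request has not been through poll yet, the msghdr is parked in
 * async data and the send is retried once the socket becomes writable.
 * Partial sends on stream/seqpacket sockets with MSG_WAITALL are retried
 * as well, accumulating progress in sr->done_io.
 */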

int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_sendmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_msg(req, kmsg, issue_flags);
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		req_set_fail(req);
	}
	/* fast path, check for non-NULL to avoid function call */
	if (kmsg->free_iov)
		kfree(kmsg->free_iov);
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
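
/*
 * IORING_OP_SEND issue path: like io_sendmsg() but for a single buffer, so
 * no user msghdr has to be copied and a plain -EAGAIN is enough to get the
 * request retried.
 */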

int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct iovec iov;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		return ret;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;
	msg.msg_ubuf = NULL;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg)
{
	int hdr;

	if (iomsg->namelen < 0)
		return true;
	if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out),
			       iomsg->namelen, &hdr))
		return true;
	if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr))
		return true;

	return false;
}

static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr msg;
	int ret;

	if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg)))
		return -EFAULT;

	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg.msg_iovlen == 0) {
			sr->len = iomsg->fast_iov[0].iov_len = 0;
			iomsg->fast_iov[0].iov_base = NULL;
			iomsg->free_iov = NULL;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov)))
				return -EFAULT;
			sr->len = iomsg->fast_iov[0].iov_len;
			iomsg->free_iov = NULL;
		}

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(READ, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV,
				     &iomsg->free_iov, &iomsg->msg.msg_iter,
				     false);
		if (ret > 0)
			ret = 0;
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
					struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_msghdr msg;
	struct compat_iovec __user *uiov;
	int ret;

	if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg)))
		return -EFAULT;

	ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (ret)
		return ret;

	uiov = compat_ptr(msg.msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		if (msg.msg_iovlen == 0) {
			sr->len = 0;
			iomsg->free_iov = NULL;
		} else if (msg.msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (!access_ok(uiov, sizeof(*uiov)))
				return -EFAULT;
			if (__get_user(clen, &uiov->iov_len))
				return -EFAULT;
			if (clen < 0)
				return -EINVAL;
			sr->len = clen;
			iomsg->free_iov = NULL;
		}

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			iomsg->namelen = msg.msg_namelen;
			iomsg->controllen = msg.msg_controllen;
			if (io_recvmsg_multishot_overflow(iomsg))
				return -EOVERFLOW;
		}
	} else {
		iomsg->free_iov = iomsg->fast_iov;
		ret = __import_iovec(READ, (struct iovec __user *)uiov, msg.msg_iovlen,
				     UIO_FASTIOV, &iomsg->free_iov,
				     &iomsg->msg.msg_iter, true);
		if (ret < 0)
			return ret;
	}

	return 0;
}
#endif

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	iomsg->msg.msg_name = &iomsg->addr;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		return __io_compat_recvmsg_copy_hdr(req, iomsg);
#endif

	return __io_recvmsg_copy_hdr(req, iomsg);
}

int io_recvmsg_prep_async(struct io_kiocb *req)
{
	int ret;

	ret = io_recvmsg_copy_hdr(req, req->async_data);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~(RECVMSG_FLAGS))
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV && sr->len)
			return -EINVAL;
		req->flags |= REQ_F_APOLL_MULTISHOT;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->done_io = 0;
	return 0;
}

static inline void io_recv_prep_retry(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;
	sr->len = 0; /* get from the provided buffer */
}

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  unsigned int cflags, bool mshot_finished)
{
	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, *ret, cflags);
		*ret = IOU_OK;
		return true;
	}

	if (!mshot_finished) {
		if (io_post_aux_cqe(req->ctx, req->cqe.user_data, *ret,
				    cflags | IORING_CQE_F_MORE, false)) {
			io_recv_prep_retry(req);
			return false;
		}
		/*
		 * Otherwise stop multishot but use the current result.
		 * Probably will end up going into overflow, but this means
		 * we cannot trust the ordering anymore
		 */
	}

	io_req_set_res(req, *ret, cflags);

	if (req->flags & REQ_F_POLLED)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	return true;
}
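
/*
 * For multishot recvmsg the provided buffer is split in two: a struct
 * io_uring_recvmsg_out header (plus space for the source address and any
 * control data) at the front, and the payload behind it. Lay that out here
 * and shrink *len to the payload portion.
 */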

static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};
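
/*
 * Do one multishot receive and copy the io_uring_recvmsg_out header (and as
 * much of the source address as fits) back to the user buffer selected for
 * this CQE. *finished is set when the receive returned an error or EOF,
 * which ends the multishot loop.
 */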

static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 *	"fromlen shall refer to the value before truncation.."
	 *			1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}
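
/*
 * IORING_OP_RECVMSG issue path, covering both the one-shot case and
 * IORING_RECV_MULTISHOT. Multishot loops via retry_multishot, picking a
 * fresh provided buffer for every completion until no more data is
 * available or posting the aux CQE fails.
 */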

int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr iomsg, *kmsg;
	struct socket *sock;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (req_has_async_data(req)) {
		kmsg = req->async_data;
	} else {
		ret = io_recvmsg_copy_hdr(req, &iomsg);
		if (ret)
			return ret;
		kmsg = &iomsg;
	}

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return io_setup_async_msg(req, kmsg, issue_flags);

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		kmsg->fast_iov[0].iov_base = buf;
		kmsg->fast_iov[0].iov_len = len;
		iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1,
				len);
	}

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_get_inq = 1;
	if (req->flags & REQ_F_APOLL_MULTISHOT)
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	else
		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			ret = io_setup_async_msg(req, kmsg, issue_flags);
			if (ret == -EAGAIN && (req->flags & IO_APOLL_MULTI_POLLED) ==
					       IO_APOLL_MULTI_POLLED) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_msg(req, kmsg, issue_flags);
		}
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (kmsg->msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!io_recv_finish(req, &ret, cflags, mshot_finished))
		goto retry_multishot;

	if (mshot_finished) {
		io_netmsg_recycle(req, issue_flags);
		/* fast path, check for non-NULL to avoid function call */
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}

	return ret;
}
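
/*
 * IORING_OP_RECV issue path: single-buffer receive, with the same provided
 * buffer and multishot handling as io_recvmsg() minus the msghdr copying.
 */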

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct msghdr msg;
	struct socket *sock;
	struct iovec iov;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
	}

	ret = import_single_range(READ, sr->buf, len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		goto out_free;

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = NULL;
	msg.msg_get_inq = 1;
	msg.msg_flags = 0;
	msg.msg_controllen = 0;
	msg.msg_iocb = NULL;
	msg.msg_ubuf = NULL;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	ret = sock_recvmsg(sock, &msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if ((req->flags & IO_APOLL_MULTI_POLLED) == IO_APOLL_MULTI_POLLED) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}

			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	cflags = io_put_kbuf(req, issue_flags);
	if (msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!io_recv_finish(req, &ret, cflags, ret <= 0))
		goto retry_multishot;

	return ret;
}
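
/*
 * Zero-copy send prep. On top of the normal send fields, the SQE carries:
 * ioprio for the IORING_RECVSEND_* flags, buf_index to select a registered
 * buffer when IORING_RECVSEND_FIXED_BUF is set, notification_idx to pick
 * the notification slot, and addr2/addr_len for an optional destination
 * address.
 */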

int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
	struct io_ring_ctx *ctx = req->ctx;

	if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3))
		return -EINVAL;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST |
			  IORING_RECVSEND_FIXED_BUF | IORING_RECVSEND_NOTIF_FLUSH))
		return -EINVAL;
	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		unsigned idx = READ_ONCE(sqe->buf_index);

		if (unlikely(idx >= ctx->nr_user_bufs))
			return -EFAULT;
		idx = array_index_nospec(idx, ctx->nr_user_bufs);
		req->imu = READ_ONCE(ctx->user_bufs[idx]);
		io_req_set_rsrc_node(req, ctx, 0);
	}

	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	zc->slot_idx = READ_ONCE(sqe->notification_idx);
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

	zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	zc->addr_len = READ_ONCE(sqe->addr_len);
	zc->done_io = 0;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return 0;
}
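
/*
 * Custom ->sg_from_iter() for zero-copy sends of registered (bvec) buffers:
 * map the pages straight into skb frags without taking page references,
 * keeping the skb marked SKBFL_MANAGED_FRAG_REFS. Anything else is
 * downgraded and handed to __zerocopy_sg_from_iter().
 */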

static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!shinfo->nr_frags)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;

	if (!skb_zcopy_managed(skb) || !iov_iter_is_bvec(from)) {
		skb_zcopy_downgrade_managed(skb);
		return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
	}

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count = bi.bi_size;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;

	if (sk && sk->sk_type == SOCK_STREAM) {
		sk_wmem_queued_add(sk, truesize);
		if (!skb_zcopy_pure(skb))
			sk_mem_charge(sk, truesize);
	} else {
		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
	}
	return ret;
}
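
/*
 * Zero-copy send issue path. The payload pages are handed to the network
 * stack by reference through the notification's ubuf_info (msg_ubuf), with
 * the slot's notification CQE intended to signal, later, when the kernel
 * has dropped its references to the user buffer. IORING_RECVSEND_NOTIF_FLUSH
 * flushes the slot right after a successful send.
 */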

int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct sockaddr_storage __address, *addr = NULL;
	struct io_ring_ctx *ctx = req->ctx;
	struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
	struct io_notif_slot *notif_slot;
	struct io_kiocb *notif;
	struct msghdr msg;
	struct iovec iov;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	if (issue_flags & IO_URING_F_UNLOCKED)
		return -EAGAIN;
	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	notif_slot = io_get_notif_slot(ctx, zc->slot_idx);
	if (!notif_slot)
		return -EINVAL;
	notif = io_get_notif(ctx, notif_slot);
	if (!notif)
		return -ENOMEM;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;

	if (zc->addr) {
		if (req_has_async_data(req)) {
			struct io_async_msghdr *io = req->async_data;

			msg.msg_name = addr = &io->addr;
		} else {
			ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address);
			if (unlikely(ret < 0))
				return ret;
			msg.msg_name = (struct sockaddr *)&__address;
			addr = &__address;
		}
		msg.msg_namelen = zc->addr_len;
	}

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu,
					(u64)(uintptr_t)zc->buf, zc->len);
		if (unlikely(ret))
			return ret;
	} else {
		ret = import_single_range(WRITE, zc->buf, zc->len, &iov,
					  &msg.msg_iter);
		if (unlikely(ret))
			return ret;
		ret = io_notif_account_mem(notif, zc->len);
		if (unlikely(ret))
			return ret;
	}

	msg_flags = zc->msg_flags | MSG_ZEROCOPY;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	msg.msg_flags = msg_flags;
	msg.msg_ubuf = &io_notif_to_data(notif)->uarg;
	msg.sg_from_iter = io_sg_from_iter;
	ret = sock_sendmsg(sock, &msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return io_setup_async_addr(req, addr, issue_flags);

		if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return io_setup_async_addr(req, addr, issue_flags);
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (zc->flags & IORING_RECVSEND_NOTIF_FLUSH) {
		io_notif_slot_flush_submit(notif_slot, 0);
	}

	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	unsigned flags;

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	flags = READ_ONCE(sqe->ioprio);
	if (flags & ~IORING_ACCEPT_MULTISHOT)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	return 0;
}
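
/*
 * IORING_OP_ACCEPT issue path. With IORING_ACCEPT_MULTISHOT the request
 * stays armed and posts one CQE per accepted connection via
 * io_post_aux_cqe(); it rearms through poll on -EAGAIN and stops on errors
 * or when posting the CQE fails.
 */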

int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
	bool fixed = !!accept->file_slot;
	struct file *file;
	int ret, fd;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock) {
			/*
			 * if it's multishot and polled, we don't need to
			 * return EAGAIN to arm the poll infra since it
			 * has already been done
			 */
			if ((req->flags & IO_APOLL_MULTI_POLLED) ==
			    IO_APOLL_MULTI_POLLED)
				ret = IOU_ISSUE_SKIP_COMPLETE;
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
						accept->file_slot);
	}

	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, ret, 0);
		return IOU_OK;
	}

	if (ret >= 0 &&
	    io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, false))
		goto retry;

	io_req_set_res(req, ret, 0);
	if (req->flags & REQ_F_POLLED)
		return IOU_STOP_MULTISHOT;
	return IOU_OK;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					    sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_connect_prep_async(struct io_kiocb *req)
{
	struct io_async_connect *io = req->async_data;
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
}

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	return 0;
}
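
/*
 * IORING_OP_CONNECT issue path: a nonblocking connect that stashes the
 * address in async data and returns -EAGAIN while the connection is still
 * in progress, so the request is retried until it completes.
 */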

int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_connect __io, *io;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	if (req_has_async_data(req)) {
		io = req->async_data;
	} else {
		ret = move_addr_to_kernel(connect->addr,
						connect->addr_len,
						&__io.address);
		if (ret)
			goto out;
		io = &__io;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->address,
					connect->addr_len, file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
		if (req_has_async_data(req))
			return -EAGAIN;
		if (io_alloc_async_data(req)) {
			ret = -ENOMEM;
			goto out;
		}
		memcpy(req->async_data, &__io, sizeof(__io));
		return -EAGAIN;
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

void io_netmsg_cache_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct io_async_msghdr, cache));
}
#endif