io_uring: split network-related opcodes into their own file
author     Jens Axboe <axboe@kernel.dk>
           Wed, 25 May 2022 12:25:13 +0000 (06:25 -0600)
committer  Jens Axboe <axboe@kernel.dk>
           Mon, 25 Jul 2022 00:39:11 +0000 (18:39 -0600)
While at it, convert the handlers to just use io_eopnotsupp_prep()
if CONFIG_NET isn't set.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
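
The conversion the message describes is the one visible in the io_op_defs
hunks below: each network opcode keeps its real handlers under CONFIG_NET and
otherwise falls back to a single stub. Pulled out of the diff for readability
(io_eopnotsupp_prep() and the IORING_OP_SHUTDOWN entry, as they appear in the
patch):

    static __maybe_unused int io_eopnotsupp_prep(struct io_kiocb *kiocb,
                                                 const struct io_uring_sqe *sqe)
    {
            return -EOPNOTSUPP;
    }

    [IORING_OP_SHUTDOWN] = {
            .needs_file             = 1,
    #if defined(CONFIG_NET)
            .prep                   = io_shutdown_prep,
            .issue                  = io_shutdown,
    #else
            .prep                   = io_eopnotsupp_prep,
    #endif
    },

This replaces the per-op IO_NETOP_FN/IO_NETOP_PREP stub macros removed from
io_uring.c; the userspace-visible result (-EOPNOTSUPP) is unchanged.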
io_uring/Makefile
io_uring/io_uring.c
io_uring/io_uring.h
io_uring/net.c [new file with mode: 0644]
io_uring/net.h [new file with mode: 0644]
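
How the fallback looks from userspace (not part of the patch): a minimal
sketch, assuming liburing 2.2+ for io_uring_prep_socket(). On a kernel built
without CONFIG_NET the CQE carries -EOPNOTSUPP from the prep stub; on a
normal kernel, res is the new socket fd.

    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <liburing.h>

    int main(void)
    {
            struct io_uring ring;
            struct io_uring_sqe *sqe;
            struct io_uring_cqe *cqe;

            if (io_uring_queue_init(8, &ring, 0) < 0)
                    return 1;

            /* IORING_OP_SOCKET: with !CONFIG_NET, prep is
             * io_eopnotsupp_prep() after this patch */
            sqe = io_uring_get_sqe(&ring);
            io_uring_prep_socket(sqe, AF_INET, SOCK_STREAM, 0, 0);

            io_uring_submit(&ring);
            io_uring_wait_cqe(&ring, &cqe);
            /* res < 0 is a negated errno, e.g. -EOPNOTSUPP */
            printf("res = %d (%s)\n", cqe->res,
                   cqe->res < 0 ? strerror(-cqe->res) : "ok");
            io_uring_cqe_seen(&ring, cqe);
            io_uring_queue_exit(&ring);
            return 0;
    }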

diff --git a/io_uring/Makefile b/io_uring/Makefile
index de953c0..c9ec1bb 100644
@@ -5,5 +5,5 @@
 obj-$(CONFIG_IO_URING)         += io_uring.o xattr.o nop.o fs.o splice.o \
                                        sync.o advise.o filetable.o \
                                        openclose.o uring_cmd.o epoll.o \
-                                       statx.o
+                                       statx.o net.o
 obj-$(CONFIG_IO_WQ)            += io-wq.o
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index eb01d1a..cbc2098 100644
 #include "uring_cmd.h"
 #include "epoll.h"
 #include "statx.h"
+#include "net.h"
 
 #define IORING_MAX_ENTRIES     32768
 #define IORING_MAX_CQ_ENTRIES  (2 * IORING_MAX_ENTRIES)
 #define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\
                                 IO_REQ_CLEAN_FLAGS)
 
-#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
-
 #define IO_TCTX_REFS_CACHE_NR  (1U << 10)
 
 struct io_mapped_ubuf {
@@ -295,25 +294,6 @@ struct io_timeout_data {
        u32                             flags;
 };
 
-struct io_accept {
-       struct file                     *file;
-       struct sockaddr __user          *addr;
-       int __user                      *addr_len;
-       int                             flags;
-       u32                             file_slot;
-       unsigned long                   nofile;
-};
-
-struct io_socket {
-       struct file                     *file;
-       int                             domain;
-       int                             type;
-       int                             protocol;
-       int                             flags;
-       u32                             file_slot;
-       unsigned long                   nofile;
-};
-
 struct io_cancel {
        struct file                     *file;
        u64                             addr;
@@ -350,25 +330,6 @@ struct io_rw {
        rwf_t                           flags;
 };
 
-struct io_connect {
-       struct file                     *file;
-       struct sockaddr __user          *addr;
-       int                             addr_len;
-};
-
-struct io_sr_msg {
-       struct file                     *file;
-       union {
-               struct compat_msghdr __user     *umsg_compat;
-               struct user_msghdr __user       *umsg;
-               void __user                     *buf;
-       };
-       int                             msg_flags;
-       size_t                          len;
-       size_t                          done_io;
-       unsigned int                    flags;
-};
-
 struct io_rsrc_update {
        struct file                     *file;
        u64                             arg;
@@ -385,30 +346,12 @@ struct io_provide_buf {
        __u16                           bid;
 };
 
-struct io_shutdown {
-       struct file                     *file;
-       int                             how;
-};
-
 struct io_msg {
        struct file                     *file;
        u64 user_data;
        u32 len;
 };
 
-struct io_async_connect {
-       struct sockaddr_storage         address;
-};
-
-struct io_async_msghdr {
-       struct iovec                    fast_iov[UIO_FASTIOV];
-       /* points to an allocated iov, if NULL we use fast_iov instead */
-       struct iovec                    *free_iov;
-       struct sockaddr __user          *uaddr;
-       struct msghdr                   msg;
-       struct sockaddr_storage         addr;
-};
-
 struct io_rw_state {
        struct iov_iter                 iter;
        struct iov_iter_state           iter_state;
@@ -517,9 +460,6 @@ static void io_req_task_queue(struct io_kiocb *req);
 static void __io_submit_flush_completions(struct io_ring_ctx *ctx);
 static int io_req_prep_async(struct io_kiocb *req);
 
-static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
-                                unsigned int issue_flags, u32 slot_index);
-
 static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
 static void io_eventfd_signal(struct io_ring_ctx *ctx);
 static void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags);
@@ -808,8 +748,7 @@ static inline unsigned int io_put_kbuf_comp(struct io_kiocb *req)
        return __io_put_kbuf(req, &req->ctx->io_buffers_comp);
 }
 
-static inline unsigned int io_put_kbuf(struct io_kiocb *req,
-                                      unsigned issue_flags)
+inline unsigned int io_put_kbuf(struct io_kiocb *req, unsigned issue_flags)
 {
        unsigned int cflags;
 
@@ -1291,12 +1230,6 @@ static __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
        spin_unlock_irq(&ctx->timeout_lock);
 }
 
-static inline void io_commit_cqring(struct io_ring_ctx *ctx)
-{
-       /* order cqe stores with ring update */
-       smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
-}
-
 static void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
 {
        if (ctx->off_timeout_used || ctx->drain_active) {
@@ -1418,7 +1351,7 @@ static inline void io_cqring_wake(struct io_ring_ctx *ctx)
  * 1:1 relationship between how many times this function is called (and
  * hence the eventfd count) and number of CQEs posted to the CQ ring.
  */
-static inline void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 {
        if (unlikely(ctx->off_timeout_used || ctx->drain_active ||
                     ctx->has_evfd))
@@ -1639,8 +1572,8 @@ static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx,
        }
 }
 
-static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
-                                    s32 res, u32 cflags)
+bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
+                    u32 cflags)
 {
        struct io_uring_cqe *cqe;
 
@@ -2980,8 +2913,8 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
        return u64_to_user_ptr(buf->addr);
 }
 
-static void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
-                                    unsigned int issue_flags)
+void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
+                             unsigned int issue_flags)
 {
        struct io_ring_ctx *ctx = req->ctx;
        struct io_buffer_list *bl;
@@ -3073,13 +3006,6 @@ static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
        return __io_iov_buffer_select(req, iov, issue_flags);
 }
 
-static inline bool io_do_buffer_select(struct io_kiocb *req)
-{
-       if (!(req->flags & REQ_F_BUFFER_SELECT))
-               return false;
-       return !(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING));
-}
-
 static struct iovec *__io_import_iovec(int ddir, struct io_kiocb *req,
                                       struct io_rw_state *s,
                                       unsigned int issue_flags)
@@ -4025,755 +3951,6 @@ static __maybe_unused int io_eopnotsupp_prep(struct io_kiocb *kiocb,
        return -EOPNOTSUPP;
 }
 
-#if defined(CONFIG_NET)
-static int io_shutdown_prep(struct io_kiocb *req,
-                           const struct io_uring_sqe *sqe)
-{
-       struct io_shutdown *shutdown = io_kiocb_to_cmd(req);
-
-       if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
-                    sqe->buf_index || sqe->splice_fd_in))
-               return -EINVAL;
-
-       shutdown->how = READ_ONCE(sqe->len);
-       return 0;
-}
-
-static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
-{
-       struct io_shutdown *shutdown = io_kiocb_to_cmd(req);
-       struct socket *sock;
-       int ret;
-
-       if (issue_flags & IO_URING_F_NONBLOCK)
-               return -EAGAIN;
-
-       sock = sock_from_file(req->file);
-       if (unlikely(!sock))
-               return -ENOTSOCK;
-
-       ret = __sys_shutdown_sock(sock, shutdown->how);
-       io_req_set_res(req, ret, 0);
-       return IOU_OK;
-}
-
-static bool io_net_retry(struct socket *sock, int flags)
-{
-       if (!(flags & MSG_WAITALL))
-               return false;
-       return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
-}
-
-static int io_setup_async_msg(struct io_kiocb *req,
-                             struct io_async_msghdr *kmsg)
-{
-       struct io_async_msghdr *async_msg = req->async_data;
-
-       if (async_msg)
-               return -EAGAIN;
-       if (io_alloc_async_data(req)) {
-               kfree(kmsg->free_iov);
-               return -ENOMEM;
-       }
-       async_msg = req->async_data;
-       req->flags |= REQ_F_NEED_CLEANUP;
-       memcpy(async_msg, kmsg, sizeof(*kmsg));
-       async_msg->msg.msg_name = &async_msg->addr;
-       /* if were using fast_iov, set it to the new one */
-       if (!async_msg->free_iov)
-               async_msg->msg.msg_iter.iov = async_msg->fast_iov;
-
-       return -EAGAIN;
-}
-
-static int io_sendmsg_copy_hdr(struct io_kiocb *req,
-                              struct io_async_msghdr *iomsg)
-{
-       struct io_sr_msg *sr = io_kiocb_to_cmd(req);
-
-       iomsg->msg.msg_name = &iomsg->addr;
-       iomsg->free_iov = iomsg->fast_iov;
-       return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
-                                       &iomsg->free_iov);
-}
-
-static int io_sendmsg_prep_async(struct io_kiocb *req)
-{
-       int ret;
-
-       ret = io_sendmsg_copy_hdr(req, req->async_data);
-       if (!ret)
-               req->flags |= REQ_F_NEED_CLEANUP;
-       return ret;
-}
-
-static void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
-{
-       struct io_async_msghdr *io = req->async_data;
-
-       kfree(io->free_iov);
-}
-
-static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
-       struct io_sr_msg *sr = io_kiocb_to_cmd(req);
-
-       if (unlikely(sqe->file_index || sqe->addr2))
-               return -EINVAL;
-
-       sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
-       sr->len = READ_ONCE(sqe->len);
-       sr->flags = READ_ONCE(sqe->ioprio);
-       if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
-               return -EINVAL;
-       sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
-       if (sr->msg_flags & MSG_DONTWAIT)
-               req->flags |= REQ_F_NOWAIT;
-
-#ifdef CONFIG_COMPAT
-       if (req->ctx->compat)
-               sr->msg_flags |= MSG_CMSG_COMPAT;
-#endif
-       sr->done_io = 0;
-       return 0;
-}
-
-static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
-{
-       struct io_sr_msg *sr = io_kiocb_to_cmd(req);
-       struct io_async_msghdr iomsg, *kmsg;
-       struct socket *sock;
-       unsigned flags;
-       int min_ret = 0;
-       int ret;
-
-       sock = sock_from_file(req->file);
-       if (unlikely(!sock))
-               return -ENOTSOCK;
-
-       if (req_has_async_data(req)) {
-               kmsg = req->async_data;
-       } else {
-               ret = io_sendmsg_copy_hdr(req, &iomsg);
-               if (ret)
-                       return ret;
-               kmsg = &iomsg;
-       }
-
-       if (!(req->flags & REQ_F_POLLED) &&
-           (sr->flags & IORING_RECVSEND_POLL_FIRST))
-               return io_setup_async_msg(req, kmsg);
-
-       flags = sr->msg_flags;
-       if (issue_flags & IO_URING_F_NONBLOCK)
-               flags |= MSG_DONTWAIT;
-       if (flags & MSG_WAITALL)
-               min_ret = iov_iter_count(&kmsg->msg.msg_iter);
-
-       ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
-
-       if (ret < min_ret) {
-               if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
-                       return io_setup_async_msg(req, kmsg);
-               if (ret == -ERESTARTSYS)
-                       ret = -EINTR;
-               if (ret > 0 && io_net_retry(sock, flags)) {
-                       sr->done_io += ret;
-                       req->flags |= REQ_F_PARTIAL_IO;
-                       return io_setup_async_msg(req, kmsg);
-               }
-               req_set_fail(req);
-       }
-       /* fast path, check for non-NULL to avoid function call */
-       if (kmsg->free_iov)
-               kfree(kmsg->free_iov);
-       req->flags &= ~REQ_F_NEED_CLEANUP;
-       if (ret >= 0)
-               ret += sr->done_io;
-       else if (sr->done_io)
-               ret = sr->done_io;
-       io_req_set_res(req, ret, 0);
-       return IOU_OK;
-}
-
-static int io_send(struct io_kiocb *req, unsigned int issue_flags)
-{
-       struct io_sr_msg *sr = io_kiocb_to_cmd(req);
-       struct msghdr msg;
-       struct iovec iov;
-       struct socket *sock;
-       unsigned flags;
-       int min_ret = 0;
-       int ret;
-
-       if (!(req->flags & REQ_F_POLLED) &&
-           (sr->flags & IORING_RECVSEND_POLL_FIRST))
-               return -EAGAIN;
-
-       sock = sock_from_file(req->file);
-       if (unlikely(!sock))
-               return -ENOTSOCK;
-
-       ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
-       if (unlikely(ret))
-               return ret;
-
-       msg.msg_name = NULL;
-       msg.msg_control = NULL;
-       msg.msg_controllen = 0;
-       msg.msg_namelen = 0;
-
-       flags = sr->msg_flags;
-       if (issue_flags & IO_URING_F_NONBLOCK)
-               flags |= MSG_DONTWAIT;
-       if (flags & MSG_WAITALL)
-               min_ret = iov_iter_count(&msg.msg_iter);
-
-       msg.msg_flags = flags;
-       ret = sock_sendmsg(sock, &msg);
-       if (ret < min_ret) {
-               if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
-                       return -EAGAIN;
-               if (ret == -ERESTARTSYS)
-                       ret = -EINTR;
-               if (ret > 0 && io_net_retry(sock, flags)) {
-                       sr->len -= ret;
-                       sr->buf += ret;
-                       sr->done_io += ret;
-                       req->flags |= REQ_F_PARTIAL_IO;
-                       return -EAGAIN;
-               }
-               req_set_fail(req);
-       }
-       if (ret >= 0)
-               ret += sr->done_io;
-       else if (sr->done_io)
-               ret = sr->done_io;
-       io_req_set_res(req, ret, 0);
-       return IOU_OK;
-}
-
-static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
-                                struct io_async_msghdr *iomsg)
-{
-       struct io_sr_msg *sr = io_kiocb_to_cmd(req);
-       struct iovec __user *uiov;
-       size_t iov_len;
-       int ret;
-
-       ret = __copy_msghdr_from_user(&iomsg->msg, sr->umsg,
-                                       &iomsg->uaddr, &uiov, &iov_len);
-       if (ret)
-               return ret;
-
-       if (req->flags & REQ_F_BUFFER_SELECT) {
-               if (iov_len > 1)
-                       return -EINVAL;
-               if (copy_from_user(iomsg->fast_iov, uiov, sizeof(*uiov)))
-                       return -EFAULT;
-               sr->len = iomsg->fast_iov[0].iov_len;
-               iomsg->free_iov = NULL;
-       } else {
-               iomsg->free_iov = iomsg->fast_iov;
-               ret = __import_iovec(READ, uiov, iov_len, UIO_FASTIOV,
-                                    &iomsg->free_iov, &iomsg->msg.msg_iter,
-                                    false);
-               if (ret > 0)
-                       ret = 0;
-       }
-
-       return ret;
-}
-
-#ifdef CONFIG_COMPAT
-static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
-                                       struct io_async_msghdr *iomsg)
-{
-       struct io_sr_msg *sr = io_kiocb_to_cmd(req);
-       struct compat_iovec __user *uiov;
-       compat_uptr_t ptr;
-       compat_size_t len;
-       int ret;
-
-       ret = __get_compat_msghdr(&iomsg->msg, sr->umsg_compat, &iomsg->uaddr,
-                                 &ptr, &len);
-       if (ret)
-               return ret;
-
-       uiov = compat_ptr(ptr);
-       if (req->flags & REQ_F_BUFFER_SELECT) {
-               compat_ssize_t clen;
-
-               if (len > 1)
-                       return -EINVAL;
-               if (!access_ok(uiov, sizeof(*uiov)))
-                       return -EFAULT;
-               if (__get_user(clen, &uiov->iov_len))
-                       return -EFAULT;
-               if (clen < 0)
-                       return -EINVAL;
-               sr->len = clen;
-               iomsg->free_iov = NULL;
-       } else {
-               iomsg->free_iov = iomsg->fast_iov;
-               ret = __import_iovec(READ, (struct iovec __user *)uiov, len,
-                                  UIO_FASTIOV, &iomsg->free_iov,
-                                  &iomsg->msg.msg_iter, true);
-               if (ret < 0)
-                       return ret;
-       }
-
-       return 0;
-}
-#endif
-
-static int io_recvmsg_copy_hdr(struct io_kiocb *req,
-                              struct io_async_msghdr *iomsg)
-{
-       iomsg->msg.msg_name = &iomsg->addr;
-
-#ifdef CONFIG_COMPAT
-       if (req->ctx->compat)
-               return __io_compat_recvmsg_copy_hdr(req, iomsg);
-#endif
-
-       return __io_recvmsg_copy_hdr(req, iomsg);
-}
-
-static int io_recvmsg_prep_async(struct io_kiocb *req)
-{
-       int ret;
-
-       ret = io_recvmsg_copy_hdr(req, req->async_data);
-       if (!ret)
-               req->flags |= REQ_F_NEED_CLEANUP;
-       return ret;
-}
-
-static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
-       struct io_sr_msg *sr = io_kiocb_to_cmd(req);
-
-       if (unlikely(sqe->file_index || sqe->addr2))
-               return -EINVAL;
-
-       sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
-       sr->len = READ_ONCE(sqe->len);
-       sr->flags = READ_ONCE(sqe->ioprio);
-       if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
-               return -EINVAL;
-       sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
-       if (sr->msg_flags & MSG_DONTWAIT)
-               req->flags |= REQ_F_NOWAIT;
-       if (sr->msg_flags & MSG_ERRQUEUE)
-               req->flags |= REQ_F_CLEAR_POLLIN;
-
-#ifdef CONFIG_COMPAT
-       if (req->ctx->compat)
-               sr->msg_flags |= MSG_CMSG_COMPAT;
-#endif
-       sr->done_io = 0;
-       return 0;
-}
-
-static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
-{
-       struct io_sr_msg *sr = io_kiocb_to_cmd(req);
-       struct io_async_msghdr iomsg, *kmsg;
-       struct socket *sock;
-       unsigned int cflags;
-       unsigned flags;
-       int ret, min_ret = 0;
-       bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
-
-       sock = sock_from_file(req->file);
-       if (unlikely(!sock))
-               return -ENOTSOCK;
-
-       if (req_has_async_data(req)) {
-               kmsg = req->async_data;
-       } else {
-               ret = io_recvmsg_copy_hdr(req, &iomsg);
-               if (ret)
-                       return ret;
-               kmsg = &iomsg;
-       }
-
-       if (!(req->flags & REQ_F_POLLED) &&
-           (sr->flags & IORING_RECVSEND_POLL_FIRST))
-               return io_setup_async_msg(req, kmsg);
-
-       if (io_do_buffer_select(req)) {
-               void __user *buf;
-
-               buf = io_buffer_select(req, &sr->len, issue_flags);
-               if (!buf)
-                       return -ENOBUFS;
-               kmsg->fast_iov[0].iov_base = buf;
-               kmsg->fast_iov[0].iov_len = sr->len;
-               iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1,
-                               sr->len);
-       }
-
-       flags = sr->msg_flags;
-       if (force_nonblock)
-               flags |= MSG_DONTWAIT;
-       if (flags & MSG_WAITALL)
-               min_ret = iov_iter_count(&kmsg->msg.msg_iter);
-
-       kmsg->msg.msg_get_inq = 1;
-       ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags);
-       if (ret < min_ret) {
-               if (ret == -EAGAIN && force_nonblock)
-                       return io_setup_async_msg(req, kmsg);
-               if (ret == -ERESTARTSYS)
-                       ret = -EINTR;
-               if (ret > 0 && io_net_retry(sock, flags)) {
-                       sr->done_io += ret;
-                       req->flags |= REQ_F_PARTIAL_IO;
-                       return io_setup_async_msg(req, kmsg);
-               }
-               req_set_fail(req);
-       } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
-               req_set_fail(req);
-       }
-
-       /* fast path, check for non-NULL to avoid function call */
-       if (kmsg->free_iov)
-               kfree(kmsg->free_iov);
-       req->flags &= ~REQ_F_NEED_CLEANUP;
-       if (ret >= 0)
-               ret += sr->done_io;
-       else if (sr->done_io)
-               ret = sr->done_io;
-       cflags = io_put_kbuf(req, issue_flags);
-       if (kmsg->msg.msg_inq)
-               cflags |= IORING_CQE_F_SOCK_NONEMPTY;
-       io_req_set_res(req, ret, cflags);
-       return IOU_OK;
-}
-
-static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
-{
-       struct io_sr_msg *sr = io_kiocb_to_cmd(req);
-       struct msghdr msg;
-       struct socket *sock;
-       struct iovec iov;
-       unsigned int cflags;
-       unsigned flags;
-       int ret, min_ret = 0;
-       bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
-
-       if (!(req->flags & REQ_F_POLLED) &&
-           (sr->flags & IORING_RECVSEND_POLL_FIRST))
-               return -EAGAIN;
-
-       sock = sock_from_file(req->file);
-       if (unlikely(!sock))
-               return -ENOTSOCK;
-
-       if (io_do_buffer_select(req)) {
-               void __user *buf;
-
-               buf = io_buffer_select(req, &sr->len, issue_flags);
-               if (!buf)
-                       return -ENOBUFS;
-               sr->buf = buf;
-       }
-
-       ret = import_single_range(READ, sr->buf, sr->len, &iov, &msg.msg_iter);
-       if (unlikely(ret))
-               goto out_free;
-
-       msg.msg_name = NULL;
-       msg.msg_namelen = 0;
-       msg.msg_control = NULL;
-       msg.msg_get_inq = 1;
-       msg.msg_flags = 0;
-       msg.msg_controllen = 0;
-       msg.msg_iocb = NULL;
-
-       flags = sr->msg_flags;
-       if (force_nonblock)
-               flags |= MSG_DONTWAIT;
-       if (flags & MSG_WAITALL)
-               min_ret = iov_iter_count(&msg.msg_iter);
-
-       ret = sock_recvmsg(sock, &msg, flags);
-       if (ret < min_ret) {
-               if (ret == -EAGAIN && force_nonblock)
-                       return -EAGAIN;
-               if (ret == -ERESTARTSYS)
-                       ret = -EINTR;
-               if (ret > 0 && io_net_retry(sock, flags)) {
-                       sr->len -= ret;
-                       sr->buf += ret;
-                       sr->done_io += ret;
-                       req->flags |= REQ_F_PARTIAL_IO;
-                       return -EAGAIN;
-               }
-               req_set_fail(req);
-       } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
-out_free:
-               req_set_fail(req);
-       }
-
-       if (ret >= 0)
-               ret += sr->done_io;
-       else if (sr->done_io)
-               ret = sr->done_io;
-       cflags = io_put_kbuf(req, issue_flags);
-       if (msg.msg_inq)
-               cflags |= IORING_CQE_F_SOCK_NONEMPTY;
-       io_req_set_res(req, ret, cflags);
-       return IOU_OK;
-}
-
-static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
-       struct io_accept *accept = io_kiocb_to_cmd(req);
-       unsigned flags;
-
-       if (sqe->len || sqe->buf_index)
-               return -EINVAL;
-
-       accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
-       accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
-       accept->flags = READ_ONCE(sqe->accept_flags);
-       accept->nofile = rlimit(RLIMIT_NOFILE);
-       flags = READ_ONCE(sqe->ioprio);
-       if (flags & ~IORING_ACCEPT_MULTISHOT)
-               return -EINVAL;
-
-       accept->file_slot = READ_ONCE(sqe->file_index);
-       if (accept->file_slot) {
-               if (accept->flags & SOCK_CLOEXEC)
-                       return -EINVAL;
-               if (flags & IORING_ACCEPT_MULTISHOT &&
-                   accept->file_slot != IORING_FILE_INDEX_ALLOC)
-                       return -EINVAL;
-       }
-       if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
-               return -EINVAL;
-       if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
-               accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
-       if (flags & IORING_ACCEPT_MULTISHOT)
-               req->flags |= REQ_F_APOLL_MULTISHOT;
-       return 0;
-}
-
-static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
-{
-       struct io_ring_ctx *ctx = req->ctx;
-       struct io_accept *accept = io_kiocb_to_cmd(req);
-       bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
-       unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
-       bool fixed = !!accept->file_slot;
-       struct file *file;
-       int ret, fd;
-
-retry:
-       if (!fixed) {
-               fd = __get_unused_fd_flags(accept->flags, accept->nofile);
-               if (unlikely(fd < 0))
-                       return fd;
-       }
-       file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
-                        accept->flags);
-       if (IS_ERR(file)) {
-               if (!fixed)
-                       put_unused_fd(fd);
-               ret = PTR_ERR(file);
-               if (ret == -EAGAIN && force_nonblock) {
-                       /*
-                        * if it's multishot and polled, we don't need to
-                        * return EAGAIN to arm the poll infra since it
-                        * has already been done
-                        */
-                       if ((req->flags & IO_APOLL_MULTI_POLLED) ==
-                           IO_APOLL_MULTI_POLLED)
-                               ret = IOU_ISSUE_SKIP_COMPLETE;
-                       return ret;
-               }
-               if (ret == -ERESTARTSYS)
-                       ret = -EINTR;
-               req_set_fail(req);
-       } else if (!fixed) {
-               fd_install(fd, file);
-               ret = fd;
-       } else {
-               ret = io_fixed_fd_install(req, issue_flags, file,
-                                               accept->file_slot);
-       }
-
-       if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
-               io_req_set_res(req, ret, 0);
-               return IOU_OK;
-       }
-       if (ret >= 0) {
-               bool filled;
-
-               spin_lock(&ctx->completion_lock);
-               filled = io_fill_cqe_aux(ctx, req->cqe.user_data, ret,
-                                        IORING_CQE_F_MORE);
-               io_commit_cqring(ctx);
-               spin_unlock(&ctx->completion_lock);
-               if (filled) {
-                       io_cqring_ev_posted(ctx);
-                       goto retry;
-               }
-               ret = -ECANCELED;
-       }
-
-       return ret;
-}
-
-static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
-       struct io_socket *sock = io_kiocb_to_cmd(req);
-
-       if (sqe->addr || sqe->rw_flags || sqe->buf_index)
-               return -EINVAL;
-
-       sock->domain = READ_ONCE(sqe->fd);
-       sock->type = READ_ONCE(sqe->off);
-       sock->protocol = READ_ONCE(sqe->len);
-       sock->file_slot = READ_ONCE(sqe->file_index);
-       sock->nofile = rlimit(RLIMIT_NOFILE);
-
-       sock->flags = sock->type & ~SOCK_TYPE_MASK;
-       if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
-               return -EINVAL;
-       if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
-               return -EINVAL;
-       return 0;
-}
-
-static int io_socket(struct io_kiocb *req, unsigned int issue_flags)
-{
-       struct io_socket *sock = io_kiocb_to_cmd(req);
-       bool fixed = !!sock->file_slot;
-       struct file *file;
-       int ret, fd;
-
-       if (!fixed) {
-               fd = __get_unused_fd_flags(sock->flags, sock->nofile);
-               if (unlikely(fd < 0))
-                       return fd;
-       }
-       file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
-       if (IS_ERR(file)) {
-               if (!fixed)
-                       put_unused_fd(fd);
-               ret = PTR_ERR(file);
-               if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
-                       return -EAGAIN;
-               if (ret == -ERESTARTSYS)
-                       ret = -EINTR;
-               req_set_fail(req);
-       } else if (!fixed) {
-               fd_install(fd, file);
-               ret = fd;
-       } else {
-               ret = io_fixed_fd_install(req, issue_flags, file,
-                                           sock->file_slot);
-       }
-       io_req_set_res(req, ret, 0);
-       return IOU_OK;
-}
-
-static int io_connect_prep_async(struct io_kiocb *req)
-{
-       struct io_async_connect *io = req->async_data;
-       struct io_connect *conn = io_kiocb_to_cmd(req);
-
-       return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
-}
-
-static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
-       struct io_connect *conn = io_kiocb_to_cmd(req);
-
-       if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
-               return -EINVAL;
-
-       conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
-       conn->addr_len =  READ_ONCE(sqe->addr2);
-       return 0;
-}
-
-static int io_connect(struct io_kiocb *req, unsigned int issue_flags)
-{
-       struct io_connect *connect = io_kiocb_to_cmd(req);
-       struct io_async_connect __io, *io;
-       unsigned file_flags;
-       int ret;
-       bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
-
-       if (req_has_async_data(req)) {
-               io = req->async_data;
-       } else {
-               ret = move_addr_to_kernel(connect->addr,
-                                               connect->addr_len,
-                                               &__io.address);
-               if (ret)
-                       goto out;
-               io = &__io;
-       }
-
-       file_flags = force_nonblock ? O_NONBLOCK : 0;
-
-       ret = __sys_connect_file(req->file, &io->address,
-                                       connect->addr_len, file_flags);
-       if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
-               if (req_has_async_data(req))
-                       return -EAGAIN;
-               if (io_alloc_async_data(req)) {
-                       ret = -ENOMEM;
-                       goto out;
-               }
-               memcpy(req->async_data, &__io, sizeof(__io));
-               return -EAGAIN;
-       }
-       if (ret == -ERESTARTSYS)
-               ret = -EINTR;
-out:
-       if (ret < 0)
-               req_set_fail(req);
-       io_req_set_res(req, ret, 0);
-       return IOU_OK;
-}
-#else /* !CONFIG_NET */
-#define IO_NETOP_FN(op)                                                        \
-static int io_##op(struct io_kiocb *req, unsigned int issue_flags)     \
-{                                                                      \
-       return -EOPNOTSUPP;                                             \
-}
-
-#define IO_NETOP_PREP(op)                                              \
-IO_NETOP_FN(op)                                                                \
-static int io_##op##_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) \
-{                                                                      \
-       return -EOPNOTSUPP;                                             \
-}                                                                      \
-
-#define IO_NETOP_PREP_ASYNC(op)                                                \
-IO_NETOP_PREP(op)                                                      \
-static int io_##op##_prep_async(struct io_kiocb *req)                  \
-{                                                                      \
-       return -EOPNOTSUPP;                                             \
-}
-
-IO_NETOP_PREP_ASYNC(sendmsg);
-IO_NETOP_PREP_ASYNC(recvmsg);
-IO_NETOP_PREP_ASYNC(connect);
-IO_NETOP_PREP(accept);
-IO_NETOP_PREP(socket);
-IO_NETOP_PREP(shutdown);
-IO_NETOP_FN(send);
-IO_NETOP_FN(recv);
-#endif /* CONFIG_NET */
-
 struct io_poll_table {
        struct poll_table_struct pt;
        struct io_kiocb *req;
@@ -7874,8 +7051,8 @@ int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
        return 0;
 }
 
-static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
-                                unsigned int issue_flags, u32 slot_index)
+int io_install_fixed_file(struct io_kiocb *req, struct file *file,
+                         unsigned int issue_flags, u32 slot_index)
        __must_hold(&req->ctx->uring_lock)
 {
        struct io_ring_ctx *ctx = req->ctx;
@@ -10986,12 +10163,14 @@ static const struct io_op_def io_op_defs[] = {
                .unbound_nonreg_file    = 1,
                .pollout                = 1,
                .ioprio                 = 1,
+#if defined(CONFIG_NET)
                .async_size             = sizeof(struct io_async_msghdr),
                .prep                   = io_sendmsg_prep,
                .issue                  = io_sendmsg,
                .prep_async             = io_sendmsg_prep_async,
-#if defined(CONFIG_NET)
                .cleanup                = io_sendmsg_recvmsg_cleanup,
+#else
+               .prep                   = io_eopnotsupp_prep,
 #endif
        },
        [IORING_OP_RECVMSG] = {
@@ -11000,12 +10179,14 @@ static const struct io_op_def io_op_defs[] = {
                .pollin                 = 1,
                .buffer_select          = 1,
                .ioprio                 = 1,
+#if defined(CONFIG_NET)
                .async_size             = sizeof(struct io_async_msghdr),
                .prep                   = io_recvmsg_prep,
                .issue                  = io_recvmsg,
                .prep_async             = io_recvmsg_prep_async,
-#if defined(CONFIG_NET)
                .cleanup                = io_sendmsg_recvmsg_cleanup,
+#else
+               .prep                   = io_eopnotsupp_prep,
 #endif
        },
        [IORING_OP_TIMEOUT] = {
@@ -11026,8 +10207,12 @@ static const struct io_op_def io_op_defs[] = {
                .pollin                 = 1,
                .poll_exclusive         = 1,
                .ioprio                 = 1,    /* used for flags */
+#if defined(CONFIG_NET)
                .prep                   = io_accept_prep,
                .issue                  = io_accept,
+#else
+               .prep                   = io_eopnotsupp_prep,
+#endif
        },
        [IORING_OP_ASYNC_CANCEL] = {
                .audit_skip             = 1,
@@ -11044,10 +10229,14 @@ static const struct io_op_def io_op_defs[] = {
                .needs_file             = 1,
                .unbound_nonreg_file    = 1,
                .pollout                = 1,
+#if defined(CONFIG_NET)
                .async_size             = sizeof(struct io_async_connect),
                .prep                   = io_connect_prep,
                .issue                  = io_connect,
                .prep_async             = io_connect_prep_async,
+#else
+               .prep                   = io_eopnotsupp_prep,
+#endif
        },
        [IORING_OP_FALLOCATE] = {
                .needs_file             = 1,
@@ -11117,8 +10306,12 @@ static const struct io_op_def io_op_defs[] = {
                .pollout                = 1,
                .audit_skip             = 1,
                .ioprio                 = 1,
+#if defined(CONFIG_NET)
                .prep                   = io_sendmsg_prep,
                .issue                  = io_send,
+#else
+               .prep                   = io_eopnotsupp_prep,
+#endif
        },
        [IORING_OP_RECV] = {
                .needs_file             = 1,
@@ -11127,8 +10320,12 @@ static const struct io_op_def io_op_defs[] = {
                .buffer_select          = 1,
                .audit_skip             = 1,
                .ioprio                 = 1,
+#if defined(CONFIG_NET)
                .prep                   = io_recvmsg_prep,
                .issue                  = io_recv,
+#else
+               .prep                   = io_eopnotsupp_prep,
+#endif
        },
        [IORING_OP_OPENAT2] = {
                .prep                   = io_openat2_prep,
@@ -11175,8 +10372,12 @@ static const struct io_op_def io_op_defs[] = {
        },
        [IORING_OP_SHUTDOWN] = {
                .needs_file             = 1,
+#if defined(CONFIG_NET)
                .prep                   = io_shutdown_prep,
                .issue                  = io_shutdown,
+#else
+               .prep                   = io_eopnotsupp_prep,
+#endif
        },
        [IORING_OP_RENAMEAT] = {
                .prep                   = io_renameat_prep,
@@ -11233,8 +10434,12 @@ static const struct io_op_def io_op_defs[] = {
        },
        [IORING_OP_SOCKET] = {
                .audit_skip             = 1,
+#if defined(CONFIG_NET)
                .prep                   = io_socket_prep,
                .issue                  = io_socket,
+#else
+               .prep                   = io_eopnotsupp_prep,
+#endif
        },
        [IORING_OP_URING_CMD] = {
                .needs_file             = 1,
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 6a07e90..4b46385 100644
@@ -58,13 +58,35 @@ static inline void io_ring_submit_lock(struct io_ring_ctx *ctx,
        lockdep_assert_held(&ctx->uring_lock);
 }
 
+static inline void io_commit_cqring(struct io_ring_ctx *ctx)
+{
+       /* order cqe stores with ring update */
+       smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
+}
+
 void __io_req_complete(struct io_kiocb *req, unsigned issue_flags);
 
+bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
+                    u32 cflags);
+void io_cqring_ev_posted(struct io_ring_ctx *ctx);
+void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
+                             unsigned int issue_flags);
+unsigned int io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
+
+static inline bool io_do_buffer_select(struct io_kiocb *req)
+{
+       if (!(req->flags & REQ_F_BUFFER_SELECT))
+               return false;
+       return !(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING));
+}
+
 struct file *io_file_get_normal(struct io_kiocb *req, int fd);
 struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
                               unsigned issue_flags);
 int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
                        struct file *file, unsigned int file_slot);
+int io_install_fixed_file(struct io_kiocb *req, struct file *file,
+                         unsigned int issue_flags, u32 slot_index);
 
 int io_rsrc_node_switch_start(struct io_ring_ctx *ctx);
 int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
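
The io_uring/net.h hunk is not reproduced on this page. As a rough sketch
only, inferred from the handlers and async_size fields wired up in io_op_defs
above rather than copied from the actual new file, the header would declare
something like:

    // SPDX-License-Identifier: GPL-2.0
    //
    // Hypothetical reconstruction; names taken from the io_op_defs
    // entries in this patch, not from the real hunk.
    #include <linux/net.h>
    #include <linux/uio.h>

    #if defined(CONFIG_NET)
    /* io_op_defs sets async_size to these, so they must be visible here */
    struct io_async_msghdr {
            struct iovec                    fast_iov[UIO_FASTIOV];
            /* points to an allocated iov, if NULL we use fast_iov instead */
            struct iovec                    *free_iov;
            struct sockaddr __user          *uaddr;
            struct msghdr                   msg;
            struct sockaddr_storage         addr;
    };

    struct io_async_connect {
            struct sockaddr_storage         address;
    };

    int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
    int io_shutdown(struct io_kiocb *req, unsigned int issue_flags);

    int io_sendmsg_prep_async(struct io_kiocb *req);
    void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req);
    int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
    int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags);
    int io_send(struct io_kiocb *req, unsigned int issue_flags);

    int io_recvmsg_prep_async(struct io_kiocb *req);
    int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
    int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags);
    int io_recv(struct io_kiocb *req, unsigned int issue_flags);

    int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
    int io_accept(struct io_kiocb *req, unsigned int issue_flags);

    int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
    int io_socket(struct io_kiocb *req, unsigned int issue_flags);

    int io_connect_prep_async(struct io_kiocb *req);
    int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
    int io_connect(struct io_kiocb *req, unsigned int issue_flags);
    #endif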
diff --git a/io_uring/net.c b/io_uring/net.c
new file mode 100644
index 0000000..2434548
--- /dev/null
@@ -0,0 +1,779 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/net.h>
+#include <linux/compat.h>
+#include <net/compat.h>
+#include <linux/io_uring.h>
+
+#include <uapi/linux/io_uring.h>
+
+#include "io_uring_types.h"
+#include "io_uring.h"
+#include "net.h"
+
+#if defined(CONFIG_NET)
+struct io_shutdown {
+       struct file                     *file;
+       int                             how;
+};
+
+struct io_accept {
+       struct file                     *file;
+       struct sockaddr __user          *addr;
+       int __user                      *addr_len;
+       int                             flags;
+       u32                             file_slot;
+       unsigned long                   nofile;
+};
+
+struct io_socket {
+       struct file                     *file;
+       int                             domain;
+       int                             type;
+       int                             protocol;
+       int                             flags;
+       u32                             file_slot;
+       unsigned long                   nofile;
+};
+
+struct io_connect {
+       struct file                     *file;
+       struct sockaddr __user          *addr;
+       int                             addr_len;
+};
+
+struct io_sr_msg {
+       struct file                     *file;
+       union {
+               struct compat_msghdr __user     *umsg_compat;
+               struct user_msghdr __user       *umsg;
+               void __user                     *buf;
+       };
+       int                             msg_flags;
+       size_t                          len;
+       size_t                          done_io;
+       unsigned int                    flags;
+};
+
+#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
+
+int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+       struct io_shutdown *shutdown = io_kiocb_to_cmd(req);
+
+       if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
+                    sqe->buf_index || sqe->splice_fd_in))
+               return -EINVAL;
+
+       shutdown->how = READ_ONCE(sqe->len);
+       return 0;
+}
+
+int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_shutdown *shutdown = io_kiocb_to_cmd(req);
+       struct socket *sock;
+       int ret;
+
+       if (issue_flags & IO_URING_F_NONBLOCK)
+               return -EAGAIN;
+
+       sock = sock_from_file(req->file);
+       if (unlikely(!sock))
+               return -ENOTSOCK;
+
+       ret = __sys_shutdown_sock(sock, shutdown->how);
+       io_req_set_res(req, ret, 0);
+       return IOU_OK;
+}
+
+static bool io_net_retry(struct socket *sock, int flags)
+{
+       if (!(flags & MSG_WAITALL))
+               return false;
+       return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
+}
+
+static int io_setup_async_msg(struct io_kiocb *req,
+                             struct io_async_msghdr *kmsg)
+{
+       struct io_async_msghdr *async_msg = req->async_data;
+
+       if (async_msg)
+               return -EAGAIN;
+       if (io_alloc_async_data(req)) {
+               kfree(kmsg->free_iov);
+               return -ENOMEM;
+       }
+       async_msg = req->async_data;
+       req->flags |= REQ_F_NEED_CLEANUP;
+       memcpy(async_msg, kmsg, sizeof(*kmsg));
+       async_msg->msg.msg_name = &async_msg->addr;
+       /* if were using fast_iov, set it to the new one */
+       if (!async_msg->free_iov)
+               async_msg->msg.msg_iter.iov = async_msg->fast_iov;
+
+       return -EAGAIN;
+}
+
+static int io_sendmsg_copy_hdr(struct io_kiocb *req,
+                              struct io_async_msghdr *iomsg)
+{
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+
+       iomsg->msg.msg_name = &iomsg->addr;
+       iomsg->free_iov = iomsg->fast_iov;
+       return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
+                                       &iomsg->free_iov);
+}
+
+int io_sendmsg_prep_async(struct io_kiocb *req)
+{
+       int ret;
+
+       ret = io_sendmsg_copy_hdr(req, req->async_data);
+       if (!ret)
+               req->flags |= REQ_F_NEED_CLEANUP;
+       return ret;
+}
+
+void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
+{
+       struct io_async_msghdr *io = req->async_data;
+
+       kfree(io->free_iov);
+}
+
+int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+
+       if (unlikely(sqe->file_index || sqe->addr2))
+               return -EINVAL;
+
+       sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+       sr->len = READ_ONCE(sqe->len);
+       sr->flags = READ_ONCE(sqe->ioprio);
+       if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
+               return -EINVAL;
+       sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
+       if (sr->msg_flags & MSG_DONTWAIT)
+               req->flags |= REQ_F_NOWAIT;
+
+#ifdef CONFIG_COMPAT
+       if (req->ctx->compat)
+               sr->msg_flags |= MSG_CMSG_COMPAT;
+#endif
+       sr->done_io = 0;
+       return 0;
+}
+
+int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+       struct io_async_msghdr iomsg, *kmsg;
+       struct socket *sock;
+       unsigned flags;
+       int min_ret = 0;
+       int ret;
+
+       sock = sock_from_file(req->file);
+       if (unlikely(!sock))
+               return -ENOTSOCK;
+
+       if (req_has_async_data(req)) {
+               kmsg = req->async_data;
+       } else {
+               ret = io_sendmsg_copy_hdr(req, &iomsg);
+               if (ret)
+                       return ret;
+               kmsg = &iomsg;
+       }
+
+       if (!(req->flags & REQ_F_POLLED) &&
+           (sr->flags & IORING_RECVSEND_POLL_FIRST))
+               return io_setup_async_msg(req, kmsg);
+
+       flags = sr->msg_flags;
+       if (issue_flags & IO_URING_F_NONBLOCK)
+               flags |= MSG_DONTWAIT;
+       if (flags & MSG_WAITALL)
+               min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+
+       ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
+
+       if (ret < min_ret) {
+               if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
+                       return io_setup_async_msg(req, kmsg);
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
+               if (ret > 0 && io_net_retry(sock, flags)) {
+                       sr->done_io += ret;
+                       req->flags |= REQ_F_PARTIAL_IO;
+                       return io_setup_async_msg(req, kmsg);
+               }
+               req_set_fail(req);
+       }
+       /* fast path, check for non-NULL to avoid function call */
+       if (kmsg->free_iov)
+               kfree(kmsg->free_iov);
+       req->flags &= ~REQ_F_NEED_CLEANUP;
+       if (ret >= 0)
+               ret += sr->done_io;
+       else if (sr->done_io)
+               ret = sr->done_io;
+       io_req_set_res(req, ret, 0);
+       return IOU_OK;
+}
+
+int io_send(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+       struct msghdr msg;
+       struct iovec iov;
+       struct socket *sock;
+       unsigned flags;
+       int min_ret = 0;
+       int ret;
+
+       if (!(req->flags & REQ_F_POLLED) &&
+           (sr->flags & IORING_RECVSEND_POLL_FIRST))
+               return -EAGAIN;
+
+       sock = sock_from_file(req->file);
+       if (unlikely(!sock))
+               return -ENOTSOCK;
+
+       ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
+       if (unlikely(ret))
+               return ret;
+
+       msg.msg_name = NULL;
+       msg.msg_control = NULL;
+       msg.msg_controllen = 0;
+       msg.msg_namelen = 0;
+
+       flags = sr->msg_flags;
+       if (issue_flags & IO_URING_F_NONBLOCK)
+               flags |= MSG_DONTWAIT;
+       if (flags & MSG_WAITALL)
+               min_ret = iov_iter_count(&msg.msg_iter);
+
+       msg.msg_flags = flags;
+       ret = sock_sendmsg(sock, &msg);
+       if (ret < min_ret) {
+               if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
+                       return -EAGAIN;
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
+               if (ret > 0 && io_net_retry(sock, flags)) {
+                       sr->len -= ret;
+                       sr->buf += ret;
+                       sr->done_io += ret;
+                       req->flags |= REQ_F_PARTIAL_IO;
+                       return -EAGAIN;
+               }
+               req_set_fail(req);
+       }
+       if (ret >= 0)
+               ret += sr->done_io;
+       else if (sr->done_io)
+               ret = sr->done_io;
+       io_req_set_res(req, ret, 0);
+       return IOU_OK;
+}
+
+static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
+                                struct io_async_msghdr *iomsg)
+{
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+       struct iovec __user *uiov;
+       size_t iov_len;
+       int ret;
+
+       ret = __copy_msghdr_from_user(&iomsg->msg, sr->umsg,
+                                       &iomsg->uaddr, &uiov, &iov_len);
+       if (ret)
+               return ret;
+
+       if (req->flags & REQ_F_BUFFER_SELECT) {
+               if (iov_len > 1)
+                       return -EINVAL;
+               if (copy_from_user(iomsg->fast_iov, uiov, sizeof(*uiov)))
+                       return -EFAULT;
+               sr->len = iomsg->fast_iov[0].iov_len;
+               iomsg->free_iov = NULL;
+       } else {
+               iomsg->free_iov = iomsg->fast_iov;
+               ret = __import_iovec(READ, uiov, iov_len, UIO_FASTIOV,
+                                    &iomsg->free_iov, &iomsg->msg.msg_iter,
+                                    false);
+               if (ret > 0)
+                       ret = 0;
+       }
+
+       return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
+                                       struct io_async_msghdr *iomsg)
+{
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+       struct compat_iovec __user *uiov;
+       compat_uptr_t ptr;
+       compat_size_t len;
+       int ret;
+
+       ret = __get_compat_msghdr(&iomsg->msg, sr->umsg_compat, &iomsg->uaddr,
+                                 &ptr, &len);
+       if (ret)
+               return ret;
+
+       uiov = compat_ptr(ptr);
+       if (req->flags & REQ_F_BUFFER_SELECT) {
+               compat_ssize_t clen;
+
+               if (len > 1)
+                       return -EINVAL;
+               if (!access_ok(uiov, sizeof(*uiov)))
+                       return -EFAULT;
+               if (__get_user(clen, &uiov->iov_len))
+                       return -EFAULT;
+               if (clen < 0)
+                       return -EINVAL;
+               sr->len = clen;
+               iomsg->free_iov = NULL;
+       } else {
+               iomsg->free_iov = iomsg->fast_iov;
+               ret = __import_iovec(READ, (struct iovec __user *)uiov, len,
+                                  UIO_FASTIOV, &iomsg->free_iov,
+                                  &iomsg->msg.msg_iter, true);
+               if (ret < 0)
+                       return ret;
+       }
+
+       return 0;
+}
+#endif
+
+static int io_recvmsg_copy_hdr(struct io_kiocb *req,
+                              struct io_async_msghdr *iomsg)
+{
+       iomsg->msg.msg_name = &iomsg->addr;
+
+#ifdef CONFIG_COMPAT
+       if (req->ctx->compat)
+               return __io_compat_recvmsg_copy_hdr(req, iomsg);
+#endif
+
+       return __io_recvmsg_copy_hdr(req, iomsg);
+}
+
+int io_recvmsg_prep_async(struct io_kiocb *req)
+{
+       int ret;
+
+       ret = io_recvmsg_copy_hdr(req, req->async_data);
+       if (!ret)
+               req->flags |= REQ_F_NEED_CLEANUP;
+       return ret;
+}
+
+int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+
+       if (unlikely(sqe->file_index || sqe->addr2))
+               return -EINVAL;
+
+       sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+       sr->len = READ_ONCE(sqe->len);
+       sr->flags = READ_ONCE(sqe->ioprio);
+       if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
+               return -EINVAL;
+       sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
+       if (sr->msg_flags & MSG_DONTWAIT)
+               req->flags |= REQ_F_NOWAIT;
+       if (sr->msg_flags & MSG_ERRQUEUE)
+               req->flags |= REQ_F_CLEAR_POLLIN;
+
+#ifdef CONFIG_COMPAT
+       if (req->ctx->compat)
+               sr->msg_flags |= MSG_CMSG_COMPAT;
+#endif
+       sr->done_io = 0;
+       return 0;
+}
+
+int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+       struct io_async_msghdr iomsg, *kmsg;
+       struct socket *sock;
+       unsigned int cflags;
+       unsigned flags;
+       int ret, min_ret = 0;
+       bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+
+       sock = sock_from_file(req->file);
+       if (unlikely(!sock))
+               return -ENOTSOCK;
+
+       if (req_has_async_data(req)) {
+               kmsg = req->async_data;
+       } else {
+               ret = io_recvmsg_copy_hdr(req, &iomsg);
+               if (ret)
+                       return ret;
+               kmsg = &iomsg;
+       }
+
+       if (!(req->flags & REQ_F_POLLED) &&
+           (sr->flags & IORING_RECVSEND_POLL_FIRST))
+               return io_setup_async_msg(req, kmsg);
+
+       if (io_do_buffer_select(req)) {
+               void __user *buf;
+
+               buf = io_buffer_select(req, &sr->len, issue_flags);
+               if (!buf)
+                       return -ENOBUFS;
+               kmsg->fast_iov[0].iov_base = buf;
+               kmsg->fast_iov[0].iov_len = sr->len;
+               iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1,
+                               sr->len);
+       }
+
+       flags = sr->msg_flags;
+       if (force_nonblock)
+               flags |= MSG_DONTWAIT;
+       if (flags & MSG_WAITALL)
+               min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+
+       kmsg->msg.msg_get_inq = 1;
+       ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags);
+       if (ret < min_ret) {
+               if (ret == -EAGAIN && force_nonblock)
+                       return io_setup_async_msg(req, kmsg);
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
+               if (ret > 0 && io_net_retry(sock, flags)) {
+                       sr->done_io += ret;
+                       req->flags |= REQ_F_PARTIAL_IO;
+                       return io_setup_async_msg(req, kmsg);
+               }
+               req_set_fail(req);
+       } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
+               req_set_fail(req);
+       }
+
+       /* fast path, check for non-NULL to avoid function call */
+       if (kmsg->free_iov)
+               kfree(kmsg->free_iov);
+       req->flags &= ~REQ_F_NEED_CLEANUP;
+       if (ret >= 0)
+               ret += sr->done_io;
+       else if (sr->done_io)
+               ret = sr->done_io;
+       cflags = io_put_kbuf(req, issue_flags);
+       if (kmsg->msg.msg_inq)
+               cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+       io_req_set_res(req, ret, cflags);
+       return IOU_OK;
+}
+
+int io_recv(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+       struct msghdr msg;
+       struct socket *sock;
+       struct iovec iov;
+       unsigned int cflags;
+       unsigned flags;
+       int ret, min_ret = 0;
+       bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+
+       if (!(req->flags & REQ_F_POLLED) &&
+           (sr->flags & IORING_RECVSEND_POLL_FIRST))
+               return -EAGAIN;
+
+       sock = sock_from_file(req->file);
+       if (unlikely(!sock))
+               return -ENOTSOCK;
+
+       if (io_do_buffer_select(req)) {
+               void __user *buf;
+
+               buf = io_buffer_select(req, &sr->len, issue_flags);
+               if (!buf)
+                       return -ENOBUFS;
+               sr->buf = buf;
+       }
+
+       ret = import_single_range(READ, sr->buf, sr->len, &iov, &msg.msg_iter);
+       if (unlikely(ret))
+               goto out_free;
+
+       msg.msg_name = NULL;
+       msg.msg_namelen = 0;
+       msg.msg_control = NULL;
+       msg.msg_get_inq = 1;
+       msg.msg_flags = 0;
+       msg.msg_controllen = 0;
+       msg.msg_iocb = NULL;
+
+       flags = sr->msg_flags;
+       if (force_nonblock)
+               flags |= MSG_DONTWAIT;
+       if (flags & MSG_WAITALL)
+               min_ret = iov_iter_count(&msg.msg_iter);
+
+       ret = sock_recvmsg(sock, &msg, flags);
+       if (ret < min_ret) {
+               if (ret == -EAGAIN && force_nonblock)
+                       return -EAGAIN;
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
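+               /* partial receive: advance the buffer, record progress, and retry */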
+               if (ret > 0 && io_net_retry(sock, flags)) {
+                       sr->len -= ret;
+                       sr->buf += ret;
+                       sr->done_io += ret;
+                       req->flags |= REQ_F_PARTIAL_IO;
+                       return -EAGAIN;
+               }
+               req_set_fail(req);
+       } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
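+               /* a failed buffer import jumps here as well */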
+out_free:
+               req_set_fail(req);
+       }
+
+       if (ret >= 0)
+               ret += sr->done_io;
+       else if (sr->done_io)
+               ret = sr->done_io;
+       cflags = io_put_kbuf(req, issue_flags);
+       if (msg.msg_inq)
+               cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+       io_req_set_res(req, ret, cflags);
+       return IOU_OK;
+}
+
+int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+       struct io_accept *accept = io_kiocb_to_cmd(req);
+       unsigned flags;
+
+       if (sqe->len || sqe->buf_index)
+               return -EINVAL;
+
+       accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+       accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+       accept->flags = READ_ONCE(sqe->accept_flags);
+       accept->nofile = rlimit(RLIMIT_NOFILE);
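+       /* io_uring-level accept flags travel in ->ioprio */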
+       flags = READ_ONCE(sqe->ioprio);
+       if (flags & ~IORING_ACCEPT_MULTISHOT)
+               return -EINVAL;
+
+       accept->file_slot = READ_ONCE(sqe->file_index);
+       if (accept->file_slot) {
+               if (accept->flags & SOCK_CLOEXEC)
+                       return -EINVAL;
+               if (flags & IORING_ACCEPT_MULTISHOT &&
+                   accept->file_slot != IORING_FILE_INDEX_ALLOC)
+                       return -EINVAL;
+       }
+       if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+               return -EINVAL;
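+       /* translate SOCK_NONBLOCK on archs where it differs from O_NONBLOCK */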
+       if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
+               accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+       if (flags & IORING_ACCEPT_MULTISHOT)
+               req->flags |= REQ_F_APOLL_MULTISHOT;
+       return 0;
+}
+
+int io_accept(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_ring_ctx *ctx = req->ctx;
+       struct io_accept *accept = io_kiocb_to_cmd(req);
+       bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+       unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
+       bool fixed = !!accept->file_slot;
+       struct file *file;
+       int ret, fd;
+
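+       /* multishot accept jumps back here after each successful completion */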
+retry:
+       if (!fixed) {
+               fd = __get_unused_fd_flags(accept->flags, accept->nofile);
+               if (unlikely(fd < 0))
+                       return fd;
+       }
+       file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
+                        accept->flags);
+       if (IS_ERR(file)) {
+               if (!fixed)
+                       put_unused_fd(fd);
+               ret = PTR_ERR(file);
+               if (ret == -EAGAIN && force_nonblock) {
+                       /*
+                        * If the request is multishot and poll has already
+                        * been armed, there is no need to return -EAGAIN to
+                        * arm the poll infra again.
+                        */
+                       if ((req->flags & IO_APOLL_MULTI_POLLED) ==
+                           IO_APOLL_MULTI_POLLED)
+                               ret = IOU_ISSUE_SKIP_COMPLETE;
+                       return ret;
+               }
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
+               req_set_fail(req);
+       } else if (!fixed) {
+               fd_install(fd, file);
+               ret = fd;
+       } else {
+               ret = io_fixed_fd_install(req, issue_flags, file,
+                                               accept->file_slot);
+       }
+
+       if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
+               io_req_set_res(req, ret, 0);
+               return IOU_OK;
+       }
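+       /* multishot: post an extra CQE with IORING_CQE_F_MORE and accept again */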
+       if (ret >= 0) {
+               bool filled;
+
+               spin_lock(&ctx->completion_lock);
+               filled = io_fill_cqe_aux(ctx, req->cqe.user_data, ret,
+                                        IORING_CQE_F_MORE);
+               io_commit_cqring(ctx);
+               spin_unlock(&ctx->completion_lock);
+               if (filled) {
+                       io_cqring_ev_posted(ctx);
+                       goto retry;
+               }
+               ret = -ECANCELED;
+       }
+
+       return ret;
+}
+
+int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+       struct io_socket *sock = io_kiocb_to_cmd(req);
+
+       if (sqe->addr || sqe->rw_flags || sqe->buf_index)
+               return -EINVAL;
+
+       sock->domain = READ_ONCE(sqe->fd);
+       sock->type = READ_ONCE(sqe->off);
+       sock->protocol = READ_ONCE(sqe->len);
+       sock->file_slot = READ_ONCE(sqe->file_index);
+       sock->nofile = rlimit(RLIMIT_NOFILE);
+
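+       /* bits above SOCK_TYPE_MASK in ->type are flags (SOCK_NONBLOCK/SOCK_CLOEXEC) */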
+       sock->flags = sock->type & ~SOCK_TYPE_MASK;
+       if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
+               return -EINVAL;
+       if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+               return -EINVAL;
+       return 0;
+}
+
+int io_socket(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_socket *sock = io_kiocb_to_cmd(req);
+       bool fixed = !!sock->file_slot;
+       struct file *file;
+       int ret, fd;
+
+       if (!fixed) {
+               fd = __get_unused_fd_flags(sock->flags, sock->nofile);
+               if (unlikely(fd < 0))
+                       return fd;
+       }
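+       /* create the socket file without installing it into the fd table yet */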
+       file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
+       if (IS_ERR(file)) {
+               if (!fixed)
+                       put_unused_fd(fd);
+               ret = PTR_ERR(file);
+               if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
+                       return -EAGAIN;
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
+               req_set_fail(req);
+       } else if (!fixed) {
+               fd_install(fd, file);
+               ret = fd;
+       } else {
+               ret = io_fixed_fd_install(req, issue_flags, file,
+                                           sock->file_slot);
+       }
+       io_req_set_res(req, ret, 0);
+       return IOU_OK;
+}
+
+int io_connect_prep_async(struct io_kiocb *req)
+{
+       struct io_async_connect *io = req->async_data;
+       struct io_connect *conn = io_kiocb_to_cmd(req);
+
+       return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
+}
+
+int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+       struct io_connect *conn = io_kiocb_to_cmd(req);
+
+       if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
+               return -EINVAL;
+
+       conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+       conn->addr_len = READ_ONCE(sqe->addr2);
+       return 0;
+}
+
+int io_connect(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_connect *connect = io_kiocb_to_cmd(req);
+       struct io_async_connect __io, *io;
+       unsigned file_flags;
+       int ret;
+       bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+
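+       /* reuse the copied address on retry, else pull it in from userspace */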
+       if (req_has_async_data(req)) {
+               io = req->async_data;
+       } else {
+               ret = move_addr_to_kernel(connect->addr,
+                                               connect->addr_len,
+                                               &__io.address);
+               if (ret)
+                       goto out;
+               io = &__io;
+       }
+
+       file_flags = force_nonblock ? O_NONBLOCK : 0;
+
+       ret = __sys_connect_file(req->file, &io->address,
+                                       connect->addr_len, file_flags);
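+       /* the connect is still in flight: stash the address for the async retry */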
+       if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
+               if (req_has_async_data(req))
+                       return -EAGAIN;
+               if (io_alloc_async_data(req)) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+               memcpy(req->async_data, &__io, sizeof(__io));
+               return -EAGAIN;
+       }
+       if (ret == -ERESTARTSYS)
+               ret = -EINTR;
+out:
+       if (ret < 0)
+               req_set_fail(req);
+       io_req_set_res(req, ret, 0);
+       return IOU_OK;
+}
+#endif
diff --git a/io_uring/net.h b/io_uring/net.h
new file mode 100644 (file)
index 0000000..81d71d1
--- /dev/null
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/net.h>
+#include <linux/uio.h>
+
+#if defined(CONFIG_NET)
+struct io_async_msghdr {
+       struct iovec                    fast_iov[UIO_FASTIOV];
+       /* points to an allocated iov; if NULL, fast_iov is used instead */
+       struct iovec                    *free_iov;
+       struct sockaddr __user          *uaddr;
+       struct msghdr                   msg;
+       struct sockaddr_storage         addr;
+};
+
+struct io_async_connect {
+       struct sockaddr_storage         address;
+};
+
+int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_shutdown(struct io_kiocb *req, unsigned int issue_flags);
+
+int io_sendmsg_prep_async(struct io_kiocb *req);
+void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req);
+int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags);
+int io_send(struct io_kiocb *req, unsigned int issue_flags);
+
+int io_recvmsg_prep_async(struct io_kiocb *req);
+int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags);
+int io_recv(struct io_kiocb *req, unsigned int issue_flags);
+
+int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_accept(struct io_kiocb *req, unsigned int issue_flags);
+
+int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_socket(struct io_kiocb *req, unsigned int issue_flags);
+
+int io_connect_prep_async(struct io_kiocb *req);
+int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_connect(struct io_kiocb *req, unsigned int issue_flags);
+#endif