io_uring: enable managed frags with register buffers
Author:      Pavel Begunkov <asml.silence@gmail.com>
Author date: Tue, 12 Jul 2022 20:52:50 +0000 (21:52 +0100)
Committer:   Jens Axboe <axboe@kernel.dk>
Commit date: Mon, 25 Jul 2022 00:41:07 +0000 (18:41 -0600)
io_uring's registered buffers infra has a good performant way of pinning
pages, so let's use SKBFL_MANAGED_FRAG_REFS when our requests are purely
register buffer backed.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/278731d3f20caf346cfc025fbee0b4c9ee4ed751.1657643355.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
io_uring/net.c

index 9ac2ce3..62be898 100644 (file)
@@ -886,6 +886,60 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return 0;
 }
 
+/*
+ * Custom ->sg_from_iter for zerocopy sends backed by io_uring registered
+ * buffers: fill skb frags straight from a bvec iterator and mark the skb
+ * SKBFL_MANAGED_FRAG_REFS so the pages are pinned by io_uring's registered
+ * buffer infrastructure rather than per-frag page references.
+ *
+ * Returns 0 on success, -EMSGSIZE if the skb ran out of frag slots before
+ * consuming the iterator, or the result of __zerocopy_sg_from_iter() on the
+ * fallback path.
+ */
+static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
+                          struct iov_iter *from, size_t length)
+{
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       int frag = shinfo->nr_frags;
+       int ret = 0;
+       struct bvec_iter bi;
+       ssize_t copied = 0;
+       unsigned long truesize = 0;
+
+       /*
+        * Only claim managed-frag semantics while the skb is still empty;
+        * pre-existing frags were added under normal ref accounting and
+        * must not be mixed with unreferenced managed frags.
+        */
+       if (!shinfo->nr_frags)
+               shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
+
+       /*
+        * Fallback: if the skb isn't (or can no longer be) managed, or the
+        * iterator isn't bvec-backed (i.e. not a registered buffer), drop the
+        * managed flag and take the generic zerocopy path, which grabs page
+        * references per frag.
+        */
+       if (!skb_zcopy_managed(skb) || !iov_iter_is_bvec(from)) {
+               skb_zcopy_downgrade_managed(skb);
+               return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
+       }
+
+       /* Seed a local bvec_iter from the iov_iter's current position. */
+       bi.bi_size = min(from->count, length);
+       bi.bi_bvec_done = from->iov_offset;
+       bi.bi_idx = 0;
+
+       while (bi.bi_size && frag < MAX_SKB_FRAGS) {
+               struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);
+
+               copied += v.bv_len;
+               /* truesize accounts whole pages touched by this segment */
+               truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
+               /* "noacc": fills the frag without taking a page reference */
+               __skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
+                                          v.bv_offset, v.bv_len);
+               bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
+       }
+       /* Iterator not fully consumed: skb frag slots exhausted. */
+       if (bi.bi_size)
+               ret = -EMSGSIZE;
+
+       /* Write the consumed position back into the caller's iov_iter. */
+       shinfo->nr_frags = frag;
+       from->bvec += bi.bi_idx;
+       from->nr_segs -= bi.bi_idx;
+       from->count = bi.bi_size;
+       from->iov_offset = bi.bi_bvec_done;
+
+       skb->data_len += copied;
+       skb->len += copied;
+       skb->truesize += truesize;
+
+       /*
+        * Charge the added truesize to the socket: queued/forward-alloc
+        * accounting for stream sockets (skipped for pure-zerocopy skbs),
+        * plain sk_wmem_alloc otherwise. NOTE(review): the else branch reads
+        * the owner via skb->sk rather than the sk argument — mirrors
+        * __zerocopy_sg_from_iter(); confirm skb is always socket-owned here.
+        */
+       if (sk && sk->sk_type == SOCK_STREAM) {
+               sk_wmem_queued_add(sk, truesize);
+               if (!skb_zcopy_pure(skb))
+                       sk_mem_charge(sk, truesize);
+       } else {
+               refcount_add(truesize, &skb->sk->sk_wmem_alloc);
+       }
+       return ret;
+}
+
 int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
 {
        struct sockaddr_storage address;
@@ -950,7 +1004,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
 
        msg.msg_flags = msg_flags;
        msg.msg_ubuf = &notif->uarg;
-       msg.sg_from_iter = NULL;
+       msg.sg_from_iter = io_sg_from_iter;
        ret = sock_sendmsg(sock, &msg);
 
        if (unlikely(ret < min_ret)) {