af_unix: Support MSG_SPLICE_PAGES
author: David Howells <dhowells@redhat.com>
Mon, 22 May 2023 12:11:24 +0000 (13:11 +0100)
committer: Jakub Kicinski <kuba@kernel.org>
Wed, 24 May 2023 03:48:27 +0000 (20:48 -0700)
Make AF_UNIX sendmsg() support MSG_SPLICE_PAGES, splicing in pages from the
source iterator if possible and copying the data in otherwise.

This allows ->sendpage() to be replaced by something that can handle
multiple multipage folios in a single transaction.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Kuniyuki Iwashima <kuniyu@amazon.com>
cc: Jens Axboe <axboe@kernel.dk>
cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/unix/af_unix.c

index dd55506..976bc1c 100644 (file)
@@ -2200,19 +2200,25 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
        while (sent < len) {
                size = len - sent;
 
-               /* Keep two messages in the pipe so it schedules better */
-               size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
+               if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
+                       skb = sock_alloc_send_pskb(sk, 0, 0,
+                                                  msg->msg_flags & MSG_DONTWAIT,
+                                                  &err, 0);
+               } else {
+                       /* Keep two messages in the pipe so it schedules better */
+                       size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
 
-               /* allow fallback to order-0 allocations */
-               size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
+                       /* allow fallback to order-0 allocations */
+                       size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
 
-               data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
+                       data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
 
-               data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
+                       data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
 
-               skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
-                                          msg->msg_flags & MSG_DONTWAIT, &err,
-                                          get_order(UNIX_SKB_FRAGS_SZ));
+                       skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
+                                                  msg->msg_flags & MSG_DONTWAIT, &err,
+                                                  get_order(UNIX_SKB_FRAGS_SZ));
+               }
                if (!skb)
                        goto out_err;
 
@@ -2224,13 +2230,24 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
                }
                fds_sent = true;
 
-               skb_put(skb, size - data_len);
-               skb->data_len = data_len;
-               skb->len = size;
-               err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
-               if (err) {
-                       kfree_skb(skb);
-                       goto out_err;
+               if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
+                       err = skb_splice_from_iter(skb, &msg->msg_iter, size,
+                                                  sk->sk_allocation);
+                       if (err < 0) {
+                               kfree_skb(skb);
+                               goto out_err;
+                       }
+                       size = err;
+                       refcount_add(size, &sk->sk_wmem_alloc);
+               } else {
+                       skb_put(skb, size - data_len);
+                       skb->data_len = data_len;
+                       skb->len = size;
+                       err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
+                       if (err) {
+                               kfree_skb(skb);
+                               goto out_err;
+                       }
                }
 
                unix_state_lock(other);