net: Add a function to splice pages into an skbuff for MSG_SPLICE_PAGES
author David Howells <dhowells@redhat.com>
Mon, 22 May 2023 12:11:12 +0000 (13:11 +0100)
committer Jakub Kicinski <kuba@kernel.org>
Wed, 24 May 2023 03:48:27 +0000 (20:48 -0700)
Add a function to handle MSG_SPLICE_PAGES being passed internally to
sendmsg().  Pages are spliced into the given socket buffer if possible and
copied in if not (e.g. they're slab pages or have a zero refcount).

Signed-off-by: David Howells <dhowells@redhat.com>
cc: David Ahern <dsahern@kernel.org>
cc: Al Viro <viro@zeniv.linux.org.uk>
cc: Jens Axboe <axboe@kernel.dk>
cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/linux/skbuff.h
net/core/skbuff.c

index 1501140..1b2ebf6 100644 (file)
@@ -5097,5 +5097,8 @@ static inline void skb_mark_for_recycle(struct sk_buff *skb)
 #endif
 }
 
+ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
+                            ssize_t maxsize, gfp_t gfp);
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SKBUFF_H */
index 7f53dcb..f4a5b51 100644 (file)
@@ -6892,3 +6892,91 @@ nodefer: __kfree_skb(skb);
        if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
                smp_call_function_single_async(cpu, &sd->defer_csd);
 }
+
+/* Checksum @len bytes at @offset in @page and fold the sum into skb->csum. */
+static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
+                                size_t offset, size_t len)
+{
+       const char *vaddr = kmap_local_page(page);
+       __wsum frag_csum = csum_partial(vaddr + offset, len, 0);
+
+       /* The temporary mapping is no longer needed once the sum is taken. */
+       kunmap_local(vaddr);
+       skb->csum = csum_block_add(skb->csum, frag_csum, skb->len);
+}
+
+/**
+ * skb_splice_from_iter - Splice pages from an iterator into an skbuff
+ * @skb: The buffer to add pages to
+ * @iter: Iterator representing the pages to be added
+ * @maxsize: Maximum amount of data, in bytes, to be added
+ * @gfp: Allocation flags (not used by the current implementation)
+ *
+ * This is a common helper function for supporting MSG_SPLICE_PAGES.  It
+ * extracts pages from an iterator and attaches them to the socket buffer as
+ * page fragments.  A page that fails sendpage_ok() is not copied here; it
+ * triggers a one-off warning and stops the transfer with -EIO.
+ *
+ * Returns the amount of data spliced or, if nothing was spliced, 0 for an
+ * empty iterator or a negative error such as -EMSGSIZE (no fragment space).
+ */
+ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
+                            ssize_t maxsize, gfp_t gfp)
+{
+       size_t frag_limit = READ_ONCE(sysctl_max_skb_frags);
+       struct page *pages[8], **ppages = pages;
+       ssize_t spliced = 0, ret = 0;
+       unsigned int i;
+
+       while (iter->count > 0) {
+               ssize_t space, nr, len; /* len is signed: may hold -errno */
+               size_t off;
+
+               ret = -EMSGSIZE;
+               space = frag_limit - skb_shinfo(skb)->nr_frags;
+               if (space < 0)
+                       break;
+
+               /* We might be able to coalesce without increasing nr_frags */
+               nr = clamp_t(size_t, space, 1, ARRAY_SIZE(pages));
+
+               len = iov_iter_extract_pages(iter, &ppages, maxsize, nr, 0, &off);
+               if (len <= 0) {
+                       ret = len ?: -EIO;
+                       break;
+               }
+
+               i = 0;
+               do {
+                       struct page *page = pages[i++];
+                       size_t part = min_t(size_t, PAGE_SIZE - off, len);
+
+                       ret = -EIO;
+                       if (WARN_ON_ONCE(!sendpage_ok(page)))
+                               goto out;
+
+                       ret = skb_append_pagefrags(skb, page, off, part,
+                                                  frag_limit);
+                       if (ret < 0) {
+                               iov_iter_revert(iter, len);
+                               goto out;
+                       }
+
+                       if (skb->ip_summed == CHECKSUM_NONE)
+                               skb_splice_csum_page(skb, page, off, part);
+
+                       off = 0;
+                       spliced += part;
+                       maxsize -= part;
+                       len -= part;
+               } while (len > 0);
+
+               if (maxsize <= 0)
+                       break;
+       }
+
+out:
+       skb_len_add(skb, spliced);
+       return spliced ?: ret;
+}
+EXPORT_SYMBOL(skb_splice_from_iter);