net: veth: add page_pool for page recycling
author:    Lorenzo Bianconi <lorenzo@kernel.org>
           Sat, 22 Apr 2023 18:54:32 +0000 (20:54 +0200)
committer: Jakub Kicinski <kuba@kernel.org>
           Tue, 25 Apr 2023 01:07:08 +0000 (18:07 -0700)
Introduce page_pool support in the veth driver in order to recycle
pages in the veth_convert_skb_to_xdp_buff() routine and avoid going
back to the page allocator every time an skb has to be rebuilt.
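
The gist of the change, as a condensed sketch (the helper name
veth_pp_build_skb() is illustrative and not part of the patch; error
handling and frag copying are trimmed, see the full diff below):

	static struct sk_buff *veth_pp_build_skb(struct veth_rq *rq)
	{
		struct page *page;
		struct sk_buff *nskb;

		/* pool-backed allocation instead of alloc_page() */
		page = page_pool_dev_alloc_pages(rq->page_pool);
		if (!page)
			return NULL;

		nskb = build_skb(page_address(page), PAGE_SIZE);
		if (!nskb) {
			/* return the page to the pool, not put_page() */
			page_pool_put_full_page(rq->page_pool, page, true);
			return NULL;
		}

		skb_reserve(nskb, VETH_XDP_HEADROOM);
		/* let the skb free path recycle the page into the pool */
		skb_mark_for_recycle(nskb);
		return nskb;
	}
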
The patch has been tested by sending TCP traffic to a veth pair where
the remote peer runs a simple XDP program that just returns XDP_PASS
(a minimal sketch of such a program follows the results below):

veth upstream codebase:
MTU 1500B: ~8 Gbps
MTU 8000B: ~13.9 Gbps

veth upstream codebase + page_pool support:
MTU 1500B: ~9.2 Gbps
MTU 8000B: ~16.2 Gbps
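
The pass-through XDP program used for this kind of test can be as
small as the following standalone sketch (the program name and object
layout are illustrative):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	SEC("xdp")
	int xdp_pass_prog(struct xdp_md *ctx)
	{
		return XDP_PASS;	/* pass every frame up the stack */
	}

	char _license[] SEC("license") = "GPL";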

Tested-by: Maryam Tahhan <mtahhan@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/Kconfig
drivers/net/veth.c

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index c34bd43..368c6f5 100644
@@ -402,6 +402,7 @@ config TUN_VNET_CROSS_LE
 
 config VETH
        tristate "Virtual ethernet pair device"
+       select PAGE_POOL
        help
          This device is a local ethernet tunnel. Devices are created in pairs.
          When one end receives the packet it appears on its pair and vice
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 4b3c664..35d2285 100644
@@ -26,6 +26,7 @@
 #include <linux/ptr_ring.h>
 #include <linux/bpf_trace.h>
 #include <linux/net_tstamp.h>
+#include <net/page_pool.h>
 
 #define DRV_NAME       "veth"
 #define DRV_VERSION    "1.0"
@@ -65,6 +66,7 @@ struct veth_rq {
        bool                    rx_notify_masked;
        struct ptr_ring         xdp_ring;
        struct xdp_rxq_info     xdp_rxq;
+       struct page_pool        *page_pool;
 };
 
 struct veth_priv {
@@ -727,17 +729,20 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
                        goto drop;
 
                /* Allocate skb head */
-               page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+               page = page_pool_dev_alloc_pages(rq->page_pool);
                if (!page)
                        goto drop;
 
                nskb = build_skb(page_address(page), PAGE_SIZE);
                if (!nskb) {
-                       put_page(page);
+                       page_pool_put_full_page(rq->page_pool, page, true);
                        goto drop;
                }
 
                skb_reserve(nskb, VETH_XDP_HEADROOM);
+               skb_copy_header(nskb, skb);
+               skb_mark_for_recycle(nskb);
+
                size = min_t(u32, skb->len, max_head_size);
                if (skb_copy_bits(skb, 0, nskb->data, size)) {
                        consume_skb(nskb);
@@ -745,7 +750,6 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
                }
                skb_put(nskb, size);
 
-               skb_copy_header(nskb, skb);
                head_off = skb_headroom(nskb) - skb_headroom(skb);
                skb_headers_offset_update(nskb, head_off);
 
@@ -754,7 +758,7 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
                len = skb->len - off;
 
                for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
-                       page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+                       page = page_pool_dev_alloc_pages(rq->page_pool);
                        if (!page) {
                                consume_skb(nskb);
                                goto drop;
@@ -1002,12 +1006,38 @@ static int veth_poll(struct napi_struct *napi, int budget)
        return done;
 }
 
+static int veth_create_page_pool(struct veth_rq *rq)
+{
+       struct page_pool_params pp_params = {
+               .order = 0,
+               .pool_size = VETH_RING_SIZE,
+               .nid = NUMA_NO_NODE,
+               .dev = &rq->dev->dev,
+       };
+
+       rq->page_pool = page_pool_create(&pp_params);
+       if (IS_ERR(rq->page_pool)) {
+               int err = PTR_ERR(rq->page_pool);
+
+               rq->page_pool = NULL;
+               return err;
+       }
+
+       return 0;
+}
+
 static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
 {
        struct veth_priv *priv = netdev_priv(dev);
        int err, i;
 
        for (i = start; i < end; i++) {
+               err = veth_create_page_pool(&priv->rq[i]);
+               if (err)
+                       goto err_page_pool;
+       }
+
+       for (i = start; i < end; i++) {
                struct veth_rq *rq = &priv->rq[i];
 
                err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL);
@@ -1027,6 +1057,11 @@ static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
 err_xdp_ring:
        for (i--; i >= start; i--)
                ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
+err_page_pool:
+       for (i = start; i < end; i++) {
+               page_pool_destroy(priv->rq[i].page_pool);
+               priv->rq[i].page_pool = NULL;
+       }
 
        return err;
 }
@@ -1056,6 +1091,11 @@ static void veth_napi_del_range(struct net_device *dev, int start, int end)
                rq->rx_notify_masked = false;
                ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
        }
+
+       for (i = start; i < end; i++) {
+               page_pool_destroy(priv->rq[i].page_pool);
+               priv->rq[i].page_pool = NULL;
+       }
 }
 
 static void veth_napi_del(struct net_device *dev)
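
Note on the recycling side: skb_mark_for_recycle() only tags the skb;
the pages actually flow back to rq->page_pool when the skb is freed,
instead of going to the page allocator. At the time of this patch the
helper essentially just sets a flag (simplified from
include/linux/skbuff.h):

	static inline void skb_mark_for_recycle(struct sk_buff *skb)
	{
		/* the skb free path checks pp_recycle and returns
		 * page_pool pages to their pool rather than freeing them
		 */
		skb->pp_recycle = 1;
	}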