xsk: allow remap of fill and/or completion rings
author: Nuno Gonçalves <nunog@fr24.com>
Fri, 24 Mar 2023 10:02:22 +0000 (10:02 +0000)
committer: Alexei Starovoitov <ast@kernel.org>
Sun, 26 Mar 2023 04:07:35 +0000 (21:07 -0700)
Remapping the fill and completion rings was frowned upon because
they control the usage of the UMEM, which does not support
concurrent use. However, that restriction also prevents these rings
from being remapped into another process.

A possible use case is a user who wants to transfer socket/UMEM
ownership to another process (via SYS_pidfd_getfd) and therefore
also needs to remap these rings.

This has no impact on existing users; it only relaxes the remap
limitation.

Signed-off-by: Nuno Gonçalves <nunog@fr24.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/r/20230324100222.13434-1-nunog@fr24.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
net/xdp/xsk.c

index 2ac58b2..cc1e7f1 100644 (file)
@@ -1301,9 +1301,10 @@ static int xsk_mmap(struct file *file, struct socket *sock,
        loff_t offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
        unsigned long size = vma->vm_end - vma->vm_start;
        struct xdp_sock *xs = xdp_sk(sock->sk);
+       int state = READ_ONCE(xs->state);
        struct xsk_queue *q = NULL;
 
-       if (READ_ONCE(xs->state) != XSK_READY)
+       if (state != XSK_READY && state != XSK_BOUND)
                return -EBUSY;
 
        if (offset == XDP_PGOFF_RX_RING) {
@@ -1314,9 +1315,11 @@ static int xsk_mmap(struct file *file, struct socket *sock,
                /* Matches the smp_wmb() in XDP_UMEM_REG */
                smp_rmb();
                if (offset == XDP_UMEM_PGOFF_FILL_RING)
-                       q = READ_ONCE(xs->fq_tmp);
+                       q = state == XSK_READY ? READ_ONCE(xs->fq_tmp) :
+                                                READ_ONCE(xs->pool->fq);
                else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
-                       q = READ_ONCE(xs->cq_tmp);
+                       q = state == XSK_READY ? READ_ONCE(xs->cq_tmp) :
+                                                READ_ONCE(xs->pool->cq);
        }
 
        if (!q)