tcp-zerocopy: Return inq along with tcp receive zerocopy.
author: Arjun Roy <arjunroy@google.com>
Fri, 14 Feb 2020 23:30:49 +0000 (15:30 -0800)
committer: David S. Miller <davem@davemloft.net>
Mon, 17 Feb 2020 03:25:02 +0000 (19:25 -0800)
This patchset is intended to reduce the number of extra system calls
imposed by TCP receive zerocopy. For ping-pong RPC style workloads,
this patchset has demonstrated a system call reduction of about 30%
when coupled with userspace changes.

For applications using edge-triggered epoll, returning inq along with
the result of tcp receive zerocopy could remove the need to issue an
extra recvmsg() call, whose only purpose is to observe -EAGAIN, after a
successful zerocopy. Generally speaking,
since normally we would need to perform a recvmsg() call for every
successful small RPC read via TCP receive zerocopy, returning inq can
reduce the number of system calls performed by approximately half.

Signed-off-by: Arjun Roy <arjunroy@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/uapi/linux/tcp.h
net/ipv4/tcp.c

index fd9eb8f..548f480 100644 (file)
@@ -345,5 +345,6 @@ struct tcp_zerocopy_receive {
        __u64 address;          /* in: address of mapping */
        __u32 length;           /* in/out: number of bytes to map/mapped */
        __u32 recv_skip_hint;   /* out: amount of bytes to skip */
+       __u32 inq; /* out: amount of bytes in read queue */
 };
 #endif /* _UAPI_LINUX_TCP_H */
index eb2d805..a697f14 100644 (file)
@@ -3667,13 +3667,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 
                if (get_user(len, optlen))
                        return -EFAULT;
-               if (len != sizeof(zc))
+               if (len < offsetofend(struct tcp_zerocopy_receive, length))
                        return -EINVAL;
+               if (len > sizeof(zc))
+                       len = sizeof(zc);
                if (copy_from_user(&zc, optval, len))
                        return -EFAULT;
                lock_sock(sk);
                err = tcp_zerocopy_receive(sk, &zc);
                release_sock(sk);
+               switch (len) {
+               case sizeof(zc):
+               case offsetofend(struct tcp_zerocopy_receive, inq):
+                       goto zerocopy_rcv_inq;
+               case offsetofend(struct tcp_zerocopy_receive, length):
+               default:
+                       goto zerocopy_rcv_out;
+               }
+zerocopy_rcv_inq:
+               zc.inq = tcp_inq_hint(sk);
+zerocopy_rcv_out:
                if (!err && copy_to_user(optval, &zc, len))
                        err = -EFAULT;
                return err;