tcp: improve recv_skip_hint for tcp_zerocopy_receive
author: Soheil Hassas Yeganeh <soheil@google.com>
Fri, 11 Oct 2019 03:27:02 +0000 (23:27 -0400)
committer: David S. Miller <davem@davemloft.net>
Sun, 13 Oct 2019 18:16:25 +0000 (11:16 -0700)
tcp_zerocopy_receive() rounds zc->length down to a multiple of
PAGE_SIZE. This results in two issues:
- tcp_zerocopy_receive sets recv_skip_hint to the length of the
  receive queue if the zc->length input is smaller than
  PAGE_SIZE, even though the data in the receive queue could be
  zerocopied.
- tcp_zerocopy_receive would set recv_skip_hint to 0 in cases
  where we have a little bit of data after the perfectly-sized
  packets.

To fix these issues, do not store the rounded-down value in
zc->length. Instead, round down only the length passed to
zap_page_range(), and return min(inq, zc->length) in recv_skip_hint
when that rounded-down length (zap_len) is 0.

Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/ipv4/tcp.c

index f98a1882e537dca0102e829cb349be50302d83ab..9f41a76c1c543df51db0b2a1f38b85396022bb85 100644 (file)
@@ -1739,8 +1739,8 @@ static int tcp_zerocopy_receive(struct sock *sk,
                                struct tcp_zerocopy_receive *zc)
 {
        unsigned long address = (unsigned long)zc->address;
+       u32 length = 0, seq, offset, zap_len;
        const skb_frag_t *frags = NULL;
-       u32 length = 0, seq, offset;
        struct vm_area_struct *vma;
        struct sk_buff *skb = NULL;
        struct tcp_sock *tp;
@@ -1767,12 +1767,12 @@ static int tcp_zerocopy_receive(struct sock *sk,
        seq = tp->copied_seq;
        inq = tcp_inq(sk);
        zc->length = min_t(u32, zc->length, inq);
-       zc->length &= ~(PAGE_SIZE - 1);
-       if (zc->length) {
-               zap_page_range(vma, address, zc->length);
+       zap_len = zc->length & ~(PAGE_SIZE - 1);
+       if (zap_len) {
+               zap_page_range(vma, address, zap_len);
                zc->recv_skip_hint = 0;
        } else {
-               zc->recv_skip_hint = inq;
+               zc->recv_skip_hint = zc->length;
        }
        ret = 0;
        while (length + PAGE_SIZE <= zc->length) {