tcp: fix zerocopy and notsent_lowat issues
author Eric Dumazet <edumazet@google.com>
Tue, 26 Mar 2019 15:34:55 +0000 (08:34 -0700)
committer David S. Miller <davem@davemloft.net>
Wed, 27 Mar 2019 20:59:02 +0000 (13:59 -0700)
My recent patch (472c2e07eef0, "tcp: add one skb cache for tx") had at least three problems:

1) TX zerocopy wants notification when skb is acknowledged,
   thus we need to call skb_zcopy_clear() if the skb is
   cached into sk->sk_tx_skb_cache

2) Some applications might expect precise EPOLLOUT
   notifications, so we need to update sk->sk_wmem_queued
   and call sk_mem_uncharge() from sk_wmem_free_skb()
   in all cases. The SOCK_QUEUE_SHRUNK flag must also be set.

3) Reuse of the saved skb should have used skb_cloned() instead
   of simply checking if the fast clone has been freed.

Fixes: 472c2e07eef0 ("tcp: add one skb cache for tx")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/sock.h
net/ipv4/tcp.c

diff --git a/include/net/sock.h b/include/net/sock.h
index 577d91fb56267371c6bc5ae65f7454deba726bd6..7fa2232785226bcafd46b230559964fd16f3c4f4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1465,13 +1465,14 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
 
 static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
 {
+       sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
+       sk->sk_wmem_queued -= skb->truesize;
+       sk_mem_uncharge(sk, skb->truesize);
        if (!sk->sk_tx_skb_cache) {
+               skb_zcopy_clear(skb, true);
                sk->sk_tx_skb_cache = skb;
                return;
        }
-       sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
-       sk->sk_wmem_queued -= skb->truesize;
-       sk_mem_uncharge(sk, skb->truesize);
        __kfree_skb(skb);
 }
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 29b94edf05f9357d3a33744d677827ce624738ae..82bd707c03472f2cebb1a90d5f1c13acc821468f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -865,14 +865,9 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
 {
        struct sk_buff *skb;
 
-       skb = sk->sk_tx_skb_cache;
-       if (skb && !size) {
-               const struct sk_buff_fclones *fclones;
-
-               fclones = container_of(skb, struct sk_buff_fclones, skb1);
-               if (refcount_read(&fclones->fclone_ref) == 1) {
-                       sk->sk_wmem_queued -= skb->truesize;
-                       sk_mem_uncharge(sk, skb->truesize);
+       if (likely(!size)) {
+               skb = sk->sk_tx_skb_cache;
+               if (skb && !skb_cloned(skb)) {
                        skb->truesize -= skb->data_len;
                        sk->sk_tx_skb_cache = NULL;
                        pskb_trim(skb, 0);
@@ -2543,8 +2538,6 @@ void tcp_write_queue_purge(struct sock *sk)
        tcp_rtx_queue_purge(sk);
        skb = sk->sk_tx_skb_cache;
        if (skb) {
-               sk->sk_wmem_queued -= skb->truesize;
-               sk_mem_uncharge(sk, skb->truesize);
                __kfree_skb(skb);
                sk->sk_tx_skb_cache = NULL;
        }