tcp: change IPv6 flow-label upon receiving spurious retransmission
author: Yuchung Cheng <ycheng@google.com>
Wed, 29 Aug 2018 21:53:56 +0000 (14:53 -0700)
committer: David S. Miller <davem@davemloft.net>
Sat, 1 Sep 2018 06:03:00 +0000 (23:03 -0700)
Currently a Linux IPv6 TCP sender will change the flow label upon
timeouts to potentially steer away from a data path that has gone
bad. However, this does not help if the problem is on the ACK path
and the data path is healthy. In this case the receiver is likely
to receive repeated spurious retransmissions because the sender
couldn't get the ACKs in time and has recurring timeouts.

This patch adds another feature to mitigate this problem. It
leverages the DSACK states in the receiver to change the flow
label of the ACKs to speculatively re-route the ACK packets.
In order to allow triggering on the second consecutive spurious
RTO, the receiver changes the flow label upon sending a second
consecutive DSACK for a sequence number below RCV.NXT.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/ipv4/tcp.c
net/ipv4/tcp_input.c

index b8af2fe..8c4235c 100644 (file)
@@ -2595,6 +2595,8 @@ int tcp_disconnect(struct sock *sk, int flags)
        tp->compressed_ack = 0;
        tp->bytes_sent = 0;
        tp->bytes_retrans = 0;
+       tp->duplicate_sack[0].start_seq = 0;
+       tp->duplicate_sack[0].end_seq = 0;
        tp->dsack_dups = 0;
        tp->reord_seen = 0;
 
index 4c2dd9f..62508a2 100644 (file)
@@ -4199,6 +4199,17 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
                tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
 }
 
+static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
+{
+       /* When the ACK path fails or drops most ACKs, the sender would
+        * time out and spuriously retransmit the same segment repeatedly.
+        * The receiver remembers and reflects via DSACKs. If this skb starts
+        * at the seq held in duplicate_sack[0], rethink the txhash to re-route.
+        */
+       if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq)
+               sk_rethink_txhash(sk);
+}
+
 static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
 {
        struct tcp_sock *tp = tcp_sk(sk);
@@ -4211,6 +4222,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
                if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
                        u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
+                       tcp_rcv_spurious_retrans(sk, skb);
                        if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
                                end_seq = tp->rcv_nxt;
                        tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
@@ -4755,6 +4767,7 @@ queue_and_out:
        }
 
        if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
+               tcp_rcv_spurious_retrans(sk, skb);
                /* A retransmit, 2nd most common case.  Force an immediate ack. */
                NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
                tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);