tcp: do not accept ACK of bytes we never sent

[platform/kernel/linux-rpi.git] / net / ipv4 / tcp_input.c
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

index 06fe1cf..e6c4929 100644 (file)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -253,6 +253,19 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
                 if (unlikely(len > icsk->icsk_ack.rcv_mss +
                                    MAX_TCP_OPTION_SPACE))
                         tcp_gro_dev_warn(sk, skb, len);
+               /* If the skb has a len of exactly 1*MSS and has the PSH bit
+                * set then it is likely the end of an application write. So
+                * more data may not be arriving soon, and yet the data sender
+                * may be waiting for an ACK if cwnd-bound or using TX zero
+                * copy. So we set ICSK_ACK_PUSHED here so that
+                * tcp_cleanup_rbuf() will send an ACK immediately if the app
+                * reads all of the data and is not ping-pong. If len > MSS
+                * then this logic does not matter (and does not hurt) because
+                * tcp_cleanup_rbuf() will always ACK immediately if the app
+                * reads data and there is more than an MSS of unACKed data.
+                */
+               if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_PSH)
+                       icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
         } else {
                 /* Otherwise, we make more careful check taking into account,
                  * that SACKs block is variable.
@@ -2194,16 +2207,17 @@ void tcp_enter_loss(struct sock *sk)
   * restore sanity to the SACK scoreboard. If the apparent reneging
   * persists until this RTO then we'll clear the SACK scoreboard.
   */
-static bool tcp_check_sack_reneging(struct sock *sk, int flag)
+static bool tcp_check_sack_reneging(struct sock *sk, int *ack_flag)
  {
-       if (flag & FLAG_SACK_RENEGING &&
-           flag & FLAG_SND_UNA_ADVANCED) {
+       if (*ack_flag & FLAG_SACK_RENEGING &&
+           *ack_flag & FLAG_SND_UNA_ADVANCED) {
                 struct tcp_sock *tp = tcp_sk(sk);
                 unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
                                           msecs_to_jiffies(10));
  
                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                           delay, TCP_RTO_MAX);
+               *ack_flag &= ~FLAG_SET_XMIT_TIMER;
                 return true;
         }
         return false;
@@ -2973,7 +2987,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
                 tp->prior_ssthresh = 0;
  
         /* B. In all the states check for reneging SACKs. */
-       if (tcp_check_sack_reneging(sk, flag))
+       if (tcp_check_sack_reneging(sk, ack_flag))
                 return;
  
         /* C. Check consistency of the current state. */
@@ -3795,8 +3809,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
          * then we can probably ignore it.
          */
         if (before(ack, prior_snd_una)) {
+               u32 max_window;
+
+               /* do not accept ACK for bytes we never sent. */
+               max_window = min_t(u64, tp->max_window, tp->bytes_acked);
                 /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
-               if (before(ack, prior_snd_una - tp->max_window)) {
+               if (before(ack, prior_snd_una - max_window)) {
                         if (!(flag & FLAG_NO_CHALLENGE_ACK))
                                 tcp_send_challenge_ack(sk);
                         return -SKB_DROP_REASON_TCP_TOO_OLD_ACK;
@@ -6436,22 +6454,23 @@ reset_and_undo:
  
  static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
  {
+       struct tcp_sock *tp = tcp_sk(sk);
         struct request_sock *req;
  
         /* If we are still handling the SYNACK RTO, see if timestamp ECR allows
          * undo. If peer SACKs triggered fast recovery, we can't undo here.
          */
-       if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
-               tcp_try_undo_loss(sk, false);
+       if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss && !tp->packets_out)
+               tcp_try_undo_recovery(sk);
  
         /* Reset rtx states to prevent spurious retransmits_timed_out() */
-       tcp_sk(sk)->retrans_stamp = 0;
+       tp->retrans_stamp = 0;
         inet_csk(sk)->icsk_retransmits = 0;
  
         /* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1,
          * we no longer need req so release it.
          */
-       req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
+       req = rcu_dereference_protected(tp->fastopen_rsk,
                                         lockdep_sock_is_held(sk));
         reqsk_fastopen_remove(sk, req, false);