mac80211: reduce packet loss event false positives
author Felix Fietkau <nbd@nbd.name>
Sat, 8 Aug 2020 17:25:42 +0000 (19:25 +0200)
committer Johannes Berg <johannes.berg@intel.com>
Thu, 27 Aug 2020 08:53:20 +0000 (10:53 +0200)
When running a large number of packets per second with a high data rate
and long A-MPDUs, the packet loss threshold can be reached very quickly
when the link conditions change. This frequently shows up as spurious
disconnects.
Mitigate false positives by applying to regular stations logic similar to
that already used for TDLS, though with a more aggressive timeout.
Packet loss events are now reported only once the lost-packet threshold has
been reached and no ACK has been received for one second.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20200808172542.41628-1-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
net/mac80211/sta_info.h
net/mac80211/status.c

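diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 8060d14..d501011 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h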
@@ -524,7 +524,7 @@ struct ieee80211_sta_rx_stats {
  * @status_stats.retry_failed: # of frames that failed after retry
  * @status_stats.retry_count: # of retries attempted
  * @status_stats.lost_packets: # of lost packets
- * @status_stats.last_tdls_pkt_time: timestamp of last TDLS packet
+ * @status_stats.last_pkt_time: timestamp of last ACKed packet
  * @status_stats.msdu_retries: # of MSDU retries
  * @status_stats.msdu_failed: # of failed MSDUs
  * @status_stats.last_ack: last ack timestamp (jiffies)
@@ -597,7 +597,7 @@ struct sta_info {
                unsigned long filtered;
                unsigned long retry_failed, retry_count;
                unsigned int lost_packets;
-               unsigned long last_tdls_pkt_time;
+               unsigned long last_pkt_time;
                u64 msdu_retries[IEEE80211_NUM_TIDS + 1];
                u64 msdu_failed[IEEE80211_NUM_TIDS + 1];
                unsigned long last_ack;
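diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 6de63f1..0794396 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c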
@@ -755,12 +755,16 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
  *  - current throughput (higher value for higher tpt)?
  */
 #define STA_LOST_PKT_THRESHOLD 50
+#define STA_LOST_PKT_TIME      HZ              /* 1 sec since last ACK */
 #define STA_LOST_TDLS_PKT_THRESHOLD    10
 #define STA_LOST_TDLS_PKT_TIME         (10*HZ) /* 10secs since last ACK */
 
 static void ieee80211_lost_packet(struct sta_info *sta,
                                  struct ieee80211_tx_info *info)
 {
+       unsigned long pkt_time = STA_LOST_PKT_TIME;
+       unsigned int pkt_thr = STA_LOST_PKT_THRESHOLD;
+
        /* If driver relies on its own algorithm for station kickout, skip
         * mac80211 packet loss mechanism.
         */
@@ -773,21 +777,20 @@ static void ieee80211_lost_packet(struct sta_info *sta,
                return;
 
        sta->status_stats.lost_packets++;
-       if (!sta->sta.tdls &&
-           sta->status_stats.lost_packets < STA_LOST_PKT_THRESHOLD)
-               return;
+       if (sta->sta.tdls) {
+               pkt_time = STA_LOST_TDLS_PKT_TIME;
+               pkt_thr = STA_LOST_PKT_THRESHOLD;
+       }
 
        /*
         * If we're in TDLS mode, make sure that all STA_LOST_TDLS_PKT_THRESHOLD
         * of the last packets were lost, and that no ACK was received in the
         * last STA_LOST_TDLS_PKT_TIME ms, before triggering the CQM packet-loss
         * mechanism.
+        * For non-TDLS, use STA_LOST_PKT_THRESHOLD and STA_LOST_PKT_TIME
         */
-       if (sta->sta.tdls &&
-           (sta->status_stats.lost_packets < STA_LOST_TDLS_PKT_THRESHOLD ||
-            time_before(jiffies,
-                        sta->status_stats.last_tdls_pkt_time +
-                        STA_LOST_TDLS_PKT_TIME)))
+       if (sta->status_stats.lost_packets < pkt_thr ||
+           !time_after(jiffies, sta->status_stats.last_pkt_time + pkt_time))
                return;
 
        cfg80211_cqm_pktloss_notify(sta->sdata->dev, sta->sta.addr,
@@ -1033,9 +1036,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
                                        sta->status_stats.lost_packets = 0;
 
                                /* Track when last TDLS packet was ACKed */
-                               if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH))
-                                       sta->status_stats.last_tdls_pkt_time =
-                                               jiffies;
+                               sta->status_stats.last_pkt_time = jiffies;
                        } else if (noack_success) {
                                /* nothing to do here, do not account as lost */
                        } else {
@@ -1172,9 +1173,8 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
                        if (sta->status_stats.lost_packets)
                                sta->status_stats.lost_packets = 0;
 
-                       /* Track when last TDLS packet was ACKed */
-                       if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH))
-                               sta->status_stats.last_tdls_pkt_time = jiffies;
+                       /* Track when last packet was ACKed */
+                       sta->status_stats.last_pkt_time = jiffies;
                } else if (test_sta_flag(sta, WLAN_STA_PS_STA)) {
                        return;
                } else if (noack_success) {
@@ -1263,8 +1263,7 @@ void ieee80211_tx_status_8023(struct ieee80211_hw *hw,
                        if (sta->status_stats.lost_packets)
                                sta->status_stats.lost_packets = 0;
 
-                       if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH))
-                               sta->status_stats.last_tdls_pkt_time = jiffies;
+                       sta->status_stats.last_pkt_time = jiffies;
                } else {
                        ieee80211_lost_packet(sta, info);
                }