netfilter: conntrack: fix the gc rescheduling delay
authorAntoine Tenart <atenart@kernel.org>
Fri, 16 Sep 2022 09:29:40 +0000 (11:29 +0200)
committerFlorian Westphal <fw@strlen.de>
Wed, 21 Sep 2022 08:44:56 +0000 (10:44 +0200)
Commit 2cfadb761d3d ("netfilter: conntrack: revisit gc autotuning")
changed the eviction rescheduling to use the average expiry of scanned
entries (within 1-60s) by doing:

  for (...) {
      expires = clamp(nf_ct_expires(tmp), ...);
      next_run += expires;
      next_run /= 2;
  }

The issue is the above will make the average ('next_run' here) more
dependent on the last expiration values than the first ones (for sets > 2).
Depending on the expiration values used to compute the average, the
result can be quite different from what's expected. To fix this we can
do the following:

  for (...) {
      expires = clamp(nf_ct_expires(tmp), ...);
      next_run += (expires - next_run) / ++count;
  }

Fixes: 2cfadb761d3d ("netfilter: conntrack: revisit gc autotuning")
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
net/netfilter/nf_conntrack_core.c

index c5851e1..8efa6bd 100644 (file)
@@ -67,6 +67,7 @@ struct conntrack_gc_work {
        struct delayed_work     dwork;
        u32                     next_bucket;
        u32                     avg_timeout;
+       u32                     count;
        u32                     start_time;
        bool                    exiting;
        bool                    early_drop;
@@ -1466,6 +1467,7 @@ static void gc_worker(struct work_struct *work)
        unsigned int expired_count = 0;
        unsigned long next_run;
        s32 delta_time;
+       long count;
 
        gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
 
@@ -1475,10 +1477,12 @@ static void gc_worker(struct work_struct *work)
 
        if (i == 0) {
                gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
+               gc_work->count = 1;
                gc_work->start_time = start_time;
        }
 
        next_run = gc_work->avg_timeout;
+       count = gc_work->count;
 
        end_time = start_time + GC_SCAN_MAX_DURATION;
 
@@ -1498,8 +1502,8 @@ static void gc_worker(struct work_struct *work)
 
                hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
                        struct nf_conntrack_net *cnet;
-                       unsigned long expires;
                        struct net *net;
+                       long expires;
 
                        tmp = nf_ct_tuplehash_to_ctrack(h);
 
@@ -1513,6 +1517,7 @@ static void gc_worker(struct work_struct *work)
 
                                gc_work->next_bucket = i;
                                gc_work->avg_timeout = next_run;
+                               gc_work->count = count;
 
                                delta_time = nfct_time_stamp - gc_work->start_time;
 
@@ -1528,8 +1533,8 @@ static void gc_worker(struct work_struct *work)
                        }
 
                        expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
+                       expires = (expires - (long)next_run) / ++count;
                        next_run += expires;
-                       next_run /= 2u;
 
                        if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
                                continue;
@@ -1570,6 +1575,7 @@ static void gc_worker(struct work_struct *work)
                delta_time = nfct_time_stamp - end_time;
                if (delta_time > 0 && i < hashsz) {
                        gc_work->avg_timeout = next_run;
+                       gc_work->count = count;
                        gc_work->next_bucket = i;
                        next_run = 0;
                        goto early_exit;