netfilter: x_tables: make xt_replace_table wait until old rules are not used anymore
author Florian Westphal <fw@strlen.de>
Wed, 11 Oct 2017 23:13:50 +0000 (01:13 +0200)
committer Pablo Neira Ayuso <pablo@netfilter.org>
Tue, 24 Oct 2017 16:01:50 +0000 (18:01 +0200)
xt_replace_table relies on table replacement counter retrieval (which
uses xt_recseq to synchronize pcpu counters).

This is fine; however, with a large rule set, get_counters() can take
a very long time -- it needs to synchronize all counters because
it has to assume concurrent modifications can occur.

Make xt_replace_table synchronize by itself by waiting until all cpus
had an even seqcount.

This allows a followup patch to copy the counters of the old ruleset
without any synchronization after xt_replace_table has completed.

Cc: Dan Williams <dcbw@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
net/netfilter/x_tables.c

index c83a3b5..a164e51 100644 (file)
@@ -1153,6 +1153,7 @@ xt_replace_table(struct xt_table *table,
              int *error)
 {
        struct xt_table_info *private;
+       unsigned int cpu;
        int ret;
 
        ret = xt_jumpstack_alloc(newinfo);
@@ -1182,14 +1183,28 @@ xt_replace_table(struct xt_table *table,
        smp_wmb();
        table->private = newinfo;
 
+       /* make sure all cpus see new ->private value */
+       smp_wmb();
+
        /*
         * Even though table entries have now been swapped, other CPU's
-        * may still be using the old entries. This is okay, because
-        * resynchronization happens because of the locking done
-        * during the get_counters() routine.
+        * may still be using the old entries...
         */
        local_bh_enable();
 
+       /* ... so wait for even xt_recseq on all cpus */
+       for_each_possible_cpu(cpu) {
+               seqcount_t *s = &per_cpu(xt_recseq, cpu);
+               u32 seq = raw_read_seqcount(s);
+
+               if (seq & 1) {
+                       do {
+                               cond_resched();
+                               cpu_relax();
+                       } while (seq == raw_read_seqcount(s));
+               }
+       }
+
 #ifdef CONFIG_AUDIT
        if (audit_enabled) {
                audit_log(current->audit_context, GFP_KERNEL,