netfilter: nft_set_pipapo: remove scratch_aligned pointer
[platform/kernel/linux-starfive.git] / net / netfilter / nft_set_pipapo.c
index c0dcc40..8e9b200 100644 (file)
 #include "nft_set_pipapo_avx2.h"
 #include "nft_set_pipapo.h"
 
-/* Current working bitmap index, toggled between field matches */
-static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index);
-
 /**
  * pipapo_refill() - For each set bit, set bits from selected mapping table item
  * @map:       Bitmap to be scanned for set bits
@@ -412,6 +409,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
                       const u32 *key, const struct nft_set_ext **ext)
 {
        struct nft_pipapo *priv = nft_set_priv(set);
+       struct nft_pipapo_scratch *scratch;
        unsigned long *res_map, *fill_map;
        u8 genmask = nft_genmask_cur(net);
        const u8 *rp = (const u8 *)key;
@@ -422,15 +420,17 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
 
        local_bh_disable();
 
-       map_index = raw_cpu_read(nft_pipapo_scratch_index);
-
        m = rcu_dereference(priv->match);
 
        if (unlikely(!m || !*raw_cpu_ptr(m->scratch)))
                goto out;
 
-       res_map  = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0);
-       fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max);
+       scratch = *raw_cpu_ptr(m->scratch);
+
+       map_index = scratch->map_index;
+
+       res_map  = scratch->map + (map_index ? m->bsize_max : 0);
+       fill_map = scratch->map + (map_index ? 0 : m->bsize_max);
 
        memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
 
@@ -460,7 +460,7 @@ next_match:
                b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt,
                                  last);
                if (b < 0) {
-                       raw_cpu_write(nft_pipapo_scratch_index, map_index);
+                       scratch->map_index = map_index;
                        local_bh_enable();
 
                        return false;
@@ -477,7 +477,7 @@ next_match:
                         * current inactive bitmap is clean and can be reused as
                         * *next* bitmap (not initial) for the next packet.
                         */
-                       raw_cpu_write(nft_pipapo_scratch_index, map_index);
+                       scratch->map_index = map_index;
                        local_bh_enable();
 
                        return true;
@@ -1102,6 +1102,25 @@ static void pipapo_map(struct nft_pipapo_match *m,
 }
 
 /**
+ * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address
+ * @m:         Matching data holding the per-CPU scratch pointers
+ * @cpu:       CPU whose scratch area is freed; nothing is done if none is stored
+ */
+static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu)
+{
+       struct nft_pipapo_scratch *s;
+       void *mem;
+
+       s = *per_cpu_ptr(m->scratch, cpu);
+       if (!s)                 /* no scratch area stored for this CPU */
+               return;
+
+       mem = s;
+       mem -= s->align_off;    /* step back by the offset saved in the scratch header */
+       kfree(mem);             /* the per-CPU slot itself is left untouched */
+}
+
+/**
  * pipapo_realloc_scratch() - Reallocate scratch maps for partial match results
  * @clone:     Copy of matching data with pending insertions and deletions
  * @bsize_max: Maximum bucket size, scratch maps cover two buckets
@@ -1114,12 +1133,13 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
        int i;
 
        for_each_possible_cpu(i) {
-               unsigned long *scratch;
+               struct nft_pipapo_scratch *scratch;
 #ifdef NFT_PIPAPO_ALIGN
-               unsigned long *scratch_aligned;
+               void *scratch_aligned;
+               u32 align_off;
 #endif
-
-               scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 +
+               scratch = kzalloc_node(struct_size(scratch, map,
+                                                  bsize_max * 2) +
                                       NFT_PIPAPO_ALIGN_HEADROOM,
                                       GFP_KERNEL, cpu_to_node(i));
                if (!scratch) {
@@ -1133,14 +1153,25 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
                        return -ENOMEM;
                }
 
-               kfree(*per_cpu_ptr(clone->scratch, i));
-
-               *per_cpu_ptr(clone->scratch, i) = scratch;
+               pipapo_free_scratch(clone, i);
 
 #ifdef NFT_PIPAPO_ALIGN
-               scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch);
-               *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned;
+               /* Align &scratch->map (not the struct itself): the extra
+                * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node()
+                * above guarantee we can waste up to those bytes in order
+                * to align the map field regardless of its offset within
+                * the struct.
+                */
+               BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM);
+
+               scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map);
+               scratch_aligned -= offsetof(struct nft_pipapo_scratch, map);
+               align_off = scratch_aligned - (void *)scratch;
+
+               scratch = scratch_aligned;
+               scratch->align_off = align_off;
 #endif
+               *per_cpu_ptr(clone->scratch, i) = scratch;
        }
 
        return 0;
@@ -1293,11 +1324,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
        if (!new->scratch)
                goto out_scratch;
 
-#ifdef NFT_PIPAPO_ALIGN
-       new->scratch_aligned = alloc_percpu(*new->scratch_aligned);
-       if (!new->scratch_aligned)
-               goto out_scratch;
-#endif
        for_each_possible_cpu(i)
                *per_cpu_ptr(new->scratch, i) = NULL;
 
@@ -1349,10 +1375,7 @@ out_lt:
        }
 out_scratch_realloc:
        for_each_possible_cpu(i)
-               kfree(*per_cpu_ptr(new->scratch, i));
-#ifdef NFT_PIPAPO_ALIGN
-       free_percpu(new->scratch_aligned);
-#endif
+               pipapo_free_scratch(new, i);
 out_scratch:
        free_percpu(new->scratch);
        kfree(new);
@@ -1637,13 +1660,9 @@ static void pipapo_free_match(struct nft_pipapo_match *m)
        int i;
 
        for_each_possible_cpu(i)
-               kfree(*per_cpu_ptr(m->scratch, i));
+               pipapo_free_scratch(m, i);
 
-#ifdef NFT_PIPAPO_ALIGN
-       free_percpu(m->scratch_aligned);
-#endif
        free_percpu(m->scratch);
-
        pipapo_free_fields(m);
 
        kfree(m);
@@ -2041,6 +2060,9 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
 
                e = f->mt[r].e;
 
+               if (!nft_set_elem_active(&e->ext, iter->genmask))
+                       goto cont;
+
                elem.priv = e;
 
                iter->err = iter->fn(ctx, set, iter, &elem);
@@ -2127,7 +2149,7 @@ static int nft_pipapo_init(const struct nft_set *set,
        m->field_count = field_count;
        m->bsize_max = 0;
 
-       m->scratch = alloc_percpu(unsigned long *);
+       m->scratch = alloc_percpu(struct nft_pipapo_scratch *);
        if (!m->scratch) {
                err = -ENOMEM;
                goto out_scratch;
@@ -2135,16 +2157,6 @@ static int nft_pipapo_init(const struct nft_set *set,
        for_each_possible_cpu(i)
                *per_cpu_ptr(m->scratch, i) = NULL;
 
-#ifdef NFT_PIPAPO_ALIGN
-       m->scratch_aligned = alloc_percpu(unsigned long *);
-       if (!m->scratch_aligned) {
-               err = -ENOMEM;
-               goto out_free;
-       }
-       for_each_possible_cpu(i)
-               *per_cpu_ptr(m->scratch_aligned, i) = NULL;
-#endif
-
        rcu_head_init(&m->rcu);
 
        nft_pipapo_for_each_field(f, i, m) {
@@ -2175,9 +2187,6 @@ static int nft_pipapo_init(const struct nft_set *set,
        return 0;
 
 out_free:
-#ifdef NFT_PIPAPO_ALIGN
-       free_percpu(m->scratch_aligned);
-#endif
        free_percpu(m->scratch);
 out_scratch:
        kfree(m);
@@ -2231,11 +2240,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
 
                nft_set_pipapo_match_destroy(ctx, set, m);
 
-#ifdef NFT_PIPAPO_ALIGN
-               free_percpu(m->scratch_aligned);
-#endif
                for_each_possible_cpu(cpu)
-                       kfree(*per_cpu_ptr(m->scratch, cpu));
+                       pipapo_free_scratch(m, cpu);
                free_percpu(m->scratch);
                pipapo_free_fields(m);
                kfree(m);
@@ -2248,11 +2254,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
                if (priv->dirty)
                        nft_set_pipapo_match_destroy(ctx, set, m);
 
-#ifdef NFT_PIPAPO_ALIGN
-               free_percpu(priv->clone->scratch_aligned);
-#endif
                for_each_possible_cpu(cpu)
-                       kfree(*per_cpu_ptr(priv->clone->scratch, cpu));
+                       pipapo_free_scratch(priv->clone, cpu);
                free_percpu(priv->clone->scratch);
 
                pipapo_free_fields(priv->clone);