[platform/kernel/linux-rpi.git]
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 94e18fb..31399c5 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -66,6 +66,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash);
 struct conntrack_gc_work {
        struct delayed_work     dwork;
        u32                     next_bucket;
+       u32                     avg_timeout;
+       u32                     start_time;
        bool                    exiting;
        bool                    early_drop;
 };
@@ -74,10 +76,25 @@ static __read_mostly struct kmem_cache *nf_conntrack_cachep;
 static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
 static __read_mostly bool nf_conntrack_locks_all;
 
-#define GC_SCAN_INTERVAL       (120u * HZ)
+/* serialize hash resizes and nf_ct_iterate_cleanup */
+static DEFINE_MUTEX(nf_conntrack_mutex);
+
+#define GC_SCAN_INTERVAL_MAX   (60ul * HZ)
+#define GC_SCAN_INTERVAL_MIN   (1ul * HZ)
+
+/* clamp timeouts to this value (TCP unacked) */
+#define GC_SCAN_INTERVAL_CLAMP (300ul * HZ)
+
+/* large initial bias so that we don't scan often just because we have
+ * three entries with a 1s timeout.
+ */
+#define GC_SCAN_INTERVAL_INIT  INT_MAX
+
 #define GC_SCAN_MAX_DURATION   msecs_to_jiffies(10)
+#define GC_SCAN_EXPIRED_MAX    (64000u / HZ)
 
-#define MAX_CHAINLEN   64u
+#define MIN_CHAINLEN   8u
+#define MAX_CHAINLEN   (32u - MIN_CHAINLEN)
 
 static struct conntrack_gc_work conntrack_gc_work;
 
@@ -188,11 +205,13 @@ seqcount_spinlock_t nf_conntrack_generation __read_mostly;
 static siphash_key_t nf_conntrack_hash_rnd __read_mostly;
 
 static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
+                             unsigned int zoneid,
                              const struct net *net)
 {
        struct {
                struct nf_conntrack_man src;
                union nf_inet_addr dst_addr;
+               unsigned int zone;
                u32 net_mix;
                u16 dport;
                u16 proto;
@@ -205,6 +224,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
        /* The direction must be ignored, so handle usable members manually. */
        combined.src = tuple->src;
        combined.dst_addr = tuple->dst.u3;
+       combined.zone = zoneid;
        combined.net_mix = net_hash_mix(net);
        combined.dport = (__force __u16)tuple->dst.u.all;
        combined.proto = tuple->dst.protonum;
@@ -219,15 +239,17 @@ static u32 scale_hash(u32 hash)
 
 static u32 __hash_conntrack(const struct net *net,
                            const struct nf_conntrack_tuple *tuple,
+                           unsigned int zoneid,
                            unsigned int size)
 {
-       return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
+       return reciprocal_scale(hash_conntrack_raw(tuple, zoneid, net), size);
 }
 
 static u32 hash_conntrack(const struct net *net,
-                         const struct nf_conntrack_tuple *tuple)
+                         const struct nf_conntrack_tuple *tuple,
+                         unsigned int zoneid)
 {
-       return scale_hash(hash_conntrack_raw(tuple, net));
+       return scale_hash(hash_conntrack_raw(tuple, zoneid, net));
 }
 
 static bool nf_ct_get_tuple_ports(const struct sk_buff *skb,
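
Note on the zoneid parameter: folding the zone id into the hashed data means two connections with identical tuples in different conntrack zones no longer land in the same bucket chain, and the lookup side must hash with the matching zone id (see the two-pass lookups further down). A toy model of the effect, using FNV-1a as a stand-in for the kernel's siphash and all-u32 fields to avoid struct padding:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct key {    /* mirrors the hashed fields; all 32-bit, no padding */
        uint32_t src_addr, dst_addr, ports, zone;
    };

    static uint32_t fnv1a(const void *data, size_t len)
    {
        const unsigned char *p = data;
        uint32_t h = 2166136261u;

        while (len--)
            h = (h ^ *p++) * 16777619u;
        return h;
    }

    int main(void)
    {
        struct key a = { 0x0a000001, 0x0a000002, (1234u << 16) | 80u, 0 };
        struct key b = a;

        b.zone = 7;    /* same tuple, different conntrack zone */
        printf("zone 0 hash: %08x\nzone 7 hash: %08x\n",
               (unsigned)fnv1a(&a, sizeof(a)), (unsigned)fnv1a(&b, sizeof(b)));
        return 0;
    }
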
@@ -549,7 +571,7 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
 
 #define NFCT_ALIGN(len)        (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK)
 
-/* Released via destroy_conntrack() */
+/* Released via nf_ct_destroy() */
 struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
                                 const struct nf_conntrack_zone *zone,
                                 gfp_t flags)
@@ -576,7 +598,7 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
        tmpl->status = IPS_TEMPLATE;
        write_pnet(&tmpl->ct_net, net);
        nf_ct_zone_add(tmpl, zone);
-       atomic_set(&tmpl->ct_general.use, 0);
+       refcount_set(&tmpl->ct_general.use, 1);
 
        return tmpl;
 }
@@ -603,13 +625,12 @@ static void destroy_gre_conntrack(struct nf_conn *ct)
 #endif
 }
 
-static void
-destroy_conntrack(struct nf_conntrack *nfct)
+void nf_ct_destroy(struct nf_conntrack *nfct)
 {
        struct nf_conn *ct = (struct nf_conn *)nfct;
 
-       pr_debug("destroy_conntrack(%p)\n", ct);
-       WARN_ON(atomic_read(&nfct->use) != 0);
+       pr_debug("%s(%p)\n", __func__, ct);
+       WARN_ON(refcount_read(&nfct->use) != 0);
 
        if (unlikely(nf_ct_is_template(ct))) {
                nf_ct_tmpl_free(ct);
@@ -634,9 +655,10 @@ destroy_conntrack(struct nf_conntrack *nfct)
        if (ct->master)
                nf_ct_put(ct->master);
 
-       pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
+       pr_debug("%s: returning ct=%p to slab\n", __func__, ct);
        nf_conntrack_free(ct);
 }
+EXPORT_SYMBOL(nf_ct_destroy);
 
 static void nf_ct_delete_from_lists(struct nf_conn *ct)
 {
@@ -650,9 +672,11 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
        do {
                sequence = read_seqcount_begin(&nf_conntrack_generation);
                hash = hash_conntrack(net,
-                                     &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+                                     &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+                                     nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
                reply_hash = hash_conntrack(net,
-                                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+                                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+                                          nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
        } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 
        clean_from_lists(ct);
@@ -673,7 +697,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
 
        tstamp = nf_conn_tstamp_find(ct);
        if (tstamp) {
-               s32 timeout = ct->timeout - nfct_time_stamp;
+               s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
 
                tstamp->stop = ktime_get_real_ns();
                if (timeout < 0)
@@ -731,7 +755,7 @@ nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2)
 /* caller must hold rcu readlock and none of the nf_conntrack_locks */
 static void nf_ct_gc_expired(struct nf_conn *ct)
 {
-       if (!atomic_inc_not_zero(&ct->ct_general.use))
+       if (!refcount_inc_not_zero(&ct->ct_general.use))
                return;
 
        if (nf_ct_should_gc(ct))
@@ -799,7 +823,7 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
                 * in, try to obtain a reference and re-check tuple
                 */
                ct = nf_ct_tuplehash_to_ctrack(h);
-               if (likely(atomic_inc_not_zero(&ct->ct_general.use))) {
+               if (likely(refcount_inc_not_zero(&ct->ct_general.use))) {
                        if (likely(nf_ct_key_equal(h, tuple, zone, net)))
                                goto found;
 
@@ -819,8 +843,20 @@ struct nf_conntrack_tuple_hash *
 nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
                      const struct nf_conntrack_tuple *tuple)
 {
-       return __nf_conntrack_find_get(net, zone, tuple,
-                                      hash_conntrack_raw(tuple, net));
+       unsigned int rid, zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
+       struct nf_conntrack_tuple_hash *thash;
+
+       thash = __nf_conntrack_find_get(net, zone, tuple,
+                                       hash_conntrack_raw(tuple, zone_id, net));
+
+       if (thash)
+               return thash;
+
+       rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
+       if (rid != zone_id)
+               return __nf_conntrack_find_get(net, zone, tuple,
+                                              hash_conntrack_raw(tuple, rid, net));
+       return thash;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
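
Note on the double lookup: with direction-specific zones, the two directions hash with different zone ids, so a miss on the ORIGINAL-direction id must be retried with the REPLY-direction id before concluding that no entry exists. A compact sketch of that guard; zone_id() here is a hypothetical stand-in for nf_ct_zone_id():

    #include <stdio.h>

    enum dir { DIR_ORIGINAL, DIR_REPLY };

    /* hypothetical: a zone that applies to the ORIGINAL direction only;
     * the other direction falls back to the default zone id 0 */
    static unsigned int zone_id(enum dir d)
    {
        return d == DIR_ORIGINAL ? 42 : 0;
    }

    int main(void)
    {
        unsigned int zid = zone_id(DIR_ORIGINAL);
        unsigned int rid = zone_id(DIR_REPLY);

        printf("first lookup hashes with zone id %u\n", zid);
        if (rid != zid)    /* only direction-specific zones need pass two */
            printf("on a miss, retry with zone id %u\n", rid);
        return 0;
    }
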
 
@@ -842,6 +878,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
        unsigned int hash, reply_hash;
        struct nf_conntrack_tuple_hash *h;
        struct hlist_nulls_node *n;
+       unsigned int max_chainlen;
        unsigned int chainlen = 0;
        unsigned int sequence;
        int err = -EEXIST;
@@ -852,18 +889,22 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
        do {
                sequence = read_seqcount_begin(&nf_conntrack_generation);
                hash = hash_conntrack(net,
-                                     &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+                                     &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+                                     nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
                reply_hash = hash_conntrack(net,
-                                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+                                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+                                          nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
        } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 
+       max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
+
        /* See if there's one in the list already, including reverse */
        hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
                if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
                                    zone, net))
                        goto out;
 
-               if (chainlen++ > MAX_CHAINLEN)
+               if (chainlen++ > max_chainlen)
                        goto chaintoolong;
        }
 
@@ -873,13 +914,13 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
                if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
                                    zone, net))
                        goto out;
-               if (chainlen++ > MAX_CHAINLEN)
+               if (chainlen++ > max_chainlen)
                        goto chaintoolong;
        }
 
        smp_wmb();
        /* The caller holds a reference to this object */
-       atomic_set(&ct->ct_general.use, 2);
+       refcount_set(&ct->ct_general.use, 2);
        __nf_conntrack_hash_insert(ct, hash, reply_hash);
        nf_conntrack_double_unlock(hash, reply_hash);
        NF_CT_STAT_INC(net, insert);
@@ -930,7 +971,7 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct)
 {
        struct nf_conn_tstamp *tstamp;
 
-       atomic_inc(&ct->ct_general.use);
+       refcount_inc(&ct->ct_general.use);
        ct->status |= IPS_CONFIRMED;
 
        /* set conntrack timestamp, if enabled. */
@@ -961,7 +1002,7 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb,
 
                nf_ct_acct_merge(ct, ctinfo, loser_ct);
                nf_ct_add_to_dying_list(loser_ct);
-               nf_conntrack_put(&loser_ct->ct_general);
+               nf_ct_put(loser_ct);
                nf_ct_set(skb, ct, ctinfo);
 
                NF_CT_STAT_INC(net, clash_resolve);
@@ -1008,7 +1049,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
        }
 
        /* We want the clashing entry to go away real soon: 1 second timeout. */
-       loser_ct->timeout = nfct_time_stamp + HZ;
+       WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ);
 
        /* IPS_NAT_CLASH removes the entry automatically on the first
         * reply.  Also prevents UDP tracker from moving the entry to
@@ -1103,8 +1144,8 @@ drop:
 int
 __nf_conntrack_confirm(struct sk_buff *skb)
 {
+       unsigned int chainlen = 0, sequence, max_chainlen;
        const struct nf_conntrack_zone *zone;
-       unsigned int chainlen = 0, sequence;
        unsigned int hash, reply_hash;
        struct nf_conntrack_tuple_hash *h;
        struct nf_conn *ct;
@@ -1133,8 +1174,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
                hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
                hash = scale_hash(hash);
                reply_hash = hash_conntrack(net,
-                                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
+                                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+                                          nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
        } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 
        /* We're not in hash table, and we refuse to set up related
@@ -1168,6 +1209,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
                goto dying;
        }
 
+       max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
        /* See if there's one in the list already, including reverse:
           NAT could have grabbed it without realizing, since we're
           not in the hash.  If there is, we lost race. */
@@ -1175,7 +1217,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
                if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
                                    zone, net))
                        goto out;
-               if (chainlen++ > MAX_CHAINLEN)
+               if (chainlen++ > max_chainlen)
                        goto chaintoolong;
        }
 
@@ -1184,7 +1226,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
                if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
                                    zone, net))
                        goto out;
-               if (chainlen++ > MAX_CHAINLEN) {
+               if (chainlen++ > max_chainlen) {
 chaintoolong:
                        nf_ct_add_to_dying_list(ct);
                        NF_CT_STAT_INC(net, chaintoolong);
@@ -1246,7 +1288,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
        rcu_read_lock();
  begin:
        nf_conntrack_get_ht(&ct_hash, &hsize);
-       hash = __hash_conntrack(net, tuple, hsize);
+       hash = __hash_conntrack(net, tuple, nf_ct_zone_id(zone, IP_CT_DIR_REPLY), hsize);
 
        hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
                ct = nf_ct_tuplehash_to_ctrack(h);
@@ -1322,7 +1364,7 @@ static unsigned int early_drop_list(struct net *net,
                    nf_ct_is_dying(tmp))
                        continue;
 
-               if (!atomic_inc_not_zero(&tmp->ct_general.use))
+               if (!refcount_inc_not_zero(&tmp->ct_general.use))
                        continue;
 
                /* kill only if still in same netns -- might have moved due to
@@ -1391,16 +1433,28 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
 
 static void gc_worker(struct work_struct *work)
 {
-       unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION;
        unsigned int i, hashsz, nf_conntrack_max95 = 0;
-       unsigned long next_run = GC_SCAN_INTERVAL;
+       u32 end_time, start_time = nfct_time_stamp;
        struct conntrack_gc_work *gc_work;
+       unsigned int expired_count = 0;
+       unsigned long next_run;
+       s32 delta_time;
+
        gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
 
        i = gc_work->next_bucket;
        if (gc_work->early_drop)
                nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
 
+       if (i == 0) {
+               gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
+               gc_work->start_time = start_time;
+       }
+
+       next_run = gc_work->avg_timeout;
+
+       end_time = start_time + GC_SCAN_MAX_DURATION;
+
        do {
                struct nf_conntrack_tuple_hash *h;
                struct hlist_nulls_head *ct_hash;
@@ -1417,6 +1471,7 @@ static void gc_worker(struct work_struct *work)
 
                hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
                        struct nf_conntrack_net *cnet;
+                       unsigned long expires;
                        struct net *net;
 
                        tmp = nf_ct_tuplehash_to_ctrack(h);
@@ -1426,11 +1481,29 @@ static void gc_worker(struct work_struct *work)
                                continue;
                        }
 
+                       if (expired_count > GC_SCAN_EXPIRED_MAX) {
+                               rcu_read_unlock();
+
+                               gc_work->next_bucket = i;
+                               gc_work->avg_timeout = next_run;
+
+                               delta_time = nfct_time_stamp - gc_work->start_time;
+
+                               /* re-sched immediately if total cycle time is exceeded */
+                               next_run = delta_time < (s32)GC_SCAN_INTERVAL_MAX;
+                               goto early_exit;
+                       }
+
                        if (nf_ct_is_expired(tmp)) {
                                nf_ct_gc_expired(tmp);
+                               expired_count++;
                                continue;
                        }
 
+                       expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
+                       next_run += expires;
+                       next_run /= 2u;
+
                        if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
                                continue;
 
@@ -1440,7 +1513,7 @@ static void gc_worker(struct work_struct *work)
                                continue;
 
                        /* need to take reference to avoid possible races */
-                       if (!atomic_inc_not_zero(&tmp->ct_general.use))
+                       if (!refcount_inc_not_zero(&tmp->ct_general.use))
                                continue;
 
                        if (gc_worker_skip_ct(tmp)) {
@@ -1448,8 +1521,10 @@ static void gc_worker(struct work_struct *work)
                                continue;
                        }
 
-                       if (gc_worker_can_early_drop(tmp))
+                       if (gc_worker_can_early_drop(tmp)) {
                                nf_ct_kill(tmp);
+                               expired_count++;
+                       }
 
                        nf_ct_put(tmp);
                }
@@ -1462,33 +1537,38 @@ static void gc_worker(struct work_struct *work)
                cond_resched();
                i++;
 
-               if (time_after(jiffies, end_time) && i < hashsz) {
+               delta_time = nfct_time_stamp - end_time;
+               if (delta_time > 0 && i < hashsz) {
+                       gc_work->avg_timeout = next_run;
                        gc_work->next_bucket = i;
                        next_run = 0;
-                       break;
+                       goto early_exit;
                }
        } while (i < hashsz);
 
+       gc_work->next_bucket = 0;
+
+       next_run = clamp(next_run, GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_MAX);
+
+       delta_time = max_t(s32, nfct_time_stamp - gc_work->start_time, 1);
+       if (next_run > (unsigned long)delta_time)
+               next_run -= delta_time;
+       else
+               next_run = 1;
+
+early_exit:
        if (gc_work->exiting)
                return;
 
-       /*
-        * Eviction will normally happen from the packet path, and not
-        * from this gc worker.
-        *
-        * This worker is only here to reap expired entries when system went
-        * idle after a busy period.
-        */
-       if (next_run) {
+       if (next_run)
                gc_work->early_drop = false;
-               gc_work->next_bucket = 0;
-       }
+
        queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
 }
 
 static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
 {
-       INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
+       INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
        gc_work->exiting = false;
 }
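
Note on the reworked scheduling: the worker now derives its own rescheduling interval from the entries it scans. Every live entry pulls next_run halfway toward its clamped remaining timeout ("next_run += expires; next_run /= 2u;" above), the result is clamped to [1s, 60s], and the time already spent scanning is subtracted. A runnable userspace model of that averaging; HZ and the entry timeouts are made up for illustration:

    #include <limits.h>
    #include <stdio.h>

    #define HZ 100
    #define GC_SCAN_INTERVAL_MIN   (1ul * HZ)
    #define GC_SCAN_INTERVAL_MAX   (60ul * HZ)
    #define GC_SCAN_INTERVAL_CLAMP (300ul * HZ)    /* TCP unacked */
    #define GC_SCAN_INTERVAL_INIT  INT_MAX

    static unsigned long clamp_ul(unsigned long v, unsigned long lo,
                                  unsigned long hi)
    {
        return v < lo ? lo : (v > hi ? hi : v);
    }

    int main(void)
    {
        /* remaining timeouts (jiffies) of entries seen in one scan */
        unsigned long expires[] = { 30ul * HZ, 5ul * HZ, 600ul * HZ, 2ul * HZ };
        unsigned long next_run = GC_SCAN_INTERVAL_INIT;    /* large initial bias */
        unsigned int i;

        for (i = 0; i < sizeof(expires) / sizeof(expires[0]); i++) {
            /* next_run += expires; next_run /= 2; as in gc_worker() */
            next_run += clamp_ul(expires[i], GC_SCAN_INTERVAL_MIN,
                                 GC_SCAN_INTERVAL_CLAMP);
            next_run /= 2u;
        }

        next_run = clamp_ul(next_run, GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_MAX);
        printf("reschedule in %lu jiffies (~%lus)\n", next_run, next_run / HZ);
        return 0;
    }

With only a few entries the INT_MAX bias keeps the result pinned at the 60s maximum, which is the intent stated in the comment above GC_SCAN_INTERVAL_INIT.
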
 
@@ -1531,7 +1611,7 @@ __nf_conntrack_alloc(struct net *net,
        /* save hash for reusing when confirming */
        *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
        ct->status = 0;
-       ct->timeout = 0;
+       WRITE_ONCE(ct->timeout, 0);
        write_pnet(&ct->ct_net, net);
        memset(&ct->__nfct_init_offset, 0,
               offsetof(struct nf_conn, proto) -
@@ -1542,7 +1622,7 @@ __nf_conntrack_alloc(struct net *net,
        /* Because we use RCU lookups, we set ct_general.use to zero before
         * this is inserted in any list.
         */
-       atomic_set(&ct->ct_general.use, 0);
+       refcount_set(&ct->ct_general.use, 0);
        return ct;
 out:
        atomic_dec(&cnet->count);
@@ -1567,7 +1647,7 @@ void nf_conntrack_free(struct nf_conn *ct)
        /* A freed object has refcnt == 0, that's
         * the golden rule for SLAB_TYPESAFE_BY_RCU
         */
-       WARN_ON(atomic_read(&ct->ct_general.use) != 0);
+       WARN_ON(refcount_read(&ct->ct_general.use) != 0);
 
        nf_ct_ext_destroy(ct);
        kmem_cache_free(nf_conntrack_cachep, ct);
@@ -1659,8 +1739,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
        if (!exp)
                __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
 
-       /* Now it is inserted into the unconfirmed list, bump refcount */
-       nf_conntrack_get(&ct->ct_general);
+       /* Now it is inserted into the unconfirmed list, set refcount to 1. */
+       refcount_set(&ct->ct_general.use, 1);
        nf_ct_add_to_unconfirmed_list(ct);
 
        local_bh_enable();
@@ -1687,8 +1767,8 @@ resolve_normal_ct(struct nf_conn *tmpl,
        struct nf_conntrack_tuple_hash *h;
        enum ip_conntrack_info ctinfo;
        struct nf_conntrack_zone tmp;
+       u32 hash, zone_id, rid;
        struct nf_conn *ct;
-       u32 hash;
 
        if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
                             dataoff, state->pf, protonum, state->net,
@@ -1699,8 +1779,20 @@ resolve_normal_ct(struct nf_conn *tmpl,
 
        /* look for tuple match */
        zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
-       hash = hash_conntrack_raw(&tuple, state->net);
+
+       zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
+       hash = hash_conntrack_raw(&tuple, zone_id, state->net);
        h = __nf_conntrack_find_get(state->net, zone, &tuple, hash);
+
+       if (!h) {
+               rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
+               if (zone_id != rid) {
+                       u32 tmp = hash_conntrack_raw(&tuple, rid, state->net);
+
+                       h = __nf_conntrack_find_get(state->net, zone, &tuple, tmp);
+               }
+       }
+
        if (!h) {
                h = init_conntrack(state->net, tmpl, &tuple,
                                   skb, dataoff, hash);
@@ -1879,17 +1971,19 @@ repeat:
                /* Invalid: inverse of the return code tells
                 * the netfilter core what to do */
                pr_debug("nf_conntrack_in: Can't track with proto module\n");
-               nf_conntrack_put(&ct->ct_general);
+               nf_ct_put(ct);
                skb->_nfct = 0;
-               NF_CT_STAT_INC_ATOMIC(state->net, invalid);
-               if (ret == -NF_DROP)
-                       NF_CT_STAT_INC_ATOMIC(state->net, drop);
                /* Special case: TCP tracker reports an attempt to reopen a
                 * closed/aborted connection. We have to go back and create a
                 * fresh conntrack.
                 */
                if (ret == -NF_REPEAT)
                        goto repeat;
+
+               NF_CT_STAT_INC_ATOMIC(state->net, invalid);
+               if (ret == -NF_DROP)
+                       NF_CT_STAT_INC_ATOMIC(state->net, drop);
+
                ret = -ret;
                goto out;
        }
@@ -2225,28 +2319,31 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
        spinlock_t *lockp;
 
        for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
+               struct hlist_nulls_head *hslot = &nf_conntrack_hash[*bucket];
+
+               if (hlist_nulls_empty(hslot))
+                       continue;
+
                lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
                local_bh_disable();
                nf_conntrack_lock(lockp);
-               if (*bucket < nf_conntrack_htable_size) {
-                       hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
-                               if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
-                                       continue;
-                               /* All nf_conn objects are added to hash table twice, one
-                                * for original direction tuple, once for the reply tuple.
-                                *
-                                * Exception: In the IPS_NAT_CLASH case, only the reply
-                                * tuple is added (the original tuple already existed for
-                                * a different object).
-                                *
-                                * We only need to call the iterator once for each
-                                * conntrack, so we just use the 'reply' direction
-                                * tuple while iterating.
-                                */
-                               ct = nf_ct_tuplehash_to_ctrack(h);
-                               if (iter(ct, data))
-                                       goto found;
-                       }
+               hlist_nulls_for_each_entry(h, n, hslot, hnnode) {
+                       if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
+                               continue;
+                       /* All nf_conn objects are added to the hash table twice,
+                        * once for the original direction tuple, once for the reply tuple.
+                        *
+                        * Exception: In the IPS_NAT_CLASH case, only the reply
+                        * tuple is added (the original tuple already existed for
+                        * a different object).
+                        *
+                        * We only need to call the iterator once for each
+                        * conntrack, so we just use the 'reply' direction
+                        * tuple while iterating.
+                        */
+                       ct = nf_ct_tuplehash_to_ctrack(h);
+                       if (iter(ct, data))
+                               goto found;
                }
                spin_unlock(lockp);
                local_bh_enable();
@@ -2255,7 +2352,7 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
 
        return NULL;
 found:
-       atomic_inc(&ct->ct_general.use);
+       refcount_inc(&ct->ct_general.use);
        spin_unlock(lockp);
        local_bh_enable();
        return ct;
@@ -2264,26 +2361,20 @@ found:
 static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
                                  void *data, u32 portid, int report)
 {
-       unsigned int bucket = 0, sequence;
+       unsigned int bucket = 0;
        struct nf_conn *ct;
 
        might_sleep();
 
-       for (;;) {
-               sequence = read_seqcount_begin(&nf_conntrack_generation);
-
-               while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
-                       /* Time to push up daises... */
+       mutex_lock(&nf_conntrack_mutex);
+       while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
+               /* Time to push up daisies... */
 
-                       nf_ct_delete(ct, portid, report);
-                       nf_ct_put(ct);
-                       cond_resched();
-               }
-
-               if (!read_seqcount_retry(&nf_conntrack_generation, sequence))
-                       break;
-               bucket = 0;
+               nf_ct_delete(ct, portid, report);
+               nf_ct_put(ct);
+               cond_resched();
        }
+       mutex_unlock(&nf_conntrack_mutex);
 }
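
Note on the mutex: taking nf_conntrack_mutex here pairs with the same mutex in nf_conntrack_hash_resize() below. Once a cleanup walk can no longer race with a resize, the old seqcount retry loop (restart from bucket 0 whenever the table changed underneath) becomes unnecessary. A userspace model of the idea, with a pthread mutex standing in (compile with -lpthread):

    #include <pthread.h>
    #include <stdio.h>

    /* stand-in for nf_conntrack_mutex */
    static pthread_mutex_t table_mutex = PTHREAD_MUTEX_INITIALIZER;

    static void resize_table(void)
    {
        pthread_mutex_lock(&table_mutex);
        puts("resize: rehash every entry under the mutex");
        pthread_mutex_unlock(&table_mutex);
    }

    static void cleanup_table(void)
    {
        pthread_mutex_lock(&table_mutex);
        puts("cleanup: walk all buckets once; no restart needed");
        pthread_mutex_unlock(&table_mutex);
    }

    int main(void)
    {
        resize_table();    /* the two walkers can never interleave */
        cleanup_table();
        return 0;
    }
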
 
 struct iter_data {
@@ -2519,8 +2610,10 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
        if (!hash)
                return -ENOMEM;
 
+       mutex_lock(&nf_conntrack_mutex);
        old_size = nf_conntrack_htable_size;
        if (old_size == hashsize) {
+               mutex_unlock(&nf_conntrack_mutex);
                kvfree(hash);
                return 0;
        }
@@ -2537,12 +2630,16 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
 
        for (i = 0; i < nf_conntrack_htable_size; i++) {
                while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
+                       unsigned int zone_id;
+
                        h = hlist_nulls_entry(nf_conntrack_hash[i].first,
                                              struct nf_conntrack_tuple_hash, hnnode);
                        ct = nf_ct_tuplehash_to_ctrack(h);
                        hlist_nulls_del_rcu(&h->hnnode);
+
+                       zone_id = nf_ct_zone_id(nf_ct_zone(ct), NF_CT_DIRECTION(h));
                        bucket = __hash_conntrack(nf_ct_net(ct),
-                                                 &h->tuple, hashsize);
+                                                 &h->tuple, zone_id, hashsize);
                        hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
                }
        }
@@ -2556,6 +2653,8 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
        nf_conntrack_all_unlock();
        local_bh_enable();
 
+       mutex_unlock(&nf_conntrack_mutex);
+
        synchronize_net();
        kvfree(old_hash);
        return 0;
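
Note on the rehash loop: each entry's new bucket is computed with the zone id of the direction that tuplehash represents, matching how it was originally inserted. The bucket math itself is reciprocal_scale(), which maps a 32-bit hash onto [0, size) without a modulo; a small sketch using the same formula:

    #include <stdint.h>
    #include <stdio.h>

    /* same formula as the kernel's reciprocal_scale() */
    static uint32_t reciprocal_scale(uint32_t val, uint32_t ep_ro)
    {
        return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
    }

    int main(void)
    {
        uint32_t hash = 0xdeadbeef;    /* per-tuple siphash result */

        printf("bucket in 16384-slot table: %u\n",
               (unsigned)reciprocal_scale(hash, 16384));
        printf("bucket in 65536-slot table: %u\n",
               (unsigned)reciprocal_scale(hash, 65536));
        return 0;
    }
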
@@ -2726,7 +2825,7 @@ err_cachep:
 
 static struct nf_ct_hook nf_conntrack_hook = {
        .update         = nf_conntrack_update,
-       .destroy        = destroy_conntrack,
+       .destroy        = nf_ct_destroy,
        .get_tuple_skb  = nf_conntrack_get_tuple_skb,
 };