netfilter: nft_set_pipapo: release elements in clone from abort path

[platform/kernel/linux-rpi.git] / net / netfilter / nf_conntrack_core.c
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c

index 94e18fb..31399c5 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -66,6 +66,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash);
  struct conntrack_gc_work {
         struct delayed_work     dwork;
         u32                     next_bucket;
+       u32                     avg_timeout;
+       u32                     start_time;
         bool                    exiting;
         bool                    early_drop;
  };
@@ -74,10 +76,25 @@ static __read_mostly struct kmem_cache *nf_conntrack_cachep;
  static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
  static __read_mostly bool nf_conntrack_locks_all;
  
-#define GC_SCAN_INTERVAL       (120u * HZ)
+/* serialize hash resizes and nf_ct_iterate_cleanup */
+static DEFINE_MUTEX(nf_conntrack_mutex);
+
+#define GC_SCAN_INTERVAL_MAX   (60ul * HZ)
+#define GC_SCAN_INTERVAL_MIN   (1ul * HZ)
+
+/* clamp timeouts to this value (TCP unacked) */
+#define GC_SCAN_INTERVAL_CLAMP (300ul * HZ)
+
+/* large initial bias so that we don't scan often just because we have
+ * three entries with a 1s timeout.
+ */
+#define GC_SCAN_INTERVAL_INIT  INT_MAX
+
  #define GC_SCAN_MAX_DURATION   msecs_to_jiffies(10)
+#define GC_SCAN_EXPIRED_MAX    (64000u / HZ)
  
-#define MAX_CHAINLEN   64u
+#define MIN_CHAINLEN   8u
+#define MAX_CHAINLEN   (32u - MIN_CHAINLEN)
  
  static struct conntrack_gc_work conntrack_gc_work;
  
@@ -188,11 +205,13 @@ seqcount_spinlock_t nf_conntrack_generation __read_mostly;
  static siphash_key_t nf_conntrack_hash_rnd __read_mostly;
  
  static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
+                             unsigned int zoneid,
                               const struct net *net)
  {
         struct {
                 struct nf_conntrack_man src;
                 union nf_inet_addr dst_addr;
+               unsigned int zone;
                 u32 net_mix;
                 u16 dport;
                 u16 proto;
@@ -205,6 +224,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
         /* The direction must be ignored, so handle usable members manually. */
         combined.src = tuple->src;
         combined.dst_addr = tuple->dst.u3;
+       combined.zone = zoneid;
         combined.net_mix = net_hash_mix(net);
         combined.dport = (__force __u16)tuple->dst.u.all;
         combined.proto = tuple->dst.protonum;
@@ -219,15 +239,17 @@ static u32 scale_hash(u32 hash)
  
  static u32 __hash_conntrack(const struct net *net,
                             const struct nf_conntrack_tuple *tuple,
+                           unsigned int zoneid,
                             unsigned int size)
  {
-       return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
+       return reciprocal_scale(hash_conntrack_raw(tuple, zoneid, net), size);
  }
  
  static u32 hash_conntrack(const struct net *net,
-                         const struct nf_conntrack_tuple *tuple)
+                         const struct nf_conntrack_tuple *tuple,
+                         unsigned int zoneid)
  {
-       return scale_hash(hash_conntrack_raw(tuple, net));
+       return scale_hash(hash_conntrack_raw(tuple, zoneid, net));
  }
  
  static bool nf_ct_get_tuple_ports(const struct sk_buff *skb,
@@ -549,7 +571,7 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
  
  #define NFCT_ALIGN(len)        (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK)
  
-/* Released via destroy_conntrack() */
+/* Released via nf_ct_destroy() */
  struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
                                  const struct nf_conntrack_zone *zone,
                                  gfp_t flags)
@@ -576,7 +598,7 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
         tmpl->status = IPS_TEMPLATE;
         write_pnet(&tmpl->ct_net, net);
         nf_ct_zone_add(tmpl, zone);
-       atomic_set(&tmpl->ct_general.use, 0);
+       refcount_set(&tmpl->ct_general.use, 1);
  
         return tmpl;
  }
@@ -603,13 +625,12 @@ static void destroy_gre_conntrack(struct nf_conn *ct)
  #endif
  }
  
-static void
-destroy_conntrack(struct nf_conntrack *nfct)
+void nf_ct_destroy(struct nf_conntrack *nfct)
  {
         struct nf_conn *ct = (struct nf_conn *)nfct;
  
-       pr_debug("destroy_conntrack(%p)\n", ct);
-       WARN_ON(atomic_read(&nfct->use) != 0);
+       pr_debug("%s(%p)\n", __func__, ct);
+       WARN_ON(refcount_read(&nfct->use) != 0);
  
         if (unlikely(nf_ct_is_template(ct))) {
                 nf_ct_tmpl_free(ct);
@@ -634,9 +655,10 @@ destroy_conntrack(struct nf_conntrack *nfct)
         if (ct->master)
                 nf_ct_put(ct->master);
  
-       pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
+       pr_debug("%s: returning ct=%p to slab\n", __func__, ct);
         nf_conntrack_free(ct);
  }
+EXPORT_SYMBOL(nf_ct_destroy);
  
  static void nf_ct_delete_from_lists(struct nf_conn *ct)
  {
@@ -650,9 +672,11 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
         do {
                 sequence = read_seqcount_begin(&nf_conntrack_generation);
                 hash = hash_conntrack(net,
-                                     &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+                                     &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+                                     nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
                 reply_hash = hash_conntrack(net,
-                                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+                                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+                                          nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
         } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
  
         clean_from_lists(ct);
@@ -673,7 +697,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
  
         tstamp = nf_conn_tstamp_find(ct);
         if (tstamp) {
-               s32 timeout = ct->timeout - nfct_time_stamp;
+               s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
  
                 tstamp->stop = ktime_get_real_ns();
                 if (timeout < 0)
@@ -731,7 +755,7 @@ nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2)
  /* caller must hold rcu readlock and none of the nf_conntrack_locks */
  static void nf_ct_gc_expired(struct nf_conn *ct)
  {
-       if (!atomic_inc_not_zero(&ct->ct_general.use))
+       if (!refcount_inc_not_zero(&ct->ct_general.use))
                 return;
  
         if (nf_ct_should_gc(ct))
@@ -799,7 +823,7 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
                  * in, try to obtain a reference and re-check tuple
                  */
                 ct = nf_ct_tuplehash_to_ctrack(h);
-               if (likely(atomic_inc_not_zero(&ct->ct_general.use))) {
+               if (likely(refcount_inc_not_zero(&ct->ct_general.use))) {
                         if (likely(nf_ct_key_equal(h, tuple, zone, net)))
                                 goto found;
  
@@ -819,8 +843,20 @@ struct nf_conntrack_tuple_hash *
  nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
                       const struct nf_conntrack_tuple *tuple)
  {
-       return __nf_conntrack_find_get(net, zone, tuple,
-                                      hash_conntrack_raw(tuple, net));
+       unsigned int rid, zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
+       struct nf_conntrack_tuple_hash *thash;
+
+       thash = __nf_conntrack_find_get(net, zone, tuple,
+                                       hash_conntrack_raw(tuple, zone_id, net));
+
+       if (thash)
+               return thash;
+
+       rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
+       if (rid != zone_id)
+               return __nf_conntrack_find_get(net, zone, tuple,
+                                              hash_conntrack_raw(tuple, rid, net));
+       return thash;
  }
  EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
  
@@ -842,6 +878,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
         unsigned int hash, reply_hash;
         struct nf_conntrack_tuple_hash *h;
         struct hlist_nulls_node *n;
+       unsigned int max_chainlen;
         unsigned int chainlen = 0;
         unsigned int sequence;
         int err = -EEXIST;
@@ -852,18 +889,22 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
         do {
                 sequence = read_seqcount_begin(&nf_conntrack_generation);
                 hash = hash_conntrack(net,
-                                     &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+                                     &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+                                     nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
                 reply_hash = hash_conntrack(net,
-                                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+                                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+                                          nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
         } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
  
+       max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
+
         /* See if there's one in the list already, including reverse */
         hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
                 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
                                     zone, net))
                         goto out;
  
-               if (chainlen++ > MAX_CHAINLEN)
+               if (chainlen++ > max_chainlen)
                         goto chaintoolong;
         }
  
@@ -873,13 +914,13 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
                 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
                                     zone, net))
                         goto out;
-               if (chainlen++ > MAX_CHAINLEN)
+               if (chainlen++ > max_chainlen)
                         goto chaintoolong;
         }
  
         smp_wmb();
         /* The caller holds a reference to this object */
-       atomic_set(&ct->ct_general.use, 2);
+       refcount_set(&ct->ct_general.use, 2);
         __nf_conntrack_hash_insert(ct, hash, reply_hash);
         nf_conntrack_double_unlock(hash, reply_hash);
         NF_CT_STAT_INC(net, insert);
@@ -930,7 +971,7 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct)
  {
         struct nf_conn_tstamp *tstamp;
  
-       atomic_inc(&ct->ct_general.use);
+       refcount_inc(&ct->ct_general.use);
         ct->status |= IPS_CONFIRMED;
  
         /* set conntrack timestamp, if enabled. */
@@ -961,7 +1002,7 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb,
  
                 nf_ct_acct_merge(ct, ctinfo, loser_ct);
                 nf_ct_add_to_dying_list(loser_ct);
-               nf_conntrack_put(&loser_ct->ct_general);
+               nf_ct_put(loser_ct);
                 nf_ct_set(skb, ct, ctinfo);
  
                 NF_CT_STAT_INC(net, clash_resolve);
@@ -1008,7 +1049,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
         }
  
         /* We want the clashing entry to go away real soon: 1 second timeout. */
-       loser_ct->timeout = nfct_time_stamp + HZ;
+       WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ);
  
         /* IPS_NAT_CLASH removes the entry automatically on the first
          * reply.  Also prevents UDP tracker from moving the entry to
@@ -1103,8 +1144,8 @@ drop:
  int
  __nf_conntrack_confirm(struct sk_buff *skb)
  {
+       unsigned int chainlen = 0, sequence, max_chainlen;
         const struct nf_conntrack_zone *zone;
-       unsigned int chainlen = 0, sequence;
         unsigned int hash, reply_hash;
         struct nf_conntrack_tuple_hash *h;
         struct nf_conn *ct;
@@ -1133,8 +1174,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
                 hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
                 hash = scale_hash(hash);
                 reply_hash = hash_conntrack(net,
-                                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
+                                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+                                          nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
         } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
  
         /* We're not in hash table, and we refuse to set up related
@@ -1168,6 +1209,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
                 goto dying;
         }
  
+       max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
         /* See if there's one in the list already, including reverse:
            NAT could have grabbed it without realizing, since we're
            not in the hash.  If there is, we lost race. */
@@ -1175,7 +1217,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
                 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
                                     zone, net))
                         goto out;
-               if (chainlen++ > MAX_CHAINLEN)
+               if (chainlen++ > max_chainlen)
                         goto chaintoolong;
         }
  
@@ -1184,7 +1226,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
                 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
                                     zone, net))
                         goto out;
-               if (chainlen++ > MAX_CHAINLEN) {
+               if (chainlen++ > max_chainlen) {
  chaintoolong:
                         nf_ct_add_to_dying_list(ct);
                         NF_CT_STAT_INC(net, chaintoolong);
@@ -1246,7 +1288,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
         rcu_read_lock();
   begin:
         nf_conntrack_get_ht(&ct_hash, &hsize);
-       hash = __hash_conntrack(net, tuple, hsize);
+       hash = __hash_conntrack(net, tuple, nf_ct_zone_id(zone, IP_CT_DIR_REPLY), hsize);
  
         hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
                 ct = nf_ct_tuplehash_to_ctrack(h);
@@ -1322,7 +1364,7 @@ static unsigned int early_drop_list(struct net *net,
                     nf_ct_is_dying(tmp))
                         continue;
  
-               if (!atomic_inc_not_zero(&tmp->ct_general.use))
+               if (!refcount_inc_not_zero(&tmp->ct_general.use))
                         continue;
  
                 /* kill only if still in same netns -- might have moved due to
@@ -1391,16 +1433,28 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
  
  static void gc_worker(struct work_struct *work)
  {
-       unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION;
         unsigned int i, hashsz, nf_conntrack_max95 = 0;
-       unsigned long next_run = GC_SCAN_INTERVAL;
+       u32 end_time, start_time = nfct_time_stamp;
         struct conntrack_gc_work *gc_work;
+       unsigned int expired_count = 0;
+       unsigned long next_run;
+       s32 delta_time;
+
         gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
  
         i = gc_work->next_bucket;
         if (gc_work->early_drop)
                 nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
  
+       if (i == 0) {
+               gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
+               gc_work->start_time = start_time;
+       }
+
+       next_run = gc_work->avg_timeout;
+
+       end_time = start_time + GC_SCAN_MAX_DURATION;
+
         do {
                 struct nf_conntrack_tuple_hash *h;
                 struct hlist_nulls_head *ct_hash;
@@ -1417,6 +1471,7 @@ static void gc_worker(struct work_struct *work)
  
                 hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
                         struct nf_conntrack_net *cnet;
+                       unsigned long expires;
                         struct net *net;
  
                         tmp = nf_ct_tuplehash_to_ctrack(h);
@@ -1426,11 +1481,29 @@ static void gc_worker(struct work_struct *work)
                                 continue;
                         }
  
+                       if (expired_count > GC_SCAN_EXPIRED_MAX) {
+                               rcu_read_unlock();
+
+                               gc_work->next_bucket = i;
+                               gc_work->avg_timeout = next_run;
+
+                               delta_time = nfct_time_stamp - gc_work->start_time;
+
+                               /* re-sched immediately if total cycle time is exceeded */
+                               next_run = delta_time < (s32)GC_SCAN_INTERVAL_MAX;
+                               goto early_exit;
+                       }
+
                         if (nf_ct_is_expired(tmp)) {
                                 nf_ct_gc_expired(tmp);
+                               expired_count++;
                                 continue;
                         }
  
+                       expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
+                       next_run += expires;
+                       next_run /= 2u;
+
                         if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
                                 continue;
  
@@ -1440,7 +1513,7 @@ static void gc_worker(struct work_struct *work)
                                 continue;
  
                         /* need to take reference to avoid possible races */
-                       if (!atomic_inc_not_zero(&tmp->ct_general.use))
+                       if (!refcount_inc_not_zero(&tmp->ct_general.use))
                                 continue;
  
                         if (gc_worker_skip_ct(tmp)) {
@@ -1448,8 +1521,10 @@ static void gc_worker(struct work_struct *work)
                                 continue;
                         }
  
-                       if (gc_worker_can_early_drop(tmp))
+                       if (gc_worker_can_early_drop(tmp)) {
                                 nf_ct_kill(tmp);
+                               expired_count++;
+                       }
  
                         nf_ct_put(tmp);
                 }
@@ -1462,33 +1537,38 @@ static void gc_worker(struct work_struct *work)
                 cond_resched();
                 i++;
  
-               if (time_after(jiffies, end_time) && i < hashsz) {
+               delta_time = nfct_time_stamp - end_time;
+               if (delta_time > 0 && i < hashsz) {
+                       gc_work->avg_timeout = next_run;
                         gc_work->next_bucket = i;
                         next_run = 0;
-                       break;
+                       goto early_exit;
                 }
         } while (i < hashsz);
  
+       gc_work->next_bucket = 0;
+
+       next_run = clamp(next_run, GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_MAX);
+
+       delta_time = max_t(s32, nfct_time_stamp - gc_work->start_time, 1);
+       if (next_run > (unsigned long)delta_time)
+               next_run -= delta_time;
+       else
+               next_run = 1;
+
+early_exit:
         if (gc_work->exiting)
                 return;
  
-       /*
-        * Eviction will normally happen from the packet path, and not
-        * from this gc worker.
-        *
-        * This worker is only here to reap expired entries when system went
-        * idle after a busy period.
-        */
-       if (next_run) {
+       if (next_run)
                 gc_work->early_drop = false;
-               gc_work->next_bucket = 0;
-       }
+
         queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
  }
  
  static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
  {
-       INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
+       INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
         gc_work->exiting = false;
  }
  
@@ -1531,7 +1611,7 @@ __nf_conntrack_alloc(struct net *net,
         /* save hash for reusing when confirming */
         *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
         ct->status = 0;
-       ct->timeout = 0;
+       WRITE_ONCE(ct->timeout, 0);
         write_pnet(&ct->ct_net, net);
         memset(&ct->__nfct_init_offset, 0,
                offsetof(struct nf_conn, proto) -
@@ -1542,7 +1622,7 @@ __nf_conntrack_alloc(struct net *net,
         /* Because we use RCU lookups, we set ct_general.use to zero before
          * this is inserted in any list.
          */
-       atomic_set(&ct->ct_general.use, 0);
+       refcount_set(&ct->ct_general.use, 0);
         return ct;
  out:
         atomic_dec(&cnet->count);
@@ -1567,7 +1647,7 @@ void nf_conntrack_free(struct nf_conn *ct)
         /* A freed object has refcnt == 0, that's
          * the golden rule for SLAB_TYPESAFE_BY_RCU
          */
-       WARN_ON(atomic_read(&ct->ct_general.use) != 0);
+       WARN_ON(refcount_read(&ct->ct_general.use) != 0);
  
         nf_ct_ext_destroy(ct);
         kmem_cache_free(nf_conntrack_cachep, ct);
@@ -1659,8 +1739,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
         if (!exp)
                 __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
  
-       /* Now it is inserted into the unconfirmed list, bump refcount */
-       nf_conntrack_get(&ct->ct_general);
+       /* Now it is inserted into the unconfirmed list, set refcount to 1. */
+       refcount_set(&ct->ct_general.use, 1);
         nf_ct_add_to_unconfirmed_list(ct);
  
         local_bh_enable();
@@ -1687,8 +1767,8 @@ resolve_normal_ct(struct nf_conn *tmpl,
         struct nf_conntrack_tuple_hash *h;
         enum ip_conntrack_info ctinfo;
         struct nf_conntrack_zone tmp;
+       u32 hash, zone_id, rid;
         struct nf_conn *ct;
-       u32 hash;
  
         if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
                              dataoff, state->pf, protonum, state->net,
@@ -1699,8 +1779,20 @@ resolve_normal_ct(struct nf_conn *tmpl,
  
         /* look for tuple match */
         zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
-       hash = hash_conntrack_raw(&tuple, state->net);
+
+       zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
+       hash = hash_conntrack_raw(&tuple, zone_id, state->net);
         h = __nf_conntrack_find_get(state->net, zone, &tuple, hash);
+
+       if (!h) {
+               rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
+               if (zone_id != rid) {
+                       u32 tmp = hash_conntrack_raw(&tuple, rid, state->net);
+
+                       h = __nf_conntrack_find_get(state->net, zone, &tuple, tmp);
+               }
+       }
+
         if (!h) {
                 h = init_conntrack(state->net, tmpl, &tuple,
                                    skb, dataoff, hash);
@@ -1879,17 +1971,19 @@ repeat:
                 /* Invalid: inverse of the return code tells
                  * the netfilter core what to do */
                 pr_debug("nf_conntrack_in: Can't track with proto module\n");
-               nf_conntrack_put(&ct->ct_general);
+               nf_ct_put(ct);
                 skb->_nfct = 0;
-               NF_CT_STAT_INC_ATOMIC(state->net, invalid);
-               if (ret == -NF_DROP)
-                       NF_CT_STAT_INC_ATOMIC(state->net, drop);
                 /* Special case: TCP tracker reports an attempt to reopen a
                  * closed/aborted connection. We have to go back and create a
                  * fresh conntrack.
                  */
                 if (ret == -NF_REPEAT)
                         goto repeat;
+
+               NF_CT_STAT_INC_ATOMIC(state->net, invalid);
+               if (ret == -NF_DROP)
+                       NF_CT_STAT_INC_ATOMIC(state->net, drop);
+
                 ret = -ret;
                 goto out;
         }
@@ -2225,28 +2319,31 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
         spinlock_t *lockp;
  
         for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
+               struct hlist_nulls_head *hslot = &nf_conntrack_hash[*bucket];
+
+               if (hlist_nulls_empty(hslot))
+                       continue;
+
                 lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
                 local_bh_disable();
                 nf_conntrack_lock(lockp);
-               if (*bucket < nf_conntrack_htable_size) {
-                       hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
-                               if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
-                                       continue;
-                               /* All nf_conn objects are added to hash table twice, one
-                                * for original direction tuple, once for the reply tuple.
-                                *
-                                * Exception: In the IPS_NAT_CLASH case, only the reply
-                                * tuple is added (the original tuple already existed for
-                                * a different object).
-                                *
-                                * We only need to call the iterator once for each
-                                * conntrack, so we just use the 'reply' direction
-                                * tuple while iterating.
-                                */
-                               ct = nf_ct_tuplehash_to_ctrack(h);
-                               if (iter(ct, data))
-                                       goto found;
-                       }
+               hlist_nulls_for_each_entry(h, n, hslot, hnnode) {
+                       if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
+                               continue;
+                       /* All nf_conn objects are added to hash table twice, one
+                        * for original direction tuple, once for the reply tuple.
+                        *
+                        * Exception: In the IPS_NAT_CLASH case, only the reply
+                        * tuple is added (the original tuple already existed for
+                        * a different object).
+                        *
+                        * We only need to call the iterator once for each
+                        * conntrack, so we just use the 'reply' direction
+                        * tuple while iterating.
+                        */
+                       ct = nf_ct_tuplehash_to_ctrack(h);
+                       if (iter(ct, data))
+                               goto found;
                 }
                 spin_unlock(lockp);
                 local_bh_enable();
@@ -2255,7 +2352,7 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
  
         return NULL;
  found:
-       atomic_inc(&ct->ct_general.use);
+       refcount_inc(&ct->ct_general.use);
         spin_unlock(lockp);
         local_bh_enable();
         return ct;
@@ -2264,26 +2361,20 @@ found:
  static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
                                   void *data, u32 portid, int report)
  {
-       unsigned int bucket = 0, sequence;
+       unsigned int bucket = 0;
         struct nf_conn *ct;
  
         might_sleep();
  
-       for (;;) {
-               sequence = read_seqcount_begin(&nf_conntrack_generation);
-
-               while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
-                       /* Time to push up daises... */
+       mutex_lock(&nf_conntrack_mutex);
+       while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
+               /* Time to push up daisies... */
  
-                       nf_ct_delete(ct, portid, report);
-                       nf_ct_put(ct);
-                       cond_resched();
-               }
-
-               if (!read_seqcount_retry(&nf_conntrack_generation, sequence))
-                       break;
-               bucket = 0;
+               nf_ct_delete(ct, portid, report);
+               nf_ct_put(ct);
+               cond_resched();
         }
+       mutex_unlock(&nf_conntrack_mutex);
  }
  
  struct iter_data {
@@ -2519,8 +2610,10 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
         if (!hash)
                 return -ENOMEM;
  
+       mutex_lock(&nf_conntrack_mutex);
         old_size = nf_conntrack_htable_size;
         if (old_size == hashsize) {
+               mutex_unlock(&nf_conntrack_mutex);
                 kvfree(hash);
                 return 0;
         }
@@ -2537,12 +2630,16 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
  
         for (i = 0; i < nf_conntrack_htable_size; i++) {
                 while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
+                       unsigned int zone_id;
+
                         h = hlist_nulls_entry(nf_conntrack_hash[i].first,
                                               struct nf_conntrack_tuple_hash, hnnode);
                         ct = nf_ct_tuplehash_to_ctrack(h);
                         hlist_nulls_del_rcu(&h->hnnode);
+
+                       zone_id = nf_ct_zone_id(nf_ct_zone(ct), NF_CT_DIRECTION(h));
                         bucket = __hash_conntrack(nf_ct_net(ct),
-                                                 &h->tuple, hashsize);
+                                                 &h->tuple, zone_id, hashsize);
                         hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
                 }
         }
@@ -2556,6 +2653,8 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
         nf_conntrack_all_unlock();
         local_bh_enable();
  
+       mutex_unlock(&nf_conntrack_mutex);
+
         synchronize_net();
         kvfree(old_hash);
         return 0;
@@ -2726,7 +2825,7 @@ err_cachep:
  
  static struct nf_ct_hook nf_conntrack_hook = {
         .update         = nf_conntrack_update,
-       .destroy        = destroy_conntrack,
+       .destroy        = nf_ct_destroy,
         .get_tuple_skb  = nf_conntrack_get_tuple_skb,
  };