netfilter: conntrack: allow insertion of clashing entries

author Florian Westphal <fw@strlen.de>

Mon, 3 Feb 2020 16:37:07 +0000 (17:37 +0100)

committer Pablo Neira Ayuso <pablo@netfilter.org>

Mon, 17 Feb 2020 09:55:14 +0000 (10:55 +0100)
author Florian Westphal <fw@strlen.de>
Mon, 3 Feb 2020 16:37:07 +0000 (17:37 +0100)
committer Pablo Neira Ayuso <pablo@netfilter.org>
Mon, 17 Feb 2020 09:55:14 +0000 (10:55 +0100)
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h

index e5b752027a031b119ce09a982b93f935c736c93d..9670b54b484a6f7849a15e458a7f63246f425b2b 100644 (file)
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -145,6 +145,13 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
         }
  }
  
+/* Initialise @n as a stand-alone "fake" list member without linking it
+ * into any list head: ->pprev is pointed at the node's own ->next field
+ * and ->next is set to a nulls marker.
+ *
+ * After that hlist_nulls_del will work on @n (presumably because the
+ * unlink then only writes into the node itself -- hlist_nulls_del is
+ * not shown here, confirm against its definition).
+ */
+static inline void hlist_nulls_add_fake(struct hlist_nulls_node *n)
+{
+       n->pprev = &n->next;
+       n->next = (struct hlist_nulls_node *)NULLS_MARKER(NULL);
+}
+
  /**
   * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
   * @tpos:      the type * to use as a loop cursor.
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h

index 336014bf8868c3f04b92cdbf31bdf2ccafc68a71..b6f0bb1dc7998e67add1a97a62b69a07f68147e2 100644 (file)
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -97,6 +97,15 @@ enum ip_conntrack_status {
         IPS_UNTRACKED_BIT = 12,
         IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
  
+#ifdef __KERNEL__
+       /* Re-purposed for in-kernel use:
+        * Tags a conntrack entry that clashed with an existing entry
+        * on insert.
+        */
+       IPS_NAT_CLASH_BIT = IPS_UNTRACKED_BIT,
+       IPS_NAT_CLASH = IPS_UNTRACKED,
+#endif
+
         /* Conntrack got a helper explicitly attached via CT target. */
         IPS_HELPER_BIT = 13,
         IPS_HELPER = (1 << IPS_HELPER_BIT),
@@ -110,7 +119,8 @@ enum ip_conntrack_status {
          */
         IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
                                  IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
-                                IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
+                                IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_UNTRACKED |
+                                IPS_OFFLOAD),
  
         __IPS_MAX_BIT = 15,
  };
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c

index 3f069eb0f0fc78f23f1d9593aab8e2bdbe326a3b..1927fc296f9514bcd5866d340c6f659bea0fdb3e 100644 (file)
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -940,11 +940,71 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb,
         return NF_DROP;
  }
  
+/**
+ * nf_ct_resolve_clash_harder - attempt to insert clashing conntrack entry
+ *
+ * @skb: skb that causes the collision
+ * @repl_idx: hash slot for reply direction
+ *
+ * Called when origin or reply direction had a clash.
+ * The skb can be handled without packet drop provided the reply direction
+ * is unique or the existing entry has the identical tuple in both
+ * directions.
+ *
+ * If the @repl_idx slot already holds an entry whose key equals the reply
+ * tuple of the conntrack attached to @skb, the decision is delegated to
+ * __nf_ct_resolve_clash().  Otherwise the conntrack gets a timeout of
+ * nfct_time_stamp + HZ, is flagged IPS_FIXED_TIMEOUT | IPS_NAT_CLASH, and
+ * only its REPLY direction node is linked into the hash table; the
+ * ORIGINAL direction node is merely fake-added.
+ *
+ * Caller must hold conntrack table locks to prevent concurrent updates.
+ *
+ * Returns NF_ACCEPT if the entry was inserted, else the verdict of
+ * __nf_ct_resolve_clash(), i.e. NF_DROP if the clash could not be handled.
+ */
+static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
+{
+       struct nf_conn *loser_ct = (struct nf_conn *)skb_nfct(skb);
+       const struct nf_conntrack_zone *zone;
+       struct nf_conntrack_tuple_hash *h;
+       struct hlist_nulls_node *n;
+       struct net *net;
+
+       zone = nf_ct_zone(loser_ct);
+       net = nf_ct_net(loser_ct);
+
+       /* Reply direction must never result in a clash, unless both origin
+        * and reply tuples are identical.  A match in the reply slot is
+        * therefore handed to __nf_ct_resolve_clash() instead of being
+        * inserted a second time.
+        */
+       hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[repl_idx], hnnode) {
+               if (nf_ct_key_equal(h,
+                                   &loser_ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+                                   zone, net))
+                       return __nf_ct_resolve_clash(skb, h);
+       }
+
+       /* We want the clashing entry to go away real soon: 1 second timeout
+        * (HZ jiffies from the current conntrack time stamp).
+        */
+       loser_ct->timeout = nfct_time_stamp + HZ;
+
+       /* IPS_NAT_CLASH removes the entry automatically on the first
+        * reply.  Also prevents UDP tracker from moving the entry to
+        * ASSURED state, i.e. the entry can always be evicted under
+        * pressure.  IPS_FIXED_TIMEOUT is set together with it.
+        */
+       loser_ct->status |= IPS_FIXED_TIMEOUT | IPS_NAT_CLASH;
+
+       __nf_conntrack_insert_prepare(loser_ct);
+
+       /* fake add for ORIGINAL dir: we want lookups to only find the entry
+        * already in the table.  This also hides the clashing entry from
+        * ctnetlink iteration, i.e. conntrack -L won't show them.
+        * Only the REPLY direction node below is really linked into the
+        * hash table.
+        */
+       hlist_nulls_add_fake(&loser_ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+
+       hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
+                                &nf_conntrack_hash[repl_idx]);
+       return NF_ACCEPT;
+}
+
  /**
   * nf_ct_resolve_clash - attempt to handle clash without packet drop
   *
   * @skb: skb that causes the clash
   * @h: tuplehash of the clashing entry already in table
+ * @reply_hash: hash slot for reply direction
   *
   * A conntrack entry can be inserted to the connection tracking table
   * if there is no existing entry with an identical tuple.
@@ -963,10 +1023,18 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb,
   * exactly the same, only the to-be-confirmed conntrack entry is discarded
   * and @skb is associated with the conntrack entry already in the table.
   *
+ * Failing that, the new, unconfirmed conntrack is still added to the table
+ * provided that the collision only occurs in the ORIGINAL direction.
+ * The new entry will be added only in the non-clashing REPLY direction,
+ * so packets in the ORIGINAL direction will continue to match the existing
+ * entry.  The new entry will also have a fixed timeout so it expires --
+ * due to the collision, it will not see bidirectional traffic.
+ *
   * Returns NF_DROP if the clash could not be resolved.
   */
  static __cold noinline int
-nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h)
+nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h,
+                   u32 reply_hash)
  {
         /* This is the conntrack entry already in hashes that won race. */
         struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
@@ -987,6 +1055,10 @@ nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h)
         if (ret == NF_ACCEPT)
                 return ret;
  
+       ret = nf_ct_resolve_clash_harder(skb, reply_hash);
+       if (ret == NF_ACCEPT)
+               return ret;
+
  drop:
         nf_ct_add_to_dying_list(loser_ct);
         NF_CT_STAT_INC(net, drop);
@@ -1101,7 +1173,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
         return NF_ACCEPT;
  
  out:
-       ret = nf_ct_resolve_clash(skb, h);
+       ret = nf_ct_resolve_clash(skb, h, reply_hash);
  dying:
         nf_conntrack_double_unlock(hash, reply_hash);
         local_bh_enable();
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c

index 7365b43f8f980edb267835006c8d7388ab450336..760ca242281655590ddf0a20ec25c8b73930e06f 100644 (file)
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -81,6 +81,18 @@ static bool udp_error(struct sk_buff *skb,
         return false;
  }
  
+/* Timeout handling for a UDP/UDP-Lite conntrack on the unreplied path.
+ *
+ * If @ctinfo is IP_CT_ESTABLISHED_REPLY and @ct carries IPS_NAT_CLASH,
+ * the entry is removed via nf_ct_kill() instead of being refreshed.
+ * In every other case nf_ct_refresh_acct() is called with @extra_jiffies.
+ */
+static void nf_conntrack_udp_refresh_unreplied(struct nf_conn *ct,
+                                              struct sk_buff *skb,
+                                              enum ip_conntrack_info ctinfo,
+                                              u32 extra_jiffies)
+{
+       if (unlikely(ctinfo == IP_CT_ESTABLISHED_REPLY &&
+                    ct->status & IPS_NAT_CLASH))
+               nf_ct_kill(ct);
+       else
+               nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies);
+}
+
  /* Returns verdict for packet, and may modify conntracktype */
  int nf_conntrack_udp_packet(struct nf_conn *ct,
                             struct sk_buff *skb,
@@ -116,8 +128,8 @@ int nf_conntrack_udp_packet(struct nf_conn *ct,
                 if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
                         nf_conntrack_event_cache(IPCT_ASSURED, ct);
         } else {
-               nf_ct_refresh_acct(ct, ctinfo, skb,
-                                  timeouts[UDP_CT_UNREPLIED]);
+               nf_conntrack_udp_refresh_unreplied(ct, skb, ctinfo,
+                                                  timeouts[UDP_CT_UNREPLIED]);
         }
         return NF_ACCEPT;
  }
@@ -198,8 +210,8 @@ int nf_conntrack_udplite_packet(struct nf_conn *ct,
                 if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
                         nf_conntrack_event_cache(IPCT_ASSURED, ct);
         } else {
-               nf_ct_refresh_acct(ct, ctinfo, skb,
-                                  timeouts[UDP_CT_UNREPLIED]);
+               nf_conntrack_udp_refresh_unreplied(ct, skb, ctinfo,
+                                                  timeouts[UDP_CT_UNREPLIED]);
         }
         return NF_ACCEPT;
  }
author	Florian Westphal <fw@strlen.de>
	Mon, 3 Feb 2020 16:37:07 +0000 (17:37 +0100)
committer	Pablo Neira Ayuso <pablo@netfilter.org>
	Mon, 17 Feb 2020 09:55:14 +0000 (10:55 +0100)
include/linux/rculist_nulls.h		patch \| blob \| history
include/uapi/linux/netfilter/nf_conntrack_common.h		patch \| blob \| history
net/netfilter/nf_conntrack_core.c		patch \| blob \| history
net/netfilter/nf_conntrack_proto_udp.c		patch \| blob \| history