This function is used for every packet; siphash_4u64 is noticeably faster
than using a local buffer + siphash:
Before:
1.23% kpktgend_0 [kernel.vmlinux] [k] __siphash_unaligned
0.14% kpktgend_0 [nf_conntrack] [k] hash_conntrack_raw
After:
0.79% kpktgend_0 [kernel.vmlinux] [k] siphash_4u64
0.15% kpktgend_0 [nf_conntrack] [k] hash_conntrack_raw
In the pktgen test this gives about a 2.4% performance improvement.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
unsigned int zoneid,
const struct net *net)
{
unsigned int zoneid,
const struct net *net)
{
- struct {
- struct nf_conntrack_man src;
- union nf_inet_addr dst_addr;
- unsigned int zone;
- u32 net_mix;
- u16 dport;
- u16 proto;
- } __aligned(SIPHASH_ALIGNMENT) combined;
get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
- memset(&combined, 0, sizeof(combined));
+ /* The direction must be ignored, handle usable tuplehash members manually */
+ a = (u64)tuple->src.u3.all[0] << 32 | tuple->src.u3.all[3];
+ b = (u64)tuple->dst.u3.all[0] << 32 | tuple->dst.u3.all[3];
- /* The direction must be ignored, so handle usable members manually. */
- combined.src = tuple->src;
- combined.dst_addr = tuple->dst.u3;
- combined.zone = zoneid;
- combined.net_mix = net_hash_mix(net);
- combined.dport = (__force __u16)tuple->dst.u.all;
- combined.proto = tuple->dst.protonum;
+ c = (__force u64)tuple->src.u.all << 32 | (__force u64)tuple->dst.u.all << 16;
+ c |= tuple->dst.protonum;
- return (u32)siphash(&combined, sizeof(combined), &nf_conntrack_hash_rnd);
+ d = (u64)zoneid << 32 | net_hash_mix(net);
+
+ /* IPv4: u3.all[1,2,3] == 0 */
+ c ^= (u64)tuple->src.u3.all[1] << 32 | tuple->src.u3.all[2];
+ d += (u64)tuple->dst.u3.all[1] << 32 | tuple->dst.u3.all[2];
+
+ return (u32)siphash_4u64(a, b, c, d, &nf_conntrack_hash_rnd);
}
static u32 scale_hash(u32 hash)
}
static u32 scale_hash(u32 hash)