ipv4: Add a sysctl to control multipath hash fields
author Ido Schimmel <idosch@OSS.NVIDIA.COM>
Mon, 17 May 2021 18:15:18 +0000 (21:15 +0300)
committer David S. Miller <davem@davemloft.net>
Tue, 18 May 2021 20:27:32 +0000 (13:27 -0700)
A subsequent patch will add a new multipath hash policy where the packet
fields used for multipath hash calculation are determined by user space.
This patch adds a sysctl that allows user space to set these fields.

The packet fields are represented using a bitmask and are common between
IPv4 and IPv6 to allow user space to use the same numbering across both
protocols. For example, to hash based on the standard 5-tuple:

 # sysctl -w net.ipv4.fib_multipath_hash_fields=0x0037
 net.ipv4.fib_multipath_hash_fields = 0x0037

The kernel rejects unknown fields, for example:

 # sysctl -w net.ipv4.fib_multipath_hash_fields=0x1000
 sysctl: setting key "net.ipv4.fib_multipath_hash_fields": Invalid argument

More fields can be added in the future, if needed.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Documentation/networking/ip-sysctl.rst
include/net/ip_fib.h
include/net/netns/ipv4.h
net/ipv4/fib_frontend.c
net/ipv4/sysctl_net_ipv4.c

index c2ecc98..4749479 100644 (file)
@@ -100,6 +100,33 @@ fib_multipath_hash_policy - INTEGER
        - 1 - Layer 4
        - 2 - Layer 3 or inner Layer 3 if present
 
+fib_multipath_hash_fields - UNSIGNED INTEGER
+       When fib_multipath_hash_policy is set to 3 (custom multipath hash), the
+       fields used for multipath hash calculation are determined by this
+       sysctl.
+
+       This value is a bitmask which enables various fields for multipath hash
+       calculation.
+
+       Possible fields are:
+
+       ====== ============================
+       0x0001 Source IP address
+       0x0002 Destination IP address
+       0x0004 IP protocol
+       0x0008 Unused (Flow Label)
+       0x0010 Source port
+       0x0020 Destination port
+       0x0040 Inner source IP address
+       0x0080 Inner destination IP address
+       0x0100 Inner IP protocol
+       0x0200 Inner Flow Label
+       0x0400 Inner source port
+       0x0800 Inner destination port
+       ====== ============================
+
+       Default: 0x0007 (source IP, destination IP and IP protocol)
+
 fib_sync_mem - UNSIGNED INTEGER
        Amount of dirty memory from fib entries that can be backlogged before
        synchronize_rcu is forced.
index a914f33..3ab2563 100644 (file)
@@ -466,6 +466,49 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags);
 void fib_sync_mtu(struct net_device *dev, u32 orig_mtu);
 void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig);
 
+/* Fields used for sysctl_fib_multipath_hash_fields.
+ * Common to IPv4 and IPv6.
+ *
+ * Add new fields at the end. This is user API.
+ */
+#define FIB_MULTIPATH_HASH_FIELD_SRC_IP                        BIT(0)
+#define FIB_MULTIPATH_HASH_FIELD_DST_IP                        BIT(1)
+#define FIB_MULTIPATH_HASH_FIELD_IP_PROTO              BIT(2)
+#define FIB_MULTIPATH_HASH_FIELD_FLOWLABEL             BIT(3)
+#define FIB_MULTIPATH_HASH_FIELD_SRC_PORT              BIT(4)
+#define FIB_MULTIPATH_HASH_FIELD_DST_PORT              BIT(5)
+#define FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP          BIT(6)
+#define FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP          BIT(7)
+#define FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO                BIT(8)
+#define FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL       BIT(9)
+#define FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT                BIT(10)
+#define FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT                BIT(11)
+
+#define FIB_MULTIPATH_HASH_FIELD_OUTER_MASK            \
+       (FIB_MULTIPATH_HASH_FIELD_SRC_IP |              \
+        FIB_MULTIPATH_HASH_FIELD_DST_IP |              \
+        FIB_MULTIPATH_HASH_FIELD_IP_PROTO |            \
+        FIB_MULTIPATH_HASH_FIELD_FLOWLABEL |           \
+        FIB_MULTIPATH_HASH_FIELD_SRC_PORT |            \
+        FIB_MULTIPATH_HASH_FIELD_DST_PORT)
+
+#define FIB_MULTIPATH_HASH_FIELD_INNER_MASK            \
+       (FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP |        \
+        FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP |        \
+        FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO |      \
+        FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL |     \
+        FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT |      \
+        FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
+
+#define FIB_MULTIPATH_HASH_FIELD_ALL_MASK              \
+       (FIB_MULTIPATH_HASH_FIELD_OUTER_MASK |          \
+        FIB_MULTIPATH_HASH_FIELD_INNER_MASK)
+
+#define FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK          \
+       (FIB_MULTIPATH_HASH_FIELD_SRC_IP |              \
+        FIB_MULTIPATH_HASH_FIELD_DST_IP |              \
+        FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
+
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
                       const struct sk_buff *skb, struct flow_keys *flkeys);
index f6af8d9..746c80c 100644 (file)
@@ -210,6 +210,7 @@ struct netns_ipv4 {
 #endif
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
+       u32 sysctl_fib_multipath_hash_fields;
        u8 sysctl_fib_multipath_use_neigh;
        u8 sysctl_fib_multipath_hash_policy;
 #endif
index bfb345c..af8814a 100644 (file)
@@ -1514,6 +1514,12 @@ static int __net_init ip_fib_net_init(struct net *net)
        if (err)
                return err;
 
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+       /* Default to 3-tuple */
+       net->ipv4.sysctl_fib_multipath_hash_fields =
+               FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK;
+#endif
+
        /* Avoid false sharing : Use at least a full cache line */
        size = max_t(size_t, size, L1_CACHE_BYTES);
 
index a62934b..45bab37 100644 (file)
@@ -19,6 +19,7 @@
 #include <net/snmp.h>
 #include <net/icmp.h>
 #include <net/ip.h>
+#include <net/ip_fib.h>
 #include <net/route.h>
 #include <net/tcp.h>
 #include <net/udp.h>
@@ -48,6 +49,8 @@ static int ip_ping_group_range_min[] = { 0, 0 };
 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
 static u32 u32_max_div_HZ = UINT_MAX / HZ;
 static int one_day_secs = 24 * 3600;
+static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
+       FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
 
 /* obsolete */
 static int sysctl_tcp_low_latency __read_mostly;
@@ -1052,6 +1055,15 @@ static struct ctl_table ipv4_net_table[] = {
                .extra1         = SYSCTL_ZERO,
                .extra2         = &two,
        },
+       {
+               .procname       = "fib_multipath_hash_fields",
+               .data           = &init_net.ipv4.sysctl_fib_multipath_hash_fields,
+               .maxlen         = sizeof(u32),
+               .mode           = 0644,
+               .proc_handler   = proc_douintvec_minmax,
+               .extra1         = SYSCTL_ONE,
+               .extra2         = &fib_multipath_hash_fields_all_mask,
+       },
 #endif
        {
                .procname       = "ip_unprivileged_port_start",