udp: Improve load balancing for SO_REUSEPORT.
author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Tue, 21 Jul 2020 06:15:31 +0000 (15:15 +0900)
committer: David S. Miller <davem@davemloft.net>
Tue, 21 Jul 2020 22:31:02 +0000 (15:31 -0700)
Currently, SO_REUSEPORT does not work well if connected sockets are in a
UDP reuseport group.

In that case, reuseport_has_conns() returns true and the result of
reuseport_select_sock() is discarded. Also, unconnected sockets have the
same score, hence the first unconnected socket in udp_hslot always
receives all packets sent to unconnected sockets.

So, the result of reuseport_select_sock() should be used for load
balancing.

The noteworthy point is that the unconnected sockets placed after
connected sockets in sock_reuseport.socks will receive more packets than
others because of the algorithm in reuseport_select_sock().

    index | connected | reciprocal_scale | result
    ---------------------------------------------
    0     | no        | 20%              | 40%
    1     | no        | 20%              | 20%
    2     | yes       | 20%              | 0%
    3     | no        | 20%              | 40%
    4     | yes       | 20%              | 0%

If most of the sockets are connected, this imbalance can be a problem, but
it still works better than the current behavior.

Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets")
CC: Willem de Bruijn <willemb@google.com>
Reviewed-by: Benjamin Herrenschmidt <benh@amazon.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/ipv4/udp.c
net/ipv6/udp.c

index 1b7ebbc..99251d3 100644 (file)
@@ -416,7 +416,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
                                     struct udp_hslot *hslot2,
                                     struct sk_buff *skb)
 {
-       struct sock *sk, *result;
+       struct sock *sk, *result, *reuseport_result;
        int score, badness;
        u32 hash = 0;
 
@@ -426,17 +426,20 @@ static struct sock *udp4_lib_lookup2(struct net *net,
                score = compute_score(sk, net, saddr, sport,
                                      daddr, hnum, dif, sdif);
                if (score > badness) {
+                       reuseport_result = NULL;
+
                        if (sk->sk_reuseport &&
                            sk->sk_state != TCP_ESTABLISHED) {
                                hash = udp_ehashfn(net, daddr, hnum,
                                                   saddr, sport);
-                               result = reuseport_select_sock(sk, hash, skb,
-                                                       sizeof(struct udphdr));
-                               if (result && !reuseport_has_conns(sk, false))
-                                       return result;
+                               reuseport_result = reuseport_select_sock(sk, hash, skb,
+                                                                        sizeof(struct udphdr));
+                               if (reuseport_result && !reuseport_has_conns(sk, false))
+                                       return reuseport_result;
                        }
+
+                       result = reuseport_result ? : sk;
                        badness = score;
-                       result = sk;
                }
        }
        return result;
index 7d41517..9503c87 100644 (file)
@@ -148,7 +148,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
                int dif, int sdif, struct udp_hslot *hslot2,
                struct sk_buff *skb)
 {
-       struct sock *sk, *result;
+       struct sock *sk, *result, *reuseport_result;
        int score, badness;
        u32 hash = 0;
 
@@ -158,17 +158,20 @@ static struct sock *udp6_lib_lookup2(struct net *net,
                score = compute_score(sk, net, saddr, sport,
                                      daddr, hnum, dif, sdif);
                if (score > badness) {
+                       reuseport_result = NULL;
+
                        if (sk->sk_reuseport &&
                            sk->sk_state != TCP_ESTABLISHED) {
                                hash = udp6_ehashfn(net, daddr, hnum,
                                                    saddr, sport);
 
-                               result = reuseport_select_sock(sk, hash, skb,
-                                                       sizeof(struct udphdr));
-                               if (result && !reuseport_has_conns(sk, false))
-                                       return result;
+                               reuseport_result = reuseport_select_sock(sk, hash, skb,
+                                                                        sizeof(struct udphdr));
+                               if (reuseport_result && !reuseport_has_conns(sk, false))
+                                       return reuseport_result;
                        }
-                       result = sk;
+
+                       result = reuseport_result ? : sk;
                        badness = score;
                }
        }