net: implement per-cpu reserves for memory_allocated
author	Eric Dumazet <edumazet@google.com>	Thu, 9 Jun 2022 06:34:09 +0000 (23:34 -0700)
committer	Jakub Kicinski <kuba@kernel.org>	Fri, 10 Jun 2022 23:21:26 +0000 (16:21 -0700)
We plan to keep sk->sk_forward_alloc as small as possible
in future patches.

This means we are going to call sk_memory_allocated_add()
and sk_memory_allocated_sub() more often.

Implement a per-cpu cache of +1/-1 MB to reduce the number
of changes to sk->sk_prot->memory_allocated, which would
otherwise be a source of false sharing.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
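
For illustration, a minimal userspace sketch of the same batching
scheme, using C11 atomics and a thread-local reserve in place of the
kernel's preempt_disable()/__this_cpu_*() per-cpu ops; the names
global_allocated, thread_reserve and RESERVE are hypothetical, not
kernel API:

	/* Hypothetical userspace analogue: each thread batches small
	 * +/- updates in a private counter and touches the shared
	 * atomic only once the batch reaches +/- RESERVE pages, as the
	 * patch below does per cpu.
	 */
	#include <stdatomic.h>
	#include <stdio.h>

	#define RESERVE 256				/* 1 MB in 4 KiB page units */

	static atomic_long global_allocated;		/* ~ prot->memory_allocated */
	static _Thread_local long thread_reserve;	/* ~ *prot->per_cpu_fw_alloc */

	static void memory_allocated_add(int amt)
	{
		thread_reserve += amt;
		if (thread_reserve >= RESERVE) {
			/* fold the whole batch into the shared counter */
			atomic_fetch_add(&global_allocated, thread_reserve);
			thread_reserve = 0;
		}
	}

	static void memory_allocated_sub(int amt)
	{
		thread_reserve -= amt;
		if (thread_reserve <= -RESERVE) {
			/* negative batch: atomic add of a negative value */
			atomic_fetch_add(&global_allocated, thread_reserve);
			thread_reserve = 0;
		}
	}

	int main(void)
	{
		for (int i = 0; i < 1000; i++)
			memory_allocated_add(1);	/* 3 atomic ops, not 1000 */
		printf("global=%ld thread_reserve=%ld\n",
		       (long)atomic_load(&global_allocated), thread_reserve);
		return 0;
	}

Draining the whole accumulated batch, rather than a fixed amount,
keeps the shared cache line untouched for roughly RESERVE consecutive
small updates per thread.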
diff --git a/include/net/sock.h b/include/net/sock.h
index 825f8cb..59040fe 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1398,21 +1398,47 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
 }
 
 static inline long
+proto_memory_allocated(const struct proto *prot)
+{
+       return max(0L, atomic_long_read(prot->memory_allocated));
+}
+
+static inline long
 sk_memory_allocated(const struct sock *sk)
 {
-       return atomic_long_read(sk->sk_prot->memory_allocated);
+       return proto_memory_allocated(sk->sk_prot);
 }
 
+/* 1 MB per cpu, in page units */
+#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
+
 static inline long
 sk_memory_allocated_add(struct sock *sk, int amt)
 {
-       return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
+       int local_reserve;
+
+       preempt_disable();
+       local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
+       if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
+               __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
+               atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
+       }
+       preempt_enable();
+       return sk_memory_allocated(sk);
 }
 
 static inline void
 sk_memory_allocated_sub(struct sock *sk, int amt)
 {
-       atomic_long_sub(amt, sk->sk_prot->memory_allocated);
+       int local_reserve;
+
+       preempt_disable();
+       local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
+       if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) {
+               __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
+               atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
+       }
+       preempt_enable();
 }
 
 #define SK_ALLOC_PERCPU_COUNTER_BATCH 16
@@ -1441,12 +1467,6 @@ proto_sockets_allocated_sum_positive(struct proto *prot)
        return percpu_counter_sum_positive(prot->sockets_allocated);
 }
 
-static inline long
-proto_memory_allocated(struct proto *prot)
-{
-       return atomic_long_read(prot->memory_allocated);
-}
-
 static inline bool
 proto_memory_pressure(struct proto *prot)
 {