From: Jakub Sitnicki Date: Tue, 18 Feb 2020 17:10:13 +0000 (+0000) Subject: net, sk_msg: Annotate lockless access to sk_prot on clone X-Git-Tag: v5.15~4200^2~372^2^2~10 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b8e202d1d1d0f182f01062804efb523ea9a9008c;p=platform%2Fkernel%2Flinux-starfive.git net, sk_msg: Annotate lockless access to sk_prot on clone The sk_msg and ULP frameworks override the protocol callbacks pointer in sk->sk_prot, while TCP accesses it locklessly when cloning the listening socket, that is, with neither sk_lock nor sk_callback_lock held. Once we enable use of listening sockets with sockmap (and hence sk_msg), there will be shared access to sk->sk_prot if a socket is being cloned while it is inserted into or deleted from the sockmap on another CPU: Read side: tcp_v4_rcv sk = __inet_lookup_skb(...) tcp_check_req(sk) inet_csk(sk)->icsk_af_ops->syn_recv_sock tcp_v4_syn_recv_sock tcp_create_openreq_child inet_csk_clone_lock sk_clone_lock READ_ONCE(sk->sk_prot) Write side: sock_map_ops->map_update_elem sock_map_update_elem sock_map_update_common sock_map_link_no_progs tcp_bpf_init tcp_bpf_update_sk_prot sk_psock_update_proto WRITE_ONCE(sk->sk_prot, ops) sock_map_ops->map_delete_elem sock_map_delete_elem __sock_map_delete sock_map_unref sk_psock_put sk_psock_drop sk_psock_restore_proto tcp_update_ulp WRITE_ONCE(sk->sk_prot, proto) Mark the shared access with READ_ONCE/WRITE_ONCE annotations. 
Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200218171023.844439-2-jakub@cloudflare.com --- diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index d90ef61..112765b 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -352,7 +352,8 @@ static inline void sk_psock_update_proto(struct sock *sk, psock->saved_write_space = sk->sk_write_space; psock->sk_proto = sk->sk_prot; - sk->sk_prot = ops; + /* Pairs with lockless read in sk_clone_lock() */ + WRITE_ONCE(sk->sk_prot, ops); } static inline void sk_psock_restore_proto(struct sock *sk, diff --git a/net/core/sock.c b/net/core/sock.c index a4c8fac..bf1173b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1572,13 +1572,14 @@ static inline void sock_lock_init(struct sock *sk) */ static void sock_copy(struct sock *nsk, const struct sock *osk) { + const struct proto *prot = READ_ONCE(osk->sk_prot); #ifdef CONFIG_SECURITY_NETWORK void *sptr = nsk->sk_security; #endif memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin)); memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, - osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); + prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); #ifdef CONFIG_SECURITY_NETWORK nsk->sk_security = sptr; @@ -1792,16 +1793,17 @@ static void sk_init_common(struct sock *sk) */ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) { + struct proto *prot = READ_ONCE(sk->sk_prot); struct sock *newsk; bool is_charged = true; - newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family); + newsk = sk_prot_alloc(prot, priority, sk->sk_family); if (newsk != NULL) { struct sk_filter *filter; sock_copy(newsk, sk); - newsk->sk_prot_creator = sk->sk_prot; + newsk->sk_prot_creator = prot; /* SANITY */ if (likely(newsk->sk_net_refcnt)) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 8a01428..dd183b0 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -645,8 +645,10 @@ 
static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock) /* Reinit occurs when program types change e.g. TCP_BPF_TX is removed * or added requiring sk_prot hook updates. We keep original saved * hooks in this case. + * + * Pairs with lockless read in sk_clone_lock(). */ - sk->sk_prot = &tcp_bpf_prots[family][config]; + WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]); } static int tcp_bpf_assert_proto_ops(struct proto *ops) diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 38d3ad1..6c43fa1 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -106,7 +106,8 @@ void tcp_update_ulp(struct sock *sk, struct proto *proto, if (!icsk->icsk_ulp_ops) { sk->sk_write_space = write_space; - sk->sk_prot = proto; + /* Pairs with lockless read in sk_clone_lock() */ + WRITE_ONCE(sk->sk_prot, proto); return; } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 94774c0..82225bc 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -742,7 +742,8 @@ static void tls_update(struct sock *sk, struct proto *p, ctx->sk_write_space = write_space; ctx->sk_proto = p; } else { - sk->sk_prot = p; + /* Pairs with lockless read in sk_clone_lock(). */ + WRITE_ONCE(sk->sk_prot, p); sk->sk_write_space = write_space; } }