tls: rx: allow only one reader at a time
author Jakub Kicinski <kuba@kernel.org>
Fri, 15 Jul 2022 05:22:25 +0000 (22:22 -0700)
committer David S. Miller <davem@davemloft.net>
Mon, 18 Jul 2022 10:24:10 +0000 (11:24 +0100)
recvmsg() in TLS gets data from the skb list (rx_list) or fresh
skbs we read from TCP via strparser. The former holds skbs which were
already decrypted for peek or decrypted and partially consumed.

tls_wait_data() only notices appearance of fresh skbs coming out
of TCP (or psock). It is possible, if there is a concurrent call
to peek() and recv() that the peek() will move the data from input
to rx_list without recv() noticing. recv() will then read data out
of order or never wake up.

This is not a practical use case/concern, but it makes the self
tests less reliable. This patch solves the problem by allowing
only one reader in.

Because having multiple processes calling read()/peek() is not
normal, avoid adding a lock and try to fast-path the single-reader
case.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/tls.h
net/tls/tls_sw.c

index 8742e13..e8935cf 100644 (file)
@@ -116,11 +116,14 @@ struct tls_sw_context_rx {
        void (*saved_data_ready)(struct sock *sk);
 
        struct sk_buff *recv_pkt;
+       u8 reader_present;
        u8 async_capable:1;
        u8 zc_capable:1;
+       u8 reader_contended:1;
        atomic_t decrypt_pending;
        /* protect crypto_wait with decrypt_pending*/
        spinlock_t decrypt_compl_lock;
+       struct wait_queue_head wq;
 };
 
 struct tls_record_info {
index 68d79ee..761a637 100644 (file)
@@ -1753,6 +1753,51 @@ tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot,
        sk_flush_backlog(sk);
 }
 
+/* Take the socket lock and claim the single RX reader slot.
+ *
+ * While another reader has ctx->reader_present set, sleep on ctx->wq
+ * for at most the socket receive timeout computed by sock_rcvtimeo()
+ * (@nonblock is passed through to it).
+ *
+ * Returns the remaining timeout with the socket locked and
+ * reader_present set, or a negative errno. On error the socket lock
+ * has already been released and the reader slot is not held, so the
+ * caller must NOT call tls_rx_reader_unlock().
+ */
+static long tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx,
+                              bool nonblock)
+{
+       long timeo;
+       long err;
+
+       lock_sock(sk);
+
+       timeo = sock_rcvtimeo(sk, nonblock);
+
+       while (unlikely(ctx->reader_present)) {
+               DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+               /* Tell the current reader that somebody may need a wake up */
+               ctx->reader_contended = 1;
+
+               add_wait_queue(&ctx->wq, &wait);
+               sk_wait_event(sk, &timeo,
+                             !READ_ONCE(ctx->reader_present), &wait);
+               remove_wait_queue(&ctx->wq, &wait);
+
+               if (!timeo) {
+                       err = -EAGAIN;
+                       goto err_unlock;
+               }
+               if (signal_pending(current)) {
+                       err = sock_intr_errno(timeo);
+                       goto err_unlock;
+               }
+       }
+
+       WRITE_ONCE(ctx->reader_present, 1);
+
+       return timeo;
+
+err_unlock:
+       /* Callers bail out right away on a negative return, so the
+        * socket lock taken above has to be dropped here.
+        */
+       release_sock(sk);
+       return err;
+}
+
+/* Give up the RX reader slot and drop the socket lock.
+ *
+ * Counterpart of tls_rx_reader_lock(): expects the socket to be locked
+ * and ctx->reader_present to be set by the caller.
+ */
+static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
+{
+       if (unlikely(ctx->reader_contended)) {
+               /* With nobody left on the wait queue clear the contention
+                * flag so later unlocks skip this branch; otherwise wake
+                * the waiters up.
+                */
+               if (!wq_has_sleeper(&ctx->wq))
+                       ctx->reader_contended = 0;
+               else
+                       wake_up(&ctx->wq);
+
+               WARN_ON_ONCE(!ctx->reader_present);
+       }
+
+       WRITE_ONCE(ctx->reader_present, 0);
+       release_sock(sk);
+}
+
 int tls_sw_recvmsg(struct sock *sk,
                   struct msghdr *msg,
                   size_t len,
@@ -1782,7 +1827,9 @@ int tls_sw_recvmsg(struct sock *sk,
                return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
 
        psock = sk_psock_get(sk);
-       lock_sock(sk);
+       timeo = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT);
+       if (timeo < 0)
+               return timeo;
        bpf_strp_enabled = sk_psock_strp_enabled(psock);
 
        /* If crypto failed the connection is broken */
@@ -1801,7 +1848,6 @@ int tls_sw_recvmsg(struct sock *sk,
 
        target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
        len = len - copied;
-       timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
        zc_capable = !bpf_strp_enabled && !is_kvec && !is_peek &&
                ctx->zc_capable;
@@ -1956,7 +2002,7 @@ recv_end:
        copied += decrypted;
 
 end:
-       release_sock(sk);
+       tls_rx_reader_unlock(sk, ctx);
        if (psock)
                sk_psock_put(sk, psock);
        return copied ? : err;
@@ -1978,9 +2024,9 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
        long timeo;
        int chunk;
 
-       lock_sock(sk);
-
-       timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK);
+       timeo = tls_rx_reader_lock(sk, ctx, flags & SPLICE_F_NONBLOCK);
+       if (timeo < 0)
+               return timeo;
 
        from_queue = !skb_queue_empty(&ctx->rx_list);
        if (from_queue) {
@@ -2029,7 +2075,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
        }
 
 splice_read_end:
-       release_sock(sk);
+       tls_rx_reader_unlock(sk, ctx);
        return copied ? : err;
 }
 
@@ -2371,6 +2417,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
        } else {
                crypto_init_wait(&sw_ctx_rx->async_wait);
                spin_lock_init(&sw_ctx_rx->decrypt_compl_lock);
+               init_waitqueue_head(&sw_ctx_rx->wq);
                crypto_info = &ctx->crypto_recv.info;
                cctx = &ctx->rx;
                skb_queue_head_init(&sw_ctx_rx->rx_list);