ucounts: Fix signal ucount refcounting
author Eric W. Biederman <ebiederm@xmission.com>
Sat, 16 Oct 2021 20:59:49 +0000 (15:59 -0500)
committer Eric W. Biederman <ebiederm@xmission.com>
Mon, 18 Oct 2021 21:02:30 +0000 (16:02 -0500)
In commit fda31c50292a ("signal: avoid double atomic counter
increments for user accounting") Linus made a clever optimization to
how rlimits interact with the struct user_struct.  Unfortunately that
optimization does not work in the obvious way when moved to nested
rlimits.  The problem is that the last decrement of the per user
namespace per user sigpending counter might also be the last decrement
of the sigpending counter in the parent user namespace as well, which
means that simply freeing the leaf ucount in __sigqueue_free is not
enough.

Maintain the optimization and handle the tricky cases by introducing
inc_rlimit_get_ucounts and dec_rlimit_put_ucounts.

By moving the entire optimization into functions that perform all of
the work it becomes possible to ensure that every level is handled
properly.

The new function inc_rlimit_get_ucounts returns 0 on failure to
increment the ucount.  This is different than inc_rlimit_ucounts which
increments the ucounts and returns LONG_MAX if the ucount counter has
exceeded its maximum or it wrapped (to indicate the counter needs to
be decremented).

I wish we had a single user to account all pending signals to across
all of the threads of a process so this complexity was not necessary.

Cc: stable@vger.kernel.org
Fixes: d64696905554 ("Reimplement RLIMIT_SIGPENDING on top of ucounts")
v1: https://lkml.kernel.org/r/87mtnavszx.fsf_-_@disp2133
Link: https://lkml.kernel.org/r/87fssytizw.fsf_-_@disp2133
Reviewed-by: Alexey Gladkov <legion@kernel.org>
Tested-by: Rune Kleveland <rune.kleveland@infomedia.dk>
Tested-by: Yu Zhao <yuzhao@google.com>
Tested-by: Jordan Glover <Golden_Miller83@protonmail.ch>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
include/linux/user_namespace.h
kernel/signal.c
kernel/ucount.c

index eb70cabe6e7f2f6362bde868a21dbf257701afe9..33a4240e6a6f1789f8a3153a264030e62f4645d7 100644 (file)
@@ -127,6 +127,8 @@ static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type t
 
 long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
 bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
+long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type);
+void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type);
 bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);
 
 static inline void set_rlimit_ucount_max(struct user_namespace *ns,
index a3229add4455479c23c2f48670b8c33645869456..13d2505a14a0e9274f1a76852726512ca6df2362 100644 (file)
@@ -425,22 +425,10 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
         */
        rcu_read_lock();
        ucounts = task_ucounts(t);
-       sigpending = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
-       switch (sigpending) {
-       case 1:
-               if (likely(get_ucounts(ucounts)))
-                       break;
-               fallthrough;
-       case LONG_MAX:
-               /*
-                * we need to decrease the ucount in the userns tree on any
-                * failure to avoid counts leaking.
-                */
-               dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
-               rcu_read_unlock();
-               return NULL;
-       }
+       sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
        rcu_read_unlock();
+       if (!sigpending)
+               return NULL;
 
        if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
                q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
@@ -449,8 +437,7 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
        }
 
        if (unlikely(q == NULL)) {
-               if (dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1))
-                       put_ucounts(ucounts);
+               dec_rlimit_put_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
        } else {
                INIT_LIST_HEAD(&q->list);
                q->flags = sigqueue_flags;
@@ -463,8 +450,8 @@ static void __sigqueue_free(struct sigqueue *q)
 {
        if (q->flags & SIGQUEUE_PREALLOC)
                return;
-       if (q->ucounts && dec_rlimit_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) {
-               put_ucounts(q->ucounts);
+       if (q->ucounts) {
+               dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING);
                q->ucounts = NULL;
        }
        kmem_cache_free(sigqueue_cachep, q);
index bb51849e6375288493d1429e07a49ee8de925986..eb03f3c68375df0cfaae6d61e5c3ed04bc9805a7 100644 (file)
@@ -284,6 +284,55 @@ bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
        return (new == 0);
 }
 
+static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts,
+                               struct ucounts *last, enum ucount_type type)
+{ /* Drop one @type count on each level from @ucounts upward, stopping before @last. */
+       struct ucounts *iter, *next;
+       for (iter = ucounts; iter != last; iter = next) {
+               long dec = atomic_long_add_return(-1, &iter->ucount[type]);
+               WARN_ON_ONCE(dec < 0); /* a negative count means an unbalanced decrement */
+               next = iter->ns->ucounts; /* fetch the parent before put_ucounts(); presumably iter may be gone afterwards */
+               if (dec == 0)
+                       put_ucounts(iter); /* pairs with the get_ucounts() taken in inc_rlimit_get_ucounts() when the count went 0 -> 1 */
+       }
+}
+
+void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type)
+{
+       do_dec_rlimit_put_ucounts(ucounts, NULL, type); /* last == NULL: no early stop, walk until iter->ns->ucounts is NULL */
+}
+
+/*
+ * inc_rlimit_get_ucounts - charge one unit of @type on @ucounts and on every
+ * ucounts reachable from it through ->ns->ucounts.
+ *
+ * A level whose counter goes from 0 to 1 also gets an extra reference via
+ * get_ucounts(); dec_rlimit_put_ucounts() is the matching release.
+ *
+ * Returns the new count at @ucounts on success.  Returns 0 if any level
+ * wrapped, exceeded its ucount_max, or could not be referenced; in that
+ * case every increment made by this call has been undone.
+ */
+long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type)
+{
+       /* Caller must hold a reference to ucounts */
+       struct ucounts *iter;
+       long dec, ret = 0;
+
+       for (iter = ucounts; iter; iter = iter->ns->ucounts) {
+               long max = READ_ONCE(iter->ns->ucount_max[type]);
+               long new = atomic_long_add_return(1, &iter->ucount[type]);
+               /*
+                * iter has already been incremented at this point, so the
+                * failure path must undo this level as well; unwinding only
+                * the levels below it would leak one count on iter.
+                */
+               if (new < 0 || new > max)
+                       goto dec_unwind;
+               if (iter == ucounts)
+                       ret = new;
+               /*
+                * Grab an extra ucount reference for the caller when
+                * the rlimit count was previously 0.
+                */
+               if (new != 1)
+                       continue;
+               if (!get_ucounts(iter))
+                       goto dec_unwind;
+       }
+       return ret;
+dec_unwind:
+       /* Undo the increment on the level that failed ... */
+       dec = atomic_long_add_return(-1, &iter->ucount[type]);
+       WARN_ON_ONCE(dec < 0);
+       /* ... then on every level below it, dropping references as needed. */
+       do_dec_rlimit_put_ucounts(ucounts, iter, type);
+       return 0;
+}
+
 bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max)
 {
        struct ucounts *iter;