Reimplement RLIMIT_SIGPENDING on top of ucounts
author Alexey Gladkov <legion@kernel.org>
Thu, 22 Apr 2021 12:27:13 +0000 (14:27 +0200)
committer Eric W. Biederman <ebiederm@xmission.com>
Fri, 30 Apr 2021 19:14:02 +0000 (14:14 -0500)
The rlimit counter is tied to the uid in the user_namespace. This allows
rlimit values to be specified in a userns even if they are already
globally exceeded by the user. However, the values set in the parent
user_namespaces cannot be exceeded.

Changelog

v11:
* Revert most of the changes to fix performance issues.

v10:
* Fix memory leak on get_ucounts failure.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/df9d7764dddd50f28616b7840de74ec0f81711a8.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
fs/proc/array.c
include/linux/sched/user.h
include/linux/signal_types.h
include/linux/user_namespace.h
kernel/fork.c
kernel/signal.c
kernel/ucount.c
kernel/user.c
kernel/user_namespace.c

index bb87e4d..74b0ea4 100644 (file)
@@ -284,7 +284,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
                collect_sigign_sigcatch(p, &ignored, &caught);
                num_threads = get_nr_threads(p);
                rcu_read_lock();  /* FIXME: is this correct? */
-               qsize = atomic_read(&__task_cred(p)->user->sigpending);
+               qsize = get_ucounts_value(task_ucounts(p), UCOUNT_RLIMIT_SIGPENDING);
                rcu_read_unlock();
                qlim = task_rlimit(p, RLIMIT_SIGPENDING);
                unlock_task_sighand(p, &flags);
index 8a34446..8ba9cec 100644 (file)
@@ -12,7 +12,6 @@
  */
 struct user_struct {
        refcount_t __count;     /* reference count */
-       atomic_t sigpending;    /* How many pending signals does this user have? */
 #ifdef CONFIG_FANOTIFY
        atomic_t fanotify_listeners;
 #endif
index 68e06c7..34cb28b 100644 (file)
@@ -13,6 +13,8 @@ typedef struct kernel_siginfo {
        __SIGINFO;
 } kernel_siginfo_t;
 
+struct ucounts;
+
 /*
  * Real Time signals may be queued.
  */
@@ -21,7 +23,7 @@ struct sigqueue {
        struct list_head list;
        int flags;
        kernel_siginfo_t info;
-       struct user_struct *user;
+       struct ucounts *ucounts;
 };
 
 /* flags values. */
index 5eeb86b..58f4179 100644 (file)
@@ -52,6 +52,7 @@ enum ucount_type {
 #endif
        UCOUNT_RLIMIT_NPROC,
        UCOUNT_RLIMIT_MSGQUEUE,
+       UCOUNT_RLIMIT_SIGPENDING,
        UCOUNT_COUNTS,
 };
 
index a9c5097..0311992 100644 (file)
@@ -824,6 +824,7 @@ void __init fork_init(void)
 
        init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC);
        init_user_ns.ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = task_rlimit(&init_task, RLIMIT_MSGQUEUE);
+       init_user_ns.ucount_max[UCOUNT_RLIMIT_SIGPENDING] = task_rlimit(&init_task, RLIMIT_SIGPENDING);
 
 #ifdef CONFIG_VMAP_STACK
        cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
index f271835..9a6dab7 100644 (file)
@@ -413,8 +413,8 @@ static struct sigqueue *
 __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
 {
        struct sigqueue *q = NULL;
-       struct user_struct *user;
-       int sigpending;
+       struct ucounts *ucounts = NULL;
+       long sigpending;
 
        /*
         * Protect access to @t credentials. This can go away when all
@@ -425,27 +425,26 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
         * changes from/to zero.
         */
        rcu_read_lock();
-       user = __task_cred(t)->user;
-       sigpending = atomic_inc_return(&user->sigpending);
+       ucounts = task_ucounts(t);
+       sigpending = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
        if (sigpending == 1)
-               get_uid(user);
+               ucounts = get_ucounts(ucounts);
        rcu_read_unlock();
 
-       if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
+       if (override_rlimit || (sigpending < LONG_MAX && sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
                q = kmem_cache_alloc(sigqueue_cachep, flags);
        } else {
                print_dropped_signal(sig);
        }
 
        if (unlikely(q == NULL)) {
-               if (atomic_dec_and_test(&user->sigpending))
-                       free_uid(user);
+               if (ucounts && dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1))
+                       put_ucounts(ucounts);
        } else {
                INIT_LIST_HEAD(&q->list);
                q->flags = 0;
-               q->user = user;
+               q->ucounts = ucounts;
        }
-
        return q;
 }
 
@@ -453,8 +452,10 @@ static void __sigqueue_free(struct sigqueue *q)
 {
        if (q->flags & SIGQUEUE_PREALLOC)
                return;
-       if (atomic_dec_and_test(&q->user->sigpending))
-               free_uid(q->user);
+       if (q->ucounts && dec_rlimit_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) {
+               put_ucounts(q->ucounts);
+               q->ucounts = NULL;
+       }
        kmem_cache_free(sigqueue_cachep, q);
 }
 
index 6e6f936..8ce62da 100644 (file)
@@ -82,6 +82,7 @@ static struct ctl_table user_table[] = {
 #endif
        { },
        { },
+       { },
        { }
 };
 #endif /* CONFIG_SYSCTL */
index 7f5ff49..6737327 100644 (file)
@@ -98,7 +98,6 @@ static DEFINE_SPINLOCK(uidhash_lock);
 /* root_user.__count is 1, for init task cred */
 struct user_struct root_user = {
        .__count        = REFCOUNT_INIT(1),
-       .sigpending     = ATOMIC_INIT(0),
        .locked_shm     = 0,
        .uid            = GLOBAL_ROOT_UID,
        .ratelimit      = RATELIMIT_STATE_INIT(root_user.ratelimit, 0, 0),
index 7eccc4f..822eace 100644 (file)
@@ -124,6 +124,7 @@ int create_user_ns(struct cred *new)
        }
        ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC);
        ns->ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = rlimit(RLIMIT_MSGQUEUE);
+       ns->ucount_max[UCOUNT_RLIMIT_SIGPENDING] = rlimit(RLIMIT_SIGPENDING);
        ns->ucounts = ucounts;
 
        /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */