exit: put an upper limit on how often the kernel can oops
diff --git a/kernel/exit.c b/kernel/exit.c
index 35e0a31..bccfa42 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
 #include <linux/io_uring.h>
 #include <linux/kprobes.h>
 #include <linux/rethook.h>
+#include <linux/sysfs.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/mmu_context.h>
 
+/*
+ * The default value should be high enough that a system which oopses from
+ * time to time does not immediately panic, but low enough to at least not permit
+ * overflowing 32-bit refcounts or the ldsem writer count.
+ */
+static unsigned int oops_limit = 10000;
+
+#ifdef CONFIG_SYSCTL
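+/*
+ * Expose the limit as kernel.oops_limit; the check in make_task_dead()
+ * skips the panic when the limit is 0, so writing 0 disables it.
+ */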
+static struct ctl_table kern_exit_table[] = {
+       {
+               .procname       = "oops_limit",
+               .data           = &oops_limit,
+               .maxlen         = sizeof(oops_limit),
+               .mode           = 0644,
+               .proc_handler   = proc_douintvec,
+       },
+       { }
+};
+
+static __init int kernel_exit_sysctls_init(void)
+{
+       register_sysctl_init("kernel", kern_exit_table);
+       return 0;
+}
+late_initcall(kernel_exit_sysctls_init);
+#endif
+
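+/* Number of oopses since boot; compared against oops_limit on each oops. */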
+static atomic_t oops_count = ATOMIC_INIT(0);
+
+#ifdef CONFIG_SYSFS
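+/*
+ * Expose the counter read-only under /sys/kernel/ (via kernel_kobj) so
+ * userspace can watch how close the system is to the configured limit.
+ */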
+static ssize_t oops_count_show(struct kobject *kobj, struct kobj_attribute *attr,
+                              char *page)
+{
+       return sysfs_emit(page, "%d\n", atomic_read(&oops_count));
+}
+
+static struct kobj_attribute oops_count_attr = __ATTR_RO(oops_count);
+
+static __init int kernel_exit_sysfs_init(void)
+{
+       sysfs_add_file_to_group(kernel_kobj, &oops_count_attr.attr, NULL);
+       return 0;
+}
+late_initcall(kernel_exit_sysfs_init);
+#endif
+
 static void __unhash_process(struct task_struct *p, bool group_dead)
 {
        nr_threads--;
@@ -760,6 +807,8 @@ void __noreturn do_exit(long code)
        struct task_struct *tsk = current;
        int group_dead;
 
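+       /* Reaching do_exit() with IRQs off indicates a bug in the caller. */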
+       WARN_ON(irqs_disabled());
+
        synchronize_group_exit(tsk, code);
 
        WARN_ON(tsk->plug);
@@ -884,12 +933,18 @@ void __noreturn make_task_dead(int signr)
         * Then do everything else.
         */
        struct task_struct *tsk = current;
+       unsigned int limit;
 
        if (unlikely(in_interrupt()))
                panic("Aiee, killing interrupt handler!");
        if (unlikely(!tsk->pid))
                panic("Attempted to kill the idle task!");
 
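+       /*
+        * An oops can leave interrupts disabled; log it and re-enable them
+        * so the exit path below can make progress.
+        */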
+       if (unlikely(irqs_disabled())) {
+               pr_info("note: %s[%d] exited with irqs disabled\n",
+                       current->comm, task_pid_nr(current));
+               local_irq_enable();
+       }
        if (unlikely(in_atomic())) {
                pr_info("note: %s[%d] exited with preempt_count %d\n",
                        current->comm, task_pid_nr(current),
@@ -898,6 +953,20 @@ void __noreturn make_task_dead(int signr)
        }
 
        /*
+        * Every time the system oopses, if the oops happens while a reference
+        * to an object is held, that reference leaks.
+        * If the oops doesn't also leak memory, repeated oopsing can cause
+        * reference counters to wrap around (if they're not using refcount_t).
+        * This means that repeated oopsing can turn unexploitable-looking bugs
+        * into exploitable ones.
+        * To make sure this can't happen, place an upper bound on how often the
+        * kernel may oops without panic().
+        */
+       limit = READ_ONCE(oops_limit);
+       if (atomic_inc_return(&oops_count) >= limit && limit)
+               panic("Oopsed too often (kernel.oops_limit is %u)", limit);
+
+       /*
         * We're taking recursive faults here in make_task_dead. Safest is to just
         * leave this task alone and wait for reboot.
         */