exit: put an upper limit on how often the kernel can oops
diff --git a/kernel/exit.c b/kernel/exit.c
index 35e0a31..bccfa42 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
 #include <linux/io_uring.h>
 #include <linux/kprobes.h>
 #include <linux/rethook.h>
+#include <linux/sysfs.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/mmu_context.h>
 
+/*
+ * The default value should be high enough that a system which oopses from
+ * time to time does not immediately panic, but low enough to at least not permit
+ * overflowing 32-bit refcounts or the ldsem writer count.
+ */
+static unsigned int oops_limit = 10000;
+
+#ifdef CONFIG_SYSCTL
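+/*
+ * Expose the limit as kernel.oops_limit; the check in make_task_dead()
+ * skips the panic when the limit is 0, so writing 0 disables it.
+ */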
+static struct ctl_table kern_exit_table[] = {
+       {
+               .procname       = "oops_limit",
+               .data           = &oops_limit,
+               .maxlen         = sizeof(oops_limit),
+               .mode           = 0644,
+               .proc_handler   = proc_douintvec,
+       },
+       { }
+};
+
+static __init int kernel_exit_sysctls_init(void)
+{
+       register_sysctl_init("kernel", kern_exit_table);
+       return 0;
+}
+late_initcall(kernel_exit_sysctls_init);
+#endif
+
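+/* Number of oopses since boot; compared against oops_limit on each oops. */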
+static atomic_t oops_count = ATOMIC_INIT(0);
+
+#ifdef CONFIG_SYSFS
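+/*
+ * Expose the counter read-only under /sys/kernel/ (via kernel_kobj) so
+ * userspace can watch how close the system is to the configured limit.
+ */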
+static ssize_t oops_count_show(struct kobject *kobj, struct kobj_attribute *attr,
+                              char *page)
+{
+       return sysfs_emit(page, "%d\n", atomic_read(&oops_count));
+}
+
+static struct kobj_attribute oops_count_attr = __ATTR_RO(oops_count);
+
+static __init int kernel_exit_sysfs_init(void)
+{
+       sysfs_add_file_to_group(kernel_kobj, &oops_count_attr.attr, NULL);
+       return 0;
+}
+late_initcall(kernel_exit_sysfs_init);
+#endif
+
 static void __unhash_process(struct task_struct *p, bool group_dead)
 {
        nr_threads--;
@@ -760,6 +807,8 @@ void __noreturn do_exit(long code)
        struct task_struct *tsk = current;
        int group_dead;
 
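+       /* Reaching do_exit() with IRQs off indicates a bug in the caller. */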
+       WARN_ON(irqs_disabled());
+
        synchronize_group_exit(tsk, code);
 
        WARN_ON(tsk->plug);
@@ -884,12 +933,18 @@ void __noreturn make_task_dead(int signr)
         * Then do everything else.
         */
        struct task_struct *tsk = current;
+       unsigned int limit;
 
        if (unlikely(in_interrupt()))
                panic("Aiee, killing interrupt handler!");
        if (unlikely(!tsk->pid))
                panic("Attempted to kill the idle task!");
 
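+       /*
+        * An oops can leave interrupts disabled; log it and re-enable them
+        * so the exit path below can make progress.
+        */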
+       if (unlikely(irqs_disabled())) {
+               pr_info("note: %s[%d] exited with irqs disabled\n",
+                       current->comm, task_pid_nr(current));
+               local_irq_enable();
+       }
        if (unlikely(in_atomic())) {
                pr_info("note: %s[%d] exited with preempt_count %d\n",
                        current->comm, task_pid_nr(current),
@@ -898,6 +953,20 @@ void __noreturn make_task_dead(int signr)
        }
 
        /*
+        * Every time the system oopses, if the oops happens while a reference
+        * to an object is held, that reference leaks.
+        * If the oops doesn't also leak memory, repeated oopsing can cause
+        * reference counters to wrap around (if they're not using refcount_t).
+        * This means that repeated oopsing can turn unexploitable-looking bugs
+        * into exploitable ones.
+        * To make sure this can't happen, place an upper bound on how often the
+        * kernel may oops without panic().
+        */
+       limit = READ_ONCE(oops_limit);
+       if (atomic_inc_return(&oops_count) >= limit && limit)
+               panic("Oopsed too often (kernel.oops_limit is %u)", limit);
+
+       /*
         * We're taking recursive faults here in make_task_dead. Safest is to just
         * leave this task alone and wait for reboot.
         */