posix-cpu-timers: Provide mechanisms to defer timer handling to task_work

author Thomas Gleixner <tglx@linutronix.de>

Thu, 30 Jul 2020 10:14:06 +0000 (12:14 +0200)

committer Ingo Molnar <mingo@kernel.org>

Thu, 6 Aug 2020 14:50:59 +0000 (16:50 +0200)
author Thomas Gleixner <tglx@linutronix.de>
Thu, 30 Jul 2020 10:14:06 +0000 (12:14 +0200)
committer Ingo Molnar <mingo@kernel.org>
Thu, 6 Aug 2020 14:50:59 +0000 (16:50 +0200)
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h

index e3f0f85..896c16d 100644 (file)
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -6,6 +6,7 @@
  #include <linux/list.h>
  #include <linux/alarmtimer.h>
  #include <linux/timerqueue.h>
+#include <linux/task_work.h>
  
  struct kernel_siginfo;
  struct task_struct;
@@ -125,6 +126,16 @@ struct posix_cputimers {
         unsigned int                    expiry_active;
  };
  
+/**
+ * struct posix_cputimers_work - Container for task work based posix CPU timer expiry
+ * @work:      The task work to be scheduled
+ * @scheduled:  @work has been scheduled already, no further processing
+ */
+struct posix_cputimers_work {
+       struct callback_head    work;
+       unsigned int            scheduled;
+};
+
  static inline void posix_cputimers_init(struct posix_cputimers *pct)
  {
         memset(pct, 0, sizeof(*pct));
@@ -165,6 +176,12 @@ static inline void posix_cputimers_group_init(struct posix_cputimers *pct,
                                               u64 cpu_limit) { }
  #endif
  
+#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+void posix_cputimers_init_work(void);
+#else
+static inline void posix_cputimers_init_work(void) { }
+#endif
+
  #define REQUEUE_PENDING 1
  
  /**
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 06ec604..e9942ce 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -889,6 +889,10 @@ struct task_struct {
         /* Empty if CONFIG_POSIX_CPUTIMERS=n */
         struct posix_cputimers          posix_cputimers;
  
+#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+       struct posix_cputimers_work     posix_cputimers_work;
+#endif
+
         /* Process credentials: */
  
         /* Tracer's credentials at attach: */
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig

index fcc4235..a09b1d6 100644 (file)
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -52,6 +52,15 @@ config GENERIC_CLOCKEVENTS_MIN_ADJUST
  config GENERIC_CMOS_UPDATE
         bool
  
+# Select to handle posix CPU timers from task_work
+# and not from the timer interrupt context
+config HAVE_POSIX_CPU_TIMERS_TASK_WORK
+       bool
+
+config POSIX_CPU_TIMERS_TASK_WORK
+       bool
+       default y if POSIX_TIMERS && HAVE_POSIX_CPU_TIMERS_TASK_WORK
+
  if GENERIC_CLOCKEVENTS
  menu "Timers subsystem"
  
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c

index e5ad873..a71758e 100644 (file)
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -377,6 +377,7 @@ static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp)
   */
  static int posix_cpu_timer_create(struct k_itimer *new_timer)
  {
+       static struct lock_class_key posix_cpu_timers_key;
         struct pid *pid;
  
         rcu_read_lock();
@@ -386,6 +387,17 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer)
                 return -EINVAL;
         }
  
+       /*
+        * If posix timer expiry is handled in task work context then
+        * timer::it_lock can be taken without disabling interrupts as all
+        * other locking happens in task context. This requires a separate
+        * lock class key otherwise regular posix timer expiry would record
+        * the lock class being taken in interrupt context and generate a
+        * false positive warning.
+        */
+       if (IS_ENABLED(CONFIG_POSIX_CPU_TIMERS_TASK_WORK))
+               lockdep_set_class(&new_timer->it_lock, &posix_cpu_timers_key);
+
         new_timer->kclock = &clock_posix_cpu;
         timerqueue_init(&new_timer->it.cpu.node);
         new_timer->it.cpu.pid = get_pid(pid);
@@ -1080,26 +1092,163 @@ static inline bool fastpath_timer_check(struct task_struct *tsk)
         return false;
  }
  
-static void __run_posix_cpu_timers(struct task_struct *tsk)
+static void handle_posix_cpu_timers(struct task_struct *tsk);
+
+#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+static void posix_cpu_timers_work(struct callback_head *work)
+{
+       handle_posix_cpu_timers(current);
+}
+
+/*
+ * Initialize posix CPU timers task work in init task. Out of line to
+ * keep the callback static and to avoid header recursion hell.
+ */
+void __init posix_cputimers_init_work(void)
+{
+       init_task_work(&current->posix_cputimers_work.work,
+                      posix_cpu_timers_work);
+}
+
+/*
+ * Note: All operations on tsk->posix_cputimers_work.scheduled happen either
+ * in hard interrupt context or in task context with interrupts
+ * disabled. Aside from that the writer/reader interaction is always in the
+ * context of the current task, which means they are strictly per CPU.
+ */
+static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
+{
+       return tsk->posix_cputimers_work.scheduled;
+}
+
+static inline void __run_posix_cpu_timers(struct task_struct *tsk)
+{
+       if (WARN_ON_ONCE(tsk->posix_cputimers_work.scheduled))
+               return;
+
+       /* Schedule task work to actually expire the timers */
+       tsk->posix_cputimers_work.scheduled = true;
+       task_work_add(tsk, &tsk->posix_cputimers_work.work, TWA_RESUME);
+}
+
+static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk,
+                                               unsigned long start)
+{
+       bool ret = true;
+
+       /*
+        * On !RT kernels interrupts are disabled while collecting expired
+        * timers, so no tick can happen and the fast path check can be
+        * reenabled without further checks.
+        */
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+               tsk->posix_cputimers_work.scheduled = false;
+               return true;
+       }
+
+       /*
+        * On RT enabled kernels ticks can happen while the expired timers
+        * are collected under sighand lock. But any tick which observes
+        * the CPUTIMERS_WORK_SCHEDULED bit set, does not run the fastpath
+        * checks. So reenabling the tick work has to be done carefully:
+        *
+        * Disable interrupts and run the fast path check if jiffies have
+        * advanced since the collecting of expired timers started. If
+        * jiffies have not advanced or the fast path check did not find
+        * newly expired timers, reenable the fast path check in the timer
+        * interrupt. If there are newly expired timers, return false and
+        * let the collection loop repeat.
+        */
+       local_irq_disable();
+       if (start != jiffies && fastpath_timer_check(tsk))
+               ret = false;
+       else
+               tsk->posix_cputimers_work.scheduled = false;
+       local_irq_enable();
+
+       return ret;
+}
+#else /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */
+static inline void __run_posix_cpu_timers(struct task_struct *tsk)
+{
+       lockdep_posixtimer_enter();
+       handle_posix_cpu_timers(tsk);
+       lockdep_posixtimer_exit();
+}
+
+static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
+{
+       return false;
+}
+
+static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk,
+                                               unsigned long start)
+{
+       return true;
+}
+#endif /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */
+
+static void handle_posix_cpu_timers(struct task_struct *tsk)
  {
         struct k_itimer *timer, *next;
-       unsigned long flags;
+       unsigned long flags, start;
         LIST_HEAD(firing);
  
         if (!lock_task_sighand(tsk, &flags))
                 return;
  
-       /*
-        * Here we take off tsk->signal->cpu_timers[N] and
-        * tsk->cpu_timers[N] all the timers that are firing, and
-        * put them on the firing list.
-        */
-       check_thread_timers(tsk, &firing);
+       do {
+               /*
+                * On RT locking sighand lock does not disable interrupts,
+                * so this needs to be careful vs. ticks. Store the current
+                * jiffies value.
+                */
+               start = READ_ONCE(jiffies);
+               barrier();
  
-       check_process_timers(tsk, &firing);
+               /*
+                * Here we take off tsk->signal->cpu_timers[N] and
+                * tsk->cpu_timers[N] all the timers that are firing, and
+                * put them on the firing list.
+                */
+               check_thread_timers(tsk, &firing);
+
+               check_process_timers(tsk, &firing);
+
+               /*
+                * The above timer checks have updated the expiry cache and
+                * because nothing can have queued or modified timers after
+                * sighand lock was taken above it is guaranteed to be
+                * consistent. So the next timer interrupt fastpath check
+                * will find valid data.
+                *
+                * If timer expiry runs in the timer interrupt context then
+                * the loop is not relevant as timers will be directly
+                * expired in interrupt context. The stub function below
+                * returns always true which allows the compiler to
+                * optimize the loop out.
+                *
+                * If timer expiry is deferred to task work context then
+                * the following rules apply:
+                *
+                * - On !RT kernels no tick can have happened on this CPU
+                *   after sighand lock was acquired because interrupts are
+                *   disabled. So reenabling task work before dropping
+                *   sighand lock and reenabling interrupts is race free.
+                *
+                * - On RT kernels ticks might have happened but the tick
+                *   work ignored posix CPU timer handling because the
+                *   CPUTIMERS_WORK_SCHEDULED bit is set. Reenabling work
+                *   must be done very carefully including a check whether
+                *   ticks have happened since the start of the timer
+                *   expiry checks. posix_cpu_timers_enable_work() takes
+                *   care of that and eventually lets the expiry checks
+                *   run again.
+                */
+       } while (!posix_cpu_timers_enable_work(tsk, start));
  
         /*
-        * We must release these locks before taking any timer's lock.
+        * We must release sighand lock before taking any timer's lock.
          * There is a potential race with timer deletion here, as the
          * siglock now protects our private firing list.  We have set
          * the firing flag in each timer, so that a deletion attempt
@@ -1117,6 +1266,13 @@ static void __run_posix_cpu_timers(struct task_struct *tsk)
         list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) {
                 int cpu_firing;
  
+               /*
+                * spin_lock() is sufficient here even independent of the
+                * expiry context. If expiry happens in hard interrupt
+                * context it's obvious. For task work context it's safe
+                * because all other operations on timer::it_lock happen in
+                * task context (syscall or exit).
+                */
                 spin_lock(&timer->it_lock);
                 list_del_init(&timer->it.cpu.elist);
                 cpu_firing = timer->it.cpu.firing;
@@ -1144,15 +1300,20 @@ void run_posix_cpu_timers(void)
         lockdep_assert_irqs_disabled();
  
         /*
+        * If the actual expiry is deferred to task work context and the
+        * work is already scheduled there is no point to do anything here.
+        */
+       if (posix_cpu_timers_work_scheduled(tsk))
+               return;
+
+       /*
          * The fast path checks that there are no expired thread or thread
          * group timers.  If that's so, just return.
          */
         if (!fastpath_timer_check(tsk))
                 return;
  
-       lockdep_posixtimer_enter();
         __run_posix_cpu_timers(tsk);
-       lockdep_posixtimer_exit();
  }
  
  /*
diff --git a/kernel/time/timer.c b/kernel/time/timer.c

index ae5029f..a16764b 100644 (file)
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -2017,6 +2017,7 @@ static void __init init_timer_cpus(void)
  void __init init_timers(void)
  {
         init_timer_cpus();
+       posix_cputimers_init_work();
         open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
  }
author	Thomas Gleixner <tglx@linutronix.de>
	Thu, 30 Jul 2020 10:14:06 +0000 (12:14 +0200)
committer	Ingo Molnar <mingo@kernel.org>
	Thu, 6 Aug 2020 14:50:59 +0000 (16:50 +0200)
include/linux/posix-timers.h		patch \| blob \| history
include/linux/sched.h		patch \| blob \| history
kernel/time/Kconfig		patch \| blob \| history
kernel/time/posix-cpu-timers.c		patch \| blob \| history
kernel/time/timer.c		patch \| blob \| history