nohz/full, sched/rt: Fix missed tick-reenabling bug in dequeue_task_rt()
authorNicolas Saenz Julienne <nsaenzju@redhat.com>
Tue, 28 Jun 2022 09:22:59 +0000 (11:22 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 17 Aug 2022 12:23:14 +0000 (14:23 +0200)
[ Upstream commit 5c66d1b9b30f737fcef85a0b75bfe0590e16b62a ]

dequeue_task_rt() only decrements 'rt_rq->rt_nr_running' after having
called sched_update_tick_dependency() preventing it from re-enabling the
tick on systems that no longer have pending SCHED_RT tasks but have
multiple runnable SCHED_OTHER tasks:

  dequeue_task_rt()
    dequeue_rt_entity()
      dequeue_rt_stack()
        dequeue_top_rt_rq()
  sub_nr_running() // decrements rq->nr_running
    sched_update_tick_dependency()
      sched_can_stop_tick() // checks rq->rt.rt_nr_running,
      ...
        __dequeue_rt_entity()
          dec_rt_tasks() // decrements rq->rt.rt_nr_running
  ...

Every other scheduler class performs the operation in the opposite
order, and sched_update_tick_dependency() expects the values to be
updated as such. So avoid the misbehaviour by inverting the order in
which the above operations are performed in the RT scheduler.

Fixes: 76d92ac305f2 ("sched: Migrate sched to use new tick dependency mask model")
Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Reviewed-by: Phil Auld <pauld@redhat.com>
Link: https://lore.kernel.org/r/20220628092259.330171-1-nsaenzju@redhat.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
kernel/sched/rt.c

index 8007d08..f75dcd3 100644 (file)
@@ -444,7 +444,7 @@ static inline void rt_queue_push_tasks(struct rq *rq)
 #endif /* CONFIG_SMP */
 
 static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
-static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
+static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);
 
 static inline int on_rt_rq(struct sched_rt_entity *rt_se)
 {
@@ -565,7 +565,7 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
        rt_se = rt_rq->tg->rt_se[cpu];
 
        if (!rt_se) {
-               dequeue_top_rt_rq(rt_rq);
+               dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
                /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
                cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
        }
@@ -651,7 +651,7 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 
 static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
-       dequeue_top_rt_rq(rt_rq);
+       dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
 }
 
 static inline int rt_rq_throttled(struct rt_rq *rt_rq)
@@ -1051,7 +1051,7 @@ static void update_curr_rt(struct rq *rq)
 }
 
 static void
-dequeue_top_rt_rq(struct rt_rq *rt_rq)
+dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count)
 {
        struct rq *rq = rq_of_rt_rq(rt_rq);
 
@@ -1062,7 +1062,7 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
 
        BUG_ON(!rq->nr_running);
 
-       sub_nr_running(rq, rt_rq->rt_nr_running);
+       sub_nr_running(rq, count);
        rt_rq->rt_queued = 0;
 
 }
@@ -1342,18 +1342,21 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
 static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
 {
        struct sched_rt_entity *back = NULL;
+       unsigned int rt_nr_running;
 
        for_each_sched_rt_entity(rt_se) {
                rt_se->back = back;
                back = rt_se;
        }
 
-       dequeue_top_rt_rq(rt_rq_of_se(back));
+       rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
 
        for (rt_se = back; rt_se; rt_se = rt_se->back) {
                if (on_rt_rq(rt_se))
                        __dequeue_rt_entity(rt_se, flags);
        }
+
+       dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
 }
 
 static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)