sched/core: Avoid calling update_rq_clock() multiple times in __cfsb_csd_unthrottle()
author		Hao Jia <jiahao.os@bytedance.com>
		Tue, 13 Jun 2023 08:20:11 +0000 (16:20 +0800)
committer	Peter Zijlstra <peterz@infradead.org>
		Fri, 16 Jun 2023 20:08:13 +0000 (22:08 +0200)
After commit 8ad075c2eb1f ("sched: Async unthrottling for cfs
bandwidth"), we may update the rq clock multiple times in the loop of
__cfsb_csd_unthrottle().

A prior (although less common) instance of this problem exists in
unthrottle_offline_cfs_rqs().

Cure both by ensuring update_rq_clock() is called before the loop and
setting RQCF_ACT_SKIP during the loop, to suppress further updates.
The alternative would be pulling update_rq_clock() out of
unthrottle_cfs_rq(), but that gives an even bigger mess.
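
For reference, update_rq_clock() already honours RQCF_ACT_SKIP; a trimmed
and simplified sketch of its entry check (not the full function) looks
roughly like:

    void update_rq_clock(struct rq *rq)
    {
            lockdep_assert_rq_held(rq);

            /* RQCF_ACT_SKIP turns this call into a no-op */
            if (rq->clock_update_flags & RQCF_ACT_SKIP)
                    return;

            /* ... otherwise compute the delta and advance rq->clock ... */
    }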

Fixes: 8ad075c2eb1f ("sched: Async unthrottling for cfs bandwidth")
Reviewed-by: Ben Segall <bsegall@google.com>
Suggested-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Hao Jia <jiahao.os@bytedance.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20230613082012.49615-4-jiahao.os@bytedance.com
kernel/sched/fair.c
kernel/sched/sched.h

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7666dbc..a80a739 100644
@@ -5577,6 +5577,14 @@ static void __cfsb_csd_unthrottle(void *arg)
        rq_lock(rq, &rf);
 
        /*
+        * Iterating over the list can trigger several calls to
+        * update_rq_clock() in unthrottle_cfs_rq().
+        * Do it once and skip the potential next ones.
+        */
+       update_rq_clock(rq);
+       rq_clock_start_loop_update(rq);
+
+       /*
         * Since we hold rq lock we're safe from concurrent manipulation of
         * the CSD list. However, this RCU critical section annotates the
         * fact that we pair with sched_free_group_rcu(), so that we cannot
@@ -5595,6 +5603,7 @@ static void __cfsb_csd_unthrottle(void *arg)
 
        rcu_read_unlock();
 
+       rq_clock_stop_loop_update(rq);
        rq_unlock(rq, &rf);
 }
 
@@ -6115,6 +6124,13 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
 
        lockdep_assert_rq_held(rq);
 
+       /*
+        * The rq clock has already been updated in
+        * set_rq_offline(), so we should skip updating
+        * the rq clock again in unthrottle_cfs_rq().
+        */
+       rq_clock_start_loop_update(rq);
+
        rcu_read_lock();
        list_for_each_entry_rcu(tg, &task_groups, list) {
                struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
@@ -6137,6 +6153,8 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
                        unthrottle_cfs_rq(cfs_rq);
        }
        rcu_read_unlock();
+
+       rq_clock_stop_loop_update(rq);
 }
 
 #else /* CONFIG_CFS_BANDWIDTH */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 36e23e4..50d4b61 100644
@@ -1546,6 +1546,28 @@ static inline void rq_clock_cancel_skipupdate(struct rq *rq)
        rq->clock_update_flags &= ~RQCF_REQ_SKIP;
 }
 
+/*
+ * During CPU offlining and rq-wide unthrottling, we can trigger
+ * an update_rq_clock() for several cfs and rt runqueues (typically
+ * when using list_for_each_entry_*).
+ * rq_clock_start_loop_update() can be called after updating the clock
+ * once and before iterating over the list to prevent multiple updates.
+ * After the iterative traversal, call rq_clock_stop_loop_update()
+ * to clear RQCF_ACT_SKIP in rq->clock_update_flags.
+ */
+static inline void rq_clock_start_loop_update(struct rq *rq)
+{
+       lockdep_assert_rq_held(rq);
+       SCHED_WARN_ON(rq->clock_update_flags & RQCF_ACT_SKIP);
+       rq->clock_update_flags |= RQCF_ACT_SKIP;
+}
+
+static inline void rq_clock_stop_loop_update(struct rq *rq)
+{
+       lockdep_assert_rq_held(rq);
+       rq->clock_update_flags &= ~RQCF_ACT_SKIP;
+}
+
 struct rq_flags {
        unsigned long flags;
        struct pin_cookie cookie;
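
For illustration, a condensed sketch of how the two helpers bracket the
async unthrottle loop in __cfsb_csd_unthrottle() after this change (the
list bookkeeping inside the loop is elided):

	rq_lock(rq, &rf);

	update_rq_clock(rq);            /* the one real clock update */
	rq_clock_start_loop_update(rq); /* set RQCF_ACT_SKIP */

	/*
	 * Nested update_rq_clock() calls made from unthrottle_cfs_rq()
	 * now see RQCF_ACT_SKIP and return immediately.
	 */
	list_for_each_entry_safe(cursor, tmp, &rq->cfsb_csd_list,
				 throttle_csd_list)
		unthrottle_cfs_rq(cursor);

	rq_clock_stop_loop_update(rq);  /* clear RQCF_ACT_SKIP */
	rq_unlock(rq, &rf);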