sched/fair: Fix unthrottle_cfs_rq() for leaf_cfs_rq list
author Vincent Guittot <vincent.guittot@linaro.org>
Wed, 13 May 2020 13:55:28 +0000 (15:55 +0200)
committer Peter Zijlstra <peterz@infradead.org>
Tue, 19 May 2020 18:34:10 +0000 (20:34 +0200)
Although not exactly identical, unthrottle_cfs_rq() and enqueue_task_fair()
are quite close and follow the same sequence for enqueuing an entity in the
cfs hierarchy. Modify unthrottle_cfs_rq() to use the same pattern as
enqueue_task_fair(). This fixes a problem already faced with the latter and
adds an optimization in the last for_each_sched_entity loop.

Fixes: fe61468b2cb ("sched/fair: Fix enqueue_task_fair warning")
Reported-by: Tao Zhou <zohooouoto@zoho.com.cn>
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Phil Auld <pauld@redhat.com>
Reviewed-by: Ben Segall <bsegall@google.com>
Link: https://lkml.kernel.org/r/20200513135528.4742-1-vincent.guittot@linaro.org
kernel/sched/fair.c

index c6d57c3..538ba5d 100644 (file)
@@ -4774,7 +4774,6 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
        struct rq *rq = rq_of(cfs_rq);
        struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
        struct sched_entity *se;
-       int enqueue = 1;
        long task_delta, idle_task_delta;
 
        se = cfs_rq->tg->se[cpu_of(rq)];
@@ -4798,26 +4797,44 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
        idle_task_delta = cfs_rq->idle_h_nr_running;
        for_each_sched_entity(se) {
                if (se->on_rq)
-                       enqueue = 0;
+                       break;
+               cfs_rq = cfs_rq_of(se);
+               enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
 
+               cfs_rq->h_nr_running += task_delta;
+               cfs_rq->idle_h_nr_running += idle_task_delta;
+
+               /* end evaluation on encountering a throttled cfs_rq */
+               if (cfs_rq_throttled(cfs_rq))
+                       goto unthrottle_throttle;
+       }
+
+       for_each_sched_entity(se) {
                cfs_rq = cfs_rq_of(se);
-               if (enqueue) {
-                       enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
-               } else {
-                       update_load_avg(cfs_rq, se, 0);
-                       se_update_runnable(se);
-               }
+
+               update_load_avg(cfs_rq, se, UPDATE_TG);
+               se_update_runnable(se);
 
                cfs_rq->h_nr_running += task_delta;
                cfs_rq->idle_h_nr_running += idle_task_delta;
 
+
+               /* end evaluation on encountering a throttled cfs_rq */
                if (cfs_rq_throttled(cfs_rq))
-                       break;
+                       goto unthrottle_throttle;
+
+               /*
+                * One parent has been throttled and cfs_rq removed from the
+                * list. Add it back to not break the leaf list.
+                */
+               if (throttled_hierarchy(cfs_rq))
+                       list_add_leaf_cfs_rq(cfs_rq);
        }
 
-       if (!se)
-               add_nr_running(rq, task_delta);
+       /* At this point se is NULL and we are at root level*/
+       add_nr_running(rq, task_delta);
 
+unthrottle_throttle:
        /*
         * The cfs_rq_throttled() breaks in the above iteration can result in
         * incomplete leaf list maintenance, resulting in triggering the
@@ -4826,7 +4843,8 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
        for_each_sched_entity(se) {
                cfs_rq = cfs_rq_of(se);
 
-               list_add_leaf_cfs_rq(cfs_rq);
+               if (list_add_leaf_cfs_rq(cfs_rq))
+                       break;
        }
 
        assert_list_leaf_cfs_rq(rq);