sched, cgroup: Restore meaning to hierarchical_quota

author Phil Auld <pauld@redhat.com>
Fri, 14 Jul 2023 12:57:46 +0000 (08:57 -0400)
committer Peter Zijlstra <peterz@infradead.org>
Wed, 2 Aug 2023 14:19:26 +0000 (16:19 +0200)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 83e36547af176255eb25dca0d8dd583613f342c3..3af25caf6343afbe193e59de319ac3341c0c1db6 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9953,7 +9953,7 @@ void __init sched_init(void)
                 ptr += nr_cpu_ids * sizeof(void **);
  
                 root_task_group.shares = ROOT_TASK_GROUP_LOAD;
-               init_cfs_bandwidth(&root_task_group.cfs_bandwidth);
+               init_cfs_bandwidth(&root_task_group.cfs_bandwidth, NULL);
  #endif /* CONFIG_FAIR_GROUP_SCHED */
  #ifdef CONFIG_RT_GROUP_SCHED
                 root_task_group.rt_se = (struct sched_rt_entity **)ptr;
@@ -11087,11 +11087,16 @@ static int tg_cfs_schedulable_down(struct task_group *tg, void *data)
  
                 /*
                  * Ensure max(child_quota) <= parent_quota.  On cgroup2,
-                * always take the min.  On cgroup1, only inherit when no
-                * limit is set:
+                * always take the non-RUNTIME_INF min.  On cgroup1, only
+                * inherit when no limit is set. In both cases this is used
+                * by the scheduler to determine if a given CFS task has a
+                * bandwidth constraint at some higher level.
                  */
                 if (cgroup_subsys_on_dfl(cpu_cgrp_subsys)) {
-                       quota = min(quota, parent_quota);
+                       if (quota == RUNTIME_INF)
+                               quota = parent_quota;
+                       else if (parent_quota != RUNTIME_INF)
+                               quota = min(quota, parent_quota);
                 } else {
                         if (quota == RUNTIME_INF)
                                 quota = parent_quota;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index f55b0a72772ef7d3c1c57b3189e6693174d25c08..26bfbb640894f62e69d2dcbaf39409e53f6183b9 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6045,13 +6045,14 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
         return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
  }
  
-void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
+void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *parent)
  {
         raw_spin_lock_init(&cfs_b->lock);
         cfs_b->runtime = 0;
         cfs_b->quota = RUNTIME_INF;
         cfs_b->period = ns_to_ktime(default_cfs_period());
         cfs_b->burst = 0;
+       cfs_b->hierarchical_quota = parent ? parent->hierarchical_quota : RUNTIME_INF;
  
         INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
         hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
@@ -6217,7 +6218,7 @@ static inline int throttled_lb_pair(struct task_group *tg,
         return 0;
  }
  
-void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
+void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *parent) {}
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
  static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
@@ -12599,7 +12600,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
  
         tg->shares = NICE_0_LOAD;
  
-       init_cfs_bandwidth(tg_cfs_bandwidth(tg));
+       init_cfs_bandwidth(tg_cfs_bandwidth(tg), tg_cfs_bandwidth(parent));
  
         for_each_possible_cpu(i) {
                 cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

index 9baeb1a2dfdd450eda93297bf457f53787835c88..602de71b48e1ea7ce90e7e1c0fbed16b3387046c 100644 (file)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -454,7 +454,7 @@ extern void unregister_fair_sched_group(struct task_group *tg);
  extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
                         struct sched_entity *se, int cpu,
                         struct sched_entity *parent);
-extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
+extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *parent);
  
  extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
  extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
author	Phil Auld <pauld@redhat.com>
	Fri, 14 Jul 2023 12:57:46 +0000 (08:57 -0400)
committer	Peter Zijlstra <peterz@infradead.org>
	Wed, 2 Aug 2023 14:19:26 +0000 (16:19 +0200)
kernel/sched/core.c		patch | blob | history
kernel/sched/fair.c		patch | blob | history
kernel/sched/sched.h		patch | blob | history