cgroup/cpuset: Inherit parent's load balance state in v2
authorWaiman Long <longman@redhat.com>
Tue, 27 Jun 2023 14:35:00 +0000 (10:35 -0400)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 13 Sep 2023 07:42:49 +0000 (09:42 +0200)
[ Upstream commit c8c926200c55454101f072a4b16c9ff5b8c9e56f ]

Since commit f28e22441f35 ("cgroup/cpuset: Add a new isolated
cpus.partition type"), the CS_SCHED_LOAD_BALANCE bit of a v2 cpuset
can be on or off. The child cpusets of a partition root must have the
same setting as its parent or it may screw up the rebuilding of sched
domains. Fix this problem by making sure the a child v2 cpuset will
follows its parent cpuset load balance state unless the child cpuset
is a new partition root itself.

Fixes: f28e22441f35 ("cgroup/cpuset: Add a new isolated cpus.partition type")
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
kernel/cgroup/cpuset.c

index db3e05b..79e6a5d 100644 (file)
@@ -1606,11 +1606,16 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
                }
 
                /*
-                * Skip the whole subtree if the cpumask remains the same
-                * and has no partition root state and force flag not set.
+                * Skip the whole subtree if
+                * 1) the cpumask remains the same,
+                * 2) has no partition root state,
+                * 3) force flag not set, and
+                * 4) for v2 load balance state same as its parent.
                 */
                if (!cp->partition_root_state && !force &&
-                   cpumask_equal(tmp->new_cpus, cp->effective_cpus)) {
+                   cpumask_equal(tmp->new_cpus, cp->effective_cpus) &&
+                   (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
+                   (is_sched_load_balance(parent) == is_sched_load_balance(cp)))) {
                        pos_css = css_rightmost_descendant(pos_css);
                        continue;
                }
@@ -1694,6 +1699,20 @@ update_parent_subparts:
                update_tasks_cpumask(cp, tmp->new_cpus);
 
                /*
+                * On default hierarchy, inherit the CS_SCHED_LOAD_BALANCE
+                * from parent if current cpuset isn't a valid partition root
+                * and their load balance states differ.
+                */
+               if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+                   !is_partition_valid(cp) &&
+                   (is_sched_load_balance(parent) != is_sched_load_balance(cp))) {
+                       if (is_sched_load_balance(parent))
+                               set_bit(CS_SCHED_LOAD_BALANCE, &cp->flags);
+                       else
+                               clear_bit(CS_SCHED_LOAD_BALANCE, &cp->flags);
+               }
+
+               /*
                 * On legacy hierarchy, if the effective cpumask of any non-
                 * empty cpuset is changed, we need to rebuild sched domains.
                 * On default hierarchy, the cpuset needs to be a partition
@@ -3213,6 +3232,14 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
                cs->use_parent_ecpus = true;
                parent->child_ecpus_count++;
        }
+
+       /*
+        * For v2, clear CS_SCHED_LOAD_BALANCE if parent is isolated
+        */
+       if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+           !is_sched_load_balance(parent))
+               clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
+
        spin_unlock_irq(&callback_lock);
 
        if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))