cgroup/cpuset: Enable update_tasks_cpumask() on top_cpuset
author Waiman Long <longman@redhat.com>
Thu, 1 Sep 2022 20:57:36 +0000 (16:57 -0400)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 26 Oct 2022 10:35:25 +0000 (12:35 +0200)
[ Upstream commit ec5fbdfb99d18482619ac42605cb80fbb56068ee ]

Previously, update_tasks_cpumask() was not supposed to be called with
the top cpuset. With cpuset partitions that take CPUs away from the top
cpuset, adjusting the cpus_mask of the tasks in the top cpuset is
necessary. Percpu kthreads, however, are ignored.

Fixes: ee8dde0cd2ce ("cpuset: Add new v2 cpuset.sched.partition flag")
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
kernel/cgroup/cpuset.c

index 3213d3c..428820b 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/kmod.h>
+#include <linux/kthread.h>
 #include <linux/list.h>
 #include <linux/mempolicy.h>
 #include <linux/mm.h>
@@ -1087,10 +1088,18 @@ static void update_tasks_cpumask(struct cpuset *cs)
 {
        struct css_task_iter it;
        struct task_struct *task;
+       bool top_cs = cs == &top_cpuset;
 
        css_task_iter_start(&cs->css, 0, &it);
-       while ((task = css_task_iter_next(&it)))
+       while ((task = css_task_iter_next(&it))) {
+               /*
+                * Percpu kthreads in top_cpuset are ignored
+                */
+               if (top_cs && (task->flags & PF_KTHREAD) &&
+                   kthread_is_per_cpu(task))
+                       continue;
                set_cpus_allowed_ptr(task, cs->effective_cpus);
+       }
        css_task_iter_end(&it);
 }
 
@@ -2052,12 +2061,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
                update_flag(CS_CPU_EXCLUSIVE, cs, 0);
        }
 
-       /*
-        * Update cpumask of parent's tasks except when it is the top
-        * cpuset as some system daemons cannot be mapped to other CPUs.
-        */
-       if (parent != &top_cpuset)
-               update_tasks_cpumask(parent);
+       update_tasks_cpumask(parent);
 
        if (parent->child_ecpus_count)
                update_sibling_cpumasks(parent, cs, &tmpmask);