sched/numa: Avoid migrating task to CPU-less node

author Huang Ying <ying.huang@intel.com>

Mon, 14 Feb 2022 12:15:53 +0000 (20:15 +0800)

committer Peter Zijlstra <peterz@infradead.org>

Wed, 16 Feb 2022 14:57:53 +0000 (15:57 +0100)
author Huang Ying <ying.huang@intel.com>
Mon, 14 Feb 2022 12:15:53 +0000 (20:15 +0800)
committer Peter Zijlstra <peterz@infradead.org>
Wed, 16 Feb 2022 14:57:53 +0000 (15:57 +0100)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index da3230b842507ba778edb2368b6a399c6743ec74..11a72e1b3b2c0e6da31964390e1d069fe34e4b70 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1989,7 +1989,7 @@ static int task_numa_migrate(struct task_struct *p)
          */
         ng = deref_curr_numa_group(p);
         if (env.best_cpu == -1 || (ng && ng->active_nodes > 1)) {
-               for_each_online_node(nid) {
+               for_each_node_state(nid, N_CPU) {
                         if (nid == env.src_nid || nid == p->numa_preferred_nid)
                                 continue;
  
@@ -2087,13 +2087,13 @@ static void numa_group_count_active_nodes(struct numa_group *numa_group)
         unsigned long faults, max_faults = 0;
         int nid, active_nodes = 0;
  
-       for_each_online_node(nid) {
+       for_each_node_state(nid, N_CPU) {
                 faults = group_faults_cpu(numa_group, nid);
                 if (faults > max_faults)
                         max_faults = faults;
         }
  
-       for_each_online_node(nid) {
+       for_each_node_state(nid, N_CPU) {
                 faults = group_faults_cpu(numa_group, nid);
                 if (faults * ACTIVE_NODE_FRACTION > max_faults)
                         active_nodes++;
@@ -2247,7 +2247,7 @@ static int preferred_group_nid(struct task_struct *p, int nid)
  
                 dist = sched_max_numa_distance;
  
-               for_each_online_node(node) {
+               for_each_node_state(node, N_CPU) {
                         score = group_weight(p, node, dist);
                         if (score > max_score) {
                                 max_score = score;
@@ -2266,7 +2266,7 @@ static int preferred_group_nid(struct task_struct *p, int nid)
          * inside the highest scoring group of nodes. The nodemask tricks
          * keep the complexity of the search down.
          */
-       nodes = node_online_map;
+       nodes = node_states[N_CPU];
         for (dist = sched_max_numa_distance; dist > LOCAL_DISTANCE; dist--) {
                 unsigned long max_faults = 0;
                 nodemask_t max_group = NODE_MASK_NONE;
@@ -2405,6 +2405,21 @@ static void task_numa_placement(struct task_struct *p)
                 }
         }
  
+       /* Cannot migrate task to CPU-less node */
+       if (!node_state(max_nid, N_CPU)) {
+               int near_nid = max_nid;
+               int distance, near_distance = INT_MAX;
+
+               for_each_node_state(nid, N_CPU) {
+                       distance = node_distance(max_nid, nid);
+                       if (distance < near_distance) {
+                               near_nid = nid;
+                               near_distance = distance;
+                       }
+               }
+               max_nid = near_nid;
+       }
+
         if (ng) {
                 numa_group_count_active_nodes(ng);
                 spin_unlock_irq(group_lock);
author	Huang Ying <ying.huang@intel.com>
	Mon, 14 Feb 2022 12:15:53 +0000 (20:15 +0800)
committer	Peter Zijlstra <peterz@infradead.org>
	Wed, 16 Feb 2022 14:57:53 +0000 (15:57 +0100)