return ret;
}
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * Migrate current task p to target_cpu.
+ *
+ * Returns 0 without doing any work when p is already on target_cpu,
+ * -EINVAL when target_cpu is not set in tsk_cpus_allowed(p), and otherwise
+ * the return value of stop_one_cpu(), which is handed migration_cpu_stop
+ * together with the {task, destination} pair for the CPU p is on now.
+ */
+int migrate_task_to(struct task_struct *p, int target_cpu)
+{
+	struct migration_arg arg = { p, target_cpu };
+	int src_cpu = task_cpu(p);
+	int ret;
+
+	if (src_cpu == target_cpu) {
+		/* Already on the requested CPU: nothing to migrate. */
+		ret = 0;
+	} else if (!cpumask_test_cpu(target_cpu, tsk_cpus_allowed(p))) {
+		/* The destination is outside the task's allowed mask. */
+		ret = -EINVAL;
+	} else {
+		/* TODO: This is not properly updating schedstats */
+		ret = stop_one_cpu(src_cpu, migration_cpu_stop, &arg);
+	}
+
+	return ret;
+}
+#endif
+
/*
* migration_cpu_stop - this will be executed by a highprio stopper thread
* and performs thread migration by bumping thread off CPU then
*/
unsigned int sysctl_numa_balancing_settle_count __read_mostly = 3;
+static unsigned long weighted_cpuload(const int cpu);
+
+
+/*
+ * find_idlest_cpu_node - pick the CPU in cpumask_of_node(nid) that reports
+ * the smallest weighted_cpuload().
+ *
+ * this_cpu must not belong to node nid; that is enforced with BUG_ON().
+ * this_cpu is also the fallback result when no CPU visited by the loop
+ * reports a load below ULONG_MAX (for instance, if the loop visits no CPU
+ * at all). Only a strictly smaller load replaces the current choice, so
+ * on a tie the CPU visited first by for_each_cpu() is kept.
+ *
+ * The walk over the node's CPUs is done under rcu_read_lock().
+ */
+static int
+find_idlest_cpu_node(int this_cpu, int nid)
+{
+	unsigned long lowest_load = ULONG_MAX;
+	int best_cpu = this_cpu;
+	int cpu;
+
+	BUG_ON(cpu_to_node(this_cpu) == nid);
+
+	rcu_read_lock();
+	for_each_cpu(cpu, cpumask_of_node(nid)) {
+		unsigned long cpu_load = weighted_cpuload(cpu);
+
+		/* Not strictly better than what we already have. */
+		if (cpu_load >= lowest_load)
+			continue;
+
+		lowest_load = cpu_load;
+		best_cpu = cpu;
+	}
+	rcu_read_unlock();
+
+	return best_cpu;
+}
+
static void task_numa_placement(struct task_struct *p)
{
int seq, nid, max_nid = -1;
}
}
- /* Update the tasks preferred node if necessary */
+ /*
+ * Record the preferred node as the node with the most faults,
+ * requeue the task to be running on the idlest CPU on the
+ * preferred node and reset the scanning rate to recheck
+ * the working set placement.
+ */
if (max_faults && max_nid != p->numa_preferred_nid) {
+ int preferred_cpu;
+
+ /*
+ * If the task is not on the preferred node then find the most
+ * idle CPU to migrate to.
+ */
+ preferred_cpu = task_cpu(p);
+ if (cpu_to_node(preferred_cpu) != max_nid) {
+ preferred_cpu = find_idlest_cpu_node(preferred_cpu,
+ max_nid);
+ }
+
+ /* Update the preferred nid and migrate task if possible */
p->numa_preferred_nid = max_nid;
p->numa_migrate_seq = 0;
+ migrate_task_to(p, preferred_cpu);
}
}