u64 node_stamp; /* migration stamp */
struct callback_head numa_work;
+ /*
+ * Exponential decaying average of faults on a per-node basis.
+ * Scheduling placement decisions are made based on these counts.
+ * The values remain static for the duration of a PTE scan.
+ */
unsigned long *numa_faults;
+
+ /*
+ * numa_faults_buffer records faults per node during the current
+ * scan window. When the scan completes, the counts in numa_faults
+ * decay and these values are added to them; the buffer is then reset.
+ */
+ unsigned long *numa_faults_buffer;
+
int numa_preferred_nid;
#endif /* CONFIG_NUMA_BALANCING */
p->numa_preferred_nid = -1;
p->numa_work.next = &p->numa_work;
p->numa_faults = NULL;
+ p->numa_faults_buffer = NULL;
#endif /* CONFIG_NUMA_BALANCING */
}
/* Find the node with the highest number of faults */
for_each_online_node(nid) {
- unsigned long faults = p->numa_faults[nid];
+ unsigned long faults;
+
+ /* Decay existing window and copy faults since last scan */
p->numa_faults[nid] >>= 1;
+ p->numa_faults[nid] += p->numa_faults_buffer[nid];
+ p->numa_faults_buffer[nid] = 0;
+
+ faults = p->numa_faults[nid];
if (faults > max_faults) {
max_faults = faults;
max_nid = nid;
if (unlikely(!p->numa_faults)) {
int size = sizeof(*p->numa_faults) * nr_node_ids;
- p->numa_faults = kzalloc(size, GFP_KERNEL|__GFP_NOWARN);
+ /* numa_faults and numa_faults_buffer share the allocation */
+ p->numa_faults = kzalloc(size * 2, GFP_KERNEL|__GFP_NOWARN);
if (!p->numa_faults)
return;
+
+ BUG_ON(p->numa_faults_buffer);
+ p->numa_faults_buffer = p->numa_faults + nr_node_ids;
}
/*
task_numa_placement(p);
- p->numa_faults[node] += pages;
+ p->numa_faults_buffer[node] += pages;
}
static void reset_ptenuma_scan(struct task_struct *p)