last_time = page_cpupid_xchg_last(page, time >> PAGE_ACCESS_TIME_BUCKETS);
return last_time << PAGE_ACCESS_TIME_BUCKETS;
}
+
+static inline void vma_set_access_pid_bit(struct vm_area_struct *vma)
+{
+ unsigned int pid_bit;
+
+ pid_bit = current->pid % BITS_PER_LONG;
+ if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->access_pids)) {
+ __set_bit(pid_bit, &vma->numab_state->access_pids);
+ }
+}
#else /* !CONFIG_NUMA_BALANCING */
static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
{
{
return false;
}
+
+static inline void vma_set_access_pid_bit(struct vm_area_struct *vma)
+{
+}
#endif /* CONFIG_NUMA_BALANCING */
#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
p->mm->numa_scan_offset = 0;
}
+static bool vma_is_accessed(struct vm_area_struct *vma)
+{
+ /*
+ * Allow unconditional access first two times, so that all the (pages)
+ * of VMAs get prot_none fault introduced irrespective of accesses.
+ * This is also done to avoid any side effect of task scanning
+ * amplifying the unfairness of disjoint set of VMAs' access.
+ */
+ if (READ_ONCE(current->mm->numa_scan_seq) < 2)
+ return true;
+
+ return test_bit(current->pid % BITS_PER_LONG,
+ &vma->numab_state->access_pids);
+}
+
/*
* The expensive part of numa migration is done from task_work context.
* Triggered from task_tick_numa().
vma->numab_state->next_scan))
continue;
+ /* Do not scan the VMA if task has not accessed */
+ if (!vma_is_accessed(vma))
+ continue;
+
do {
start = max(start, vma->vm_start);
end = ALIGN(start + (pages << PAGE_SHIFT), HPAGE_SIZE);
{
get_page(page);
+ /* Record the current PID acceesing VMA */
+ vma_set_access_pid_bit(vma);
+
count_vm_numa_event(NUMA_HINT_FAULTS);
if (page_nid == numa_node_id()) {
count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);