static struct kmem_cache *stable_node_cache;
static struct kmem_cache *mm_slot_cache;
+/* The number of pages scanned */
+static unsigned long ksm_pages_scanned;
+
/* The number of nodes in the stable tree */
static unsigned long ksm_pages_shared;
/* Whether to merge empty (zeroed) pages with actual zero pages */
static bool ksm_use_zero_pages __read_mostly;
+/* The number of zero pages which is placed by KSM */
+unsigned long ksm_zero_pages;
+
#ifdef CONFIG_NUMA
/* Zeroed when merging across nodes is not allowed */
static unsigned int ksm_merge_across_nodes = 1;
if (is_migration_entry(entry))
page = pfn_swap_entry_to_page(entry);
}
- ret = page && PageKsm(page);
+ /* return 1 if the page is an normal ksm page or KSM-placed zero page */
+ ret = (page && PageKsm(page)) || is_ksm_zero_pte(*pte);
pte_unmap_unlock(pte, ptl);
return ret;
}
static const struct mm_walk_ops break_ksm_ops = {
.pmd_entry = break_ksm_pmd_entry,
+ .walk_lock = PGWALK_RDLOCK,
+};
+
+static const struct mm_walk_ops break_ksm_lock_vma_ops = {
+ .pmd_entry = break_ksm_pmd_entry,
+ .walk_lock = PGWALK_WRLOCK,
};
/*
* of the process that owns 'vma'. We also do not want to enforce
* protection keys here anyway.
*/
-static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
+static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_vma)
{
vm_fault_t ret = 0;
+ const struct mm_walk_ops *ops = lock_vma ?
+ &break_ksm_lock_vma_ops : &break_ksm_ops;
do {
int ksm_page;
cond_resched();
- ksm_page = walk_page_range_vma(vma, addr, addr + 1,
- &break_ksm_ops, NULL);
+ ksm_page = walk_page_range_vma(vma, addr, addr + 1, ops, NULL);
if (WARN_ON_ONCE(ksm_page < 0))
return ksm_page;
if (!ksm_page)
mmap_read_lock(mm);
vma = find_mergeable_vma(mm, addr);
if (vma)
- break_ksm(vma, addr);
+ break_ksm(vma, addr, false);
mmap_read_unlock(mm);
}
* in cmp_and_merge_page on one of the rmap_items we would be removing.
*/
static int unmerge_ksm_pages(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end, bool lock_vma)
{
unsigned long addr;
int err = 0;
if (signal_pending(current))
err = -ERESTARTSYS;
else
- err = break_ksm(vma, addr);
+ err = break_ksm(vma, addr, lock_vma);
}
return err;
}
if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
continue;
err = unmerge_ksm_pages(vma,
- vma->vm_start, vma->vm_end);
+ vma->vm_start, vma->vm_end, false);
if (err)
goto error;
}
page_add_anon_rmap(kpage, vma, addr, RMAP_NONE);
newpte = mk_pte(kpage, vma->vm_page_prot);
} else {
- newpte = pte_mkspecial(pfn_pte(page_to_pfn(kpage),
- vma->vm_page_prot));
+ /*
+ * Use pte_mkdirty to mark the zero page mapped by KSM, and then
+ * we can easily track all KSM-placed zero pages by checking if
+ * the dirty bit in zero page's PTE is set.
+ */
+ newpte = pte_mkdirty(pte_mkspecial(pfn_pte(page_to_pfn(kpage), vma->vm_page_prot)));
+ ksm_zero_pages++;
+ mm->ksm_zero_pages++;
/*
* We're replacing an anonymous page with a zero page, which is
* not anonymous. We need to do proper accounting otherwise we
{
struct ksm_rmap_item *rmap_item;
struct page *page;
+ unsigned int npages = scan_npages;
- while (scan_npages-- && likely(!freezing(current))) {
+ while (npages-- && likely(!freezing(current))) {
cond_resched();
rmap_item = scan_get_next_rmap_item(&page);
if (!rmap_item)
cmp_and_merge_page(page, rmap_item);
put_page(page);
}
+
+ ksm_pages_scanned += scan_npages - npages;
}
static int ksmd_should_run(void)
return 0;
if (vma->anon_vma) {
- err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end);
+ err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end, true);
if (err)
return err;
}
return 0; /* just ignore the advice */
if (vma->anon_vma) {
- err = unmerge_ksm_pages(vma, start, end);
+ err = unmerge_ksm_pages(vma, start, end, true);
if (err)
return err;
}
struct anon_vma *av = rmap_item->anon_vma;
anon_vma_lock_read(av);
- read_lock(&tasklist_lock);
+ rcu_read_lock();
for_each_process(tsk) {
struct anon_vma_chain *vmac;
unsigned long addr;
}
}
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
anon_vma_unlock_read(av);
}
}
#ifdef CONFIG_PROC_FS
long ksm_process_profit(struct mm_struct *mm)
{
- return mm->ksm_merging_pages * PAGE_SIZE -
+ return (long)(mm->ksm_merging_pages + mm->ksm_zero_pages) * PAGE_SIZE -
mm->ksm_rmap_items * sizeof(struct ksm_rmap_item);
}
#endif /* CONFIG_PROC_FS */
}
KSM_ATTR(max_page_sharing);
+static ssize_t pages_scanned_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lu\n", ksm_pages_scanned);
+}
+KSM_ATTR_RO(pages_scanned);
+
static ssize_t pages_shared_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
}
KSM_ATTR_RO(pages_volatile);
+static ssize_t ksm_zero_pages_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%ld\n", ksm_zero_pages);
+}
+KSM_ATTR_RO(ksm_zero_pages);
+
static ssize_t general_profit_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
long general_profit;
- general_profit = ksm_pages_sharing * PAGE_SIZE -
+ general_profit = (ksm_pages_sharing + ksm_zero_pages) * PAGE_SIZE -
ksm_rmap_items * sizeof(struct ksm_rmap_item);
return sysfs_emit(buf, "%ld\n", general_profit);
&sleep_millisecs_attr.attr,
&pages_to_scan_attr.attr,
&run_attr.attr,
+ &pages_scanned_attr.attr,
&pages_shared_attr.attr,
&pages_sharing_attr.attr,
&pages_unshared_attr.attr,
&pages_volatile_attr.attr,
+ &ksm_zero_pages_attr.attr,
&full_scans_attr.attr,
#ifdef CONFIG_NUMA
&merge_across_nodes_attr.attr,