Merge tag 'zstd-linus-v6.2' of https://github.com/terrelln/linux
[platform/kernel/linux-starfive.git] / mm / ksm.c
index c19fcca..dd02780 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -39,6 +39,7 @@
 #include <linux/freezer.h>
 #include <linux/oom.h>
 #include <linux/numa.h>
+#include <linux/pagewalk.h>
 
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -419,47 +420,74 @@ static inline bool ksm_test_exit(struct mm_struct *mm)
        return atomic_read(&mm->mm_users) == 0;
 }
 
+static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next,
+                       struct mm_walk *walk)
+{      /* pmd_entry callback: returns 1 if the pte for addr refers to a PageKsm page, else 0 */
+       struct page *page = NULL;       /* stays NULL unless a page is found for addr below */
+       spinlock_t *ptl;                /* filled in by pte_offset_map_lock() */
+       pte_t *pte;
+       int ret;
+
+       if (pmd_leaf(*pmd) || !pmd_present(*pmd))       /* leaf or non-present pmd: no pte is examined */
+               return 0;
+
+       pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);   /* paired with pte_unmap_unlock() below */
+       if (pte_present(*pte)) {
+               page = vm_normal_page(walk->vma, addr, *pte);   /* result is NULL-checked when computing ret */
+       } else if (!pte_none(*pte)) {   /* neither present nor empty: decode it as a swap-style entry */
+               swp_entry_t entry = pte_to_swp_entry(*pte);
+
+               /*
+                * As KSM pages remain KSM pages until freed, no need to wait
+                * here for migration to end.
+                */
+               if (is_migration_entry(entry))  /* other non-present entry types leave page NULL */
+                       page = pfn_swap_entry_to_page(entry);
+       }
+       ret = page && PageKsm(page);    /* 0 or 1; computed before the pte is unmapped and unlocked */
+       pte_unmap_unlock(pte, ptl);
+       return ret;     /* break_ksm() reads the walk's return value as its "ksm_page" flag */
+}
+
+static const struct mm_walk_ops break_ksm_ops = {       /* handed to walk_page_range_vma() by break_ksm() */
+       .pmd_entry = break_ksm_pmd_entry,
+};
+
 /*
- * We use break_ksm to break COW on a ksm page: it's a stripped down
- *
- *     if (get_user_pages(addr, 1, FOLL_WRITE, &page, NULL) == 1)
- *             put_page(page);
+ * We use break_ksm to break COW on a ksm page by triggering unsharing,
+ * such that the ksm page will get replaced by an exclusive anonymous page.
  *
- * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
+ * We take great care only to touch a ksm page, in a VM_MERGEABLE vma,
  * in case the application has unmapped and remapped mm,addr meanwhile.
  * Could a ksm page appear anywhere else?  Actually yes, in a VM_PFNMAP
  * mmap of /dev/mem, where we would not want to touch it.
  *
- * FAULT_FLAG/FOLL_REMOTE are because we do this outside the context
+ * FAULT_FLAG_REMOTE/FOLL_REMOTE are because we do this outside the context
  * of the process that owns 'vma'.  We also do not want to enforce
  * protection keys here anyway.
  */
 static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
 {
-       struct page *page;
        vm_fault_t ret = 0;
 
        do {
+               int ksm_page;
+
                cond_resched();
-               page = follow_page(vma, addr,
-                               FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE);
-               if (IS_ERR_OR_NULL(page))
-                       break;
-               if (PageKsm(page))
-                       ret = handle_mm_fault(vma, addr,
-                                             FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE,
-                                             NULL);
-               else
-                       ret = VM_FAULT_WRITE;
-               put_page(page);
-       } while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM)));
+               ksm_page = walk_page_range_vma(vma, addr, addr + 1,
+                                              &break_ksm_ops, NULL);
+               if (WARN_ON_ONCE(ksm_page < 0))
+                       return ksm_page;
+               if (!ksm_page)
+                       return 0;
+               ret = handle_mm_fault(vma, addr,
+                                     FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
+                                     NULL);
+       } while (!(ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM)));
        /*
-        * We must loop because handle_mm_fault() may back out if there's
-        * any difficulty e.g. if pte accessed bit gets updated concurrently.
-        *
-        * VM_FAULT_WRITE is what we have been hoping for: it indicates that
-        * COW has been broken, even if the vma does not permit VM_WRITE;
-        * but note that a concurrent fault might break PageKsm for us.
+        * We must loop until we no longer find a KSM page because
+        * handle_mm_fault() may back out if there's any difficulty e.g. if
+        * pte accessed bit gets updated concurrently.
         *
         * VM_FAULT_SIGBUS could occur if we race with truncation of the
         * backing file, which also invalidates anonymous pages: that's
@@ -1041,7 +1069,6 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
 
        anon_exclusive = PageAnonExclusive(page);
        if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) ||
-           (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte)) ||
            anon_exclusive || mm_tlb_flush_pending(mm)) {
                pte_t entry;
 
@@ -1079,11 +1106,11 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
 
                if (pte_dirty(entry))
                        set_page_dirty(page);
+               entry = pte_mkclean(entry);
+
+               if (pte_write(entry))
+                       entry = pte_wrprotect(entry);
 
-               if (pte_protnone(entry))
-                       entry = pte_mkclean(pte_clear_savedwrite(entry));
-               else
-                       entry = pte_mkclean(pte_wrprotect(entry));
                set_pte_at_notify(mm, pvmw.address, pvmw.pte, entry);
        }
        *orig_pte = *pvmw.pte;
@@ -3211,7 +3238,7 @@ static int __init ksm_init(void)
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
        /* There is no significance to this priority 100 */
-       hotplug_memory_notifier(ksm_memory_callback, 100);
+       hotplug_memory_notifier(ksm_memory_callback, KSM_CALLBACK_PRI);
 #endif
        return 0;