WIP: update tizen_qemu_defconfig

[platform/kernel/linux-starfive.git] / mm / memory.c
diff --git a/mm/memory.c b/mm/memory.c

index 8a6d5c8..2083078 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -875,12 +875,8 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                         return -EBUSY;
                 return -ENOENT;
         } else if (is_pte_marker_entry(entry)) {
-               /*
-                * We're copying the pgtable should only because dst_vma has
-                * uffd-wp enabled, do sanity check.
-                */
-               WARN_ON_ONCE(!userfaultfd_wp(dst_vma));
-               set_pte_at(dst_mm, addr, dst_pte, pte);
+               if (userfaultfd_wp(dst_vma))
+                       set_pte_at(dst_mm, addr, dst_pte, pte);
                 return 0;
         }
         if (!userfaultfd_wp(dst_vma))
@@ -1341,15 +1337,6 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
         return ret;
  }
  
-/*
- * Parameter block passed down to zap_pte_range in exceptional cases.
- */
-struct zap_details {
-       struct folio *single_folio;     /* Locked folio to be unmapped */
-       bool even_cows;                 /* Zap COWed private pages too? */
-       zap_flags_t zap_flags;          /* Extra flags for zapping */
-};
-
  /* Whether we should zap all COWed (private) pages too */
  static inline bool should_zap_cows(struct zap_details *details)
  {
@@ -1720,7 +1707,7 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
  {
         struct mmu_notifier_range range;
         struct zap_details details = {
-               .zap_flags = ZAP_FLAG_DROP_MARKER,
+               .zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP,
                 /* Careful - we need to zap private pages too! */
                 .even_cows = true,
         };
@@ -1774,19 +1761,27 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
   *
   * The range must fit into one VMA.
   */
-static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
+void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
                 unsigned long size, struct zap_details *details)
  {
+       const unsigned long end = address + size;
         struct mmu_notifier_range range;
         struct mmu_gather tlb;
  
         lru_add_drain();
         mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
-                               address, address + size);
+                               address, end);
+       if (is_vm_hugetlb_page(vma))
+               adjust_range_if_pmd_sharing_possible(vma, &range.start,
+                                                    &range.end);
         tlb_gather_mmu(&tlb, vma->vm_mm);
         update_hiwater_rss(vma->vm_mm);
         mmu_notifier_invalidate_range_start(&range);
-       unmap_single_vma(&tlb, vma, address, range.end, details);
+       /*
+        * unmap 'address-end' not 'range.start-range.end' as range
+        * could have been expanded for hugetlb pmd sharing.
+        */
+       unmap_single_vma(&tlb, vma, address, end, details);
         mmu_notifier_invalidate_range_end(&range);
         tlb_finish_mmu(&tlb);
  }
@@ -2848,10 +2843,16 @@ static inline int pte_unmap_same(struct vm_fault *vmf)
         return same;
  }
  
-static inline bool __wp_page_copy_user(struct page *dst, struct page *src,
-                                      struct vm_fault *vmf)
+/*
+ * Return:
+ *     0:              copy succeeded
+ *     -EHWPOISON:     copy failed due to hwpoison in source page
+ *     -EAGAIN:        copy failed (some other reason)
+ */
+static inline int __wp_page_copy_user(struct page *dst, struct page *src,
+                                     struct vm_fault *vmf)
  {
-       bool ret;
+       int ret;
         void *kaddr;
         void __user *uaddr;
         bool locked = false;
@@ -2860,8 +2861,11 @@ static inline bool __wp_page_copy_user(struct page *dst, struct page *src,
         unsigned long addr = vmf->address;
  
         if (likely(src)) {
-               copy_user_highpage(dst, src, addr, vma);
-               return true;
+               if (copy_mc_user_highpage(dst, src, addr, vma)) {
+                       memory_failure_queue(page_to_pfn(src), 0);
+                       return -EHWPOISON;
+               }
+               return 0;
         }
  
         /*
@@ -2888,7 +2892,7 @@ static inline bool __wp_page_copy_user(struct page *dst, struct page *src,
                          * and update local tlb only
                          */
                         update_mmu_tlb(vma, addr, vmf->pte);
-                       ret = false;
+                       ret = -EAGAIN;
                         goto pte_unlock;
                 }
  
@@ -2913,7 +2917,7 @@ static inline bool __wp_page_copy_user(struct page *dst, struct page *src,
                 if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
                         /* The PTE changed under us, update local tlb */
                         update_mmu_tlb(vma, addr, vmf->pte);
-                       ret = false;
+                       ret = -EAGAIN;
                         goto pte_unlock;
                 }
  
@@ -2932,7 +2936,7 @@ warn:
                 }
         }
  
-       ret = true;
+       ret = 0;
  
  pte_unlock:
         if (locked)
@@ -3104,6 +3108,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
         pte_t entry;
         int page_copied = 0;
         struct mmu_notifier_range range;
+       int ret;
  
         delayacct_wpcopy_start();
  
@@ -3121,19 +3126,21 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
                 if (!new_page)
                         goto oom;
  
-               if (!__wp_page_copy_user(new_page, old_page, vmf)) {
+               ret = __wp_page_copy_user(new_page, old_page, vmf);
+               if (ret) {
                         /*
                          * COW failed, if the fault was solved by other,
                          * it's fine. If not, userspace would re-fault on
                          * the same address and we will handle the fault
                          * from the second attempt.
+                        * The -EHWPOISON case will not be retried.
                          */
                         put_page(new_page);
                         if (old_page)
                                 put_page(old_page);
  
                         delayacct_wpcopy_end();
-                       return 0;
+                       return ret == -EHWPOISON ? VM_FAULT_HWPOISON : 0;
                 }
                 kmsan_copy_page_meta(new_page, old_page);
         }
@@ -3624,8 +3631,21 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
         struct vm_area_struct *vma = vmf->vma;
         struct mmu_notifier_range range;
  
-       if (!folio_lock_or_retry(folio, vma->vm_mm, vmf->flags))
+       /*
+        * We need a reference to lock the folio because we don't hold
+        * the PTL so a racing thread can remove the device-exclusive
+        * entry and unmap it. If the folio is free the entry must
+        * have been removed already. If it happens to have already
+        * been re-allocated after being freed all we do is lock and
+        * unlock it.
+        */
+       if (!folio_try_get(folio))
+               return 0;
+
+       if (!folio_lock_or_retry(folio, vma->vm_mm, vmf->flags)) {
+               folio_put(folio);
                 return VM_FAULT_RETRY;
+       }
         mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, vma,
                                 vma->vm_mm, vmf->address & PAGE_MASK,
                                 (vmf->address & PAGE_MASK) + PAGE_SIZE, NULL);
@@ -3638,6 +3658,7 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
  
         pte_unmap_unlock(vmf->pte, vmf->ptl);
         folio_unlock(folio);
+       folio_put(folio);
  
         mmu_notifier_invalidate_range_end(&range);
         return 0;
@@ -3960,6 +3981,13 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
         }
  
         /*
+        * Some architectures may have to restore extra metadata to the page
+        * when reading from swap. This metadata may be indexed by swap entry
+        * so this must be called before swap_free().
+        */
+       arch_swap_restore(entry, folio);
+
+       /*
          * Remove the swap entry and conditionally try to free up the swapcache.
          * We're already holding a reference on the page but haven't mapped it
          * yet.
@@ -5237,6 +5265,125 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
  }
  EXPORT_SYMBOL_GPL(handle_mm_fault);
  
+#ifdef CONFIG_LOCK_MM_AND_FIND_VMA
+#include <linux/extable.h>
+
+static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs)
+{
+       /* Even if this succeeds, make it clear we *might* have slept */
+       if (likely(mmap_read_trylock(mm))) {
+               might_sleep();
+               return true;
+       }
+
+       if (regs && !user_mode(regs)) {
+               unsigned long ip = instruction_pointer(regs);
+               if (!search_exception_tables(ip))
+                       return false;
+       }
+
+       return !mmap_read_lock_killable(mm);
+}
+
+static inline bool mmap_upgrade_trylock(struct mm_struct *mm)
+{
+       /*
+        * We don't have this operation yet.
+        *
+        * It should be easy enough to do: it's basically a
+        *    atomic_long_try_cmpxchg_acquire()
+        * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but
+        * it also needs the proper lockdep magic etc.
+        */
+       return false;
+}
+
+static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs)
+{
+       mmap_read_unlock(mm);
+       if (regs && !user_mode(regs)) {
+               unsigned long ip = instruction_pointer(regs);
+               if (!search_exception_tables(ip))
+                       return false;
+       }
+       return !mmap_write_lock_killable(mm);
+}
+
+/*
+ * Helper for page fault handling.
+ *
+ * This is kind of equivalent to "mmap_read_lock()" followed
+ * by "find_extend_vma()", except it's a lot more careful about
+ * the locking (and will drop the lock on failure).
+ *
+ * For example, if we have a kernel bug that causes a page
+ * fault, we don't want to just use mmap_read_lock() to get
+ * the mm lock, because that would deadlock if the bug were
+ * to happen while we're holding the mm lock for writing.
+ *
+ * So this checks the exception tables on kernel faults in
+ * order to only do this all for instructions that are actually
+ * expected to fault.
+ *
+ * We can also actually take the mm lock for writing if we
+ * need to extend the vma, which helps the VM layer a lot.
+ */
+struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
+                       unsigned long addr, struct pt_regs *regs)
+{
+       struct vm_area_struct *vma;
+
+       if (!get_mmap_lock_carefully(mm, regs))
+               return NULL;
+
+       vma = find_vma(mm, addr);
+       if (likely(vma && (vma->vm_start <= addr)))
+               return vma;
+
+       /*
+        * Well, dang. We might still be successful, but only
+        * if we can extend a vma to do so.
+        */
+       if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) {
+               mmap_read_unlock(mm);
+               return NULL;
+       }
+
+       /*
+        * We can try to upgrade the mmap lock atomically,
+        * in which case we can continue to use the vma
+        * we already looked up.
+        *
+        * Otherwise we'll have to drop the mmap lock and
+        * re-take it, and also look up the vma again,
+        * re-checking it.
+        */
+       if (!mmap_upgrade_trylock(mm)) {
+               if (!upgrade_mmap_lock_carefully(mm, regs))
+                       return NULL;
+
+               vma = find_vma(mm, addr);
+               if (!vma)
+                       goto fail;
+               if (vma->vm_start <= addr)
+                       goto success;
+               if (!(vma->vm_flags & VM_GROWSDOWN))
+                       goto fail;
+       }
+
+       if (expand_stack_locked(vma, addr))
+               goto fail;
+
+success:
+       mmap_write_downgrade(mm);
+       return vma;
+
+fail:
+       mmap_write_unlock(mm);
+       return NULL;
+}
+#endif
+
  #ifndef __PAGETABLE_P4D_FOLDED
  /*
   * Allocate p4d page table.
@@ -5508,6 +5655,14 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf,
         if (mmap_read_lock_killable(mm))
                 return 0;
  
+       /* We might need to expand the stack to access it */
+       vma = vma_lookup(mm, addr);
+       if (!vma) {
+               vma = expand_stack(mm, addr);
+               if (!vma)
+                       return 0;
+       }
+
         /* ignore errors, just check how much was successfully transferred */
         while (len) {
                 int bytes, ret, offset;