WIP: update tizen_qemu_defconfig

[platform/kernel/linux-starfive.git] / mm / memory.c
diff --git a/mm/memory.c b/mm/memory.c

index 747b7ea..2083078 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2843,10 +2843,16 @@ static inline int pte_unmap_same(struct vm_fault *vmf)
         return same;
  }
  
-static inline bool __wp_page_copy_user(struct page *dst, struct page *src,
-                                      struct vm_fault *vmf)
+/*
+ * Return:
+ *     0:              copy succeeded
+ *     -EHWPOISON:     copy failed due to hwpoison in source page
+ *     -EAGAIN:        copy failed (some other reason)
+ */
+static inline int __wp_page_copy_user(struct page *dst, struct page *src,
+                                     struct vm_fault *vmf)
  {
-       bool ret;
+       int ret;
         void *kaddr;
         void __user *uaddr;
         bool locked = false;
@@ -2855,8 +2861,11 @@ static inline bool __wp_page_copy_user(struct page *dst, struct page *src,
         unsigned long addr = vmf->address;
  
         if (likely(src)) {
-               copy_user_highpage(dst, src, addr, vma);
-               return true;
+               if (copy_mc_user_highpage(dst, src, addr, vma)) {
+                       memory_failure_queue(page_to_pfn(src), 0);
+                       return -EHWPOISON;
+               }
+               return 0;
         }
  
         /*
@@ -2883,7 +2892,7 @@ static inline bool __wp_page_copy_user(struct page *dst, struct page *src,
                          * and update local tlb only
                          */
                         update_mmu_tlb(vma, addr, vmf->pte);
-                       ret = false;
+                       ret = -EAGAIN;
                         goto pte_unlock;
                 }
  
@@ -2908,7 +2917,7 @@ static inline bool __wp_page_copy_user(struct page *dst, struct page *src,
                 if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
                         /* The PTE changed under us, update local tlb */
                         update_mmu_tlb(vma, addr, vmf->pte);
-                       ret = false;
+                       ret = -EAGAIN;
                         goto pte_unlock;
                 }
  
@@ -2927,7 +2936,7 @@ warn:
                 }
         }
  
-       ret = true;
+       ret = 0;
  
  pte_unlock:
         if (locked)
@@ -3099,6 +3108,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
         pte_t entry;
         int page_copied = 0;
         struct mmu_notifier_range range;
+       int ret;
  
         delayacct_wpcopy_start();
  
@@ -3116,19 +3126,21 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
                 if (!new_page)
                         goto oom;
  
-               if (!__wp_page_copy_user(new_page, old_page, vmf)) {
+               ret = __wp_page_copy_user(new_page, old_page, vmf);
+               if (ret) {
                         /*
                          * COW failed, if the fault was solved by other,
                          * it's fine. If not, userspace would re-fault on
                          * the same address and we will handle the fault
                          * from the second attempt.
+                        * The -EHWPOISON case will not be retried.
                          */
                         put_page(new_page);
                         if (old_page)
                                 put_page(old_page);
  
                         delayacct_wpcopy_end();
-                       return 0;
+                       return ret == -EHWPOISON ? VM_FAULT_HWPOISON : 0;
                 }
                 kmsan_copy_page_meta(new_page, old_page);
         }
@@ -3969,6 +3981,13 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
         }
  
         /*
+        * Some architectures may have to restore extra metadata to the page
+        * when reading from swap. This metadata may be indexed by swap entry
+        * so this must be called before swap_free().
+        */
+       arch_swap_restore(entry, folio);
+
+       /*
          * Remove the swap entry and conditionally try to free up the swapcache.
          * We're already holding a reference on the page but haven't mapped it
          * yet.
@@ -5246,6 +5265,125 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
  }
  EXPORT_SYMBOL_GPL(handle_mm_fault);
  
+#ifdef CONFIG_LOCK_MM_AND_FIND_VMA
+#include <linux/extable.h>
+
+static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs)
+{
+       /* Even if this succeeds, make it clear we *might* have slept */
+       if (likely(mmap_read_trylock(mm))) {
+               might_sleep();
+               return true;
+       }
+       /* Not user mode: give up unless the ip has an exception table entry */
+       if (regs && !user_mode(regs)) {
+               unsigned long ip = instruction_pointer(regs);
+               if (!search_exception_tables(ip))
+                       return false;
+       }
+       /* Blocking, killable attempt: true when the lock call returns 0 */
+       return !mmap_read_lock_killable(mm);
+}
+
+static inline bool mmap_upgrade_trylock(struct mm_struct *mm)
+{
+       /*
+        * We don't have this operation yet, so always report failure.
+        *
+        * It should be easy enough to do: it's basically a
+        *    atomic_long_try_cmpxchg_acquire()
+        * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but
+        * it also needs the proper lockdep magic etc.
+        */
+       return false;
+}
+
+static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs)
+{
+       mmap_read_unlock(mm);   /* dropped unconditionally, also on failure */
+       if (regs && !user_mode(regs)) {
+               unsigned long ip = instruction_pointer(regs);
+               if (!search_exception_tables(ip))
+                       return false;   /* no exception table entry for ip */
+       }
+       return !mmap_write_lock_killable(mm);   /* true when it returns 0 */
+}
+
+/*
+ * Helper for page fault handling.
+ *
+ * This is kind of equivalent to "mmap_read_lock()" followed
+ * by "find_extend_vma()", except it's a lot more careful about
+ * the locking (and will drop the lock and return NULL on failure).
+ *
+ * For example, if we have a kernel bug that causes a page
+ * fault, we don't want to just use mmap_read_lock() to get
+ * the mm lock, because that would deadlock if the bug were
+ * to happen while we're holding the mm lock for writing.
+ *
+ * So this checks the exception tables on kernel faults in
+ * order to only do this all for instructions that are actually
+ * expected to fault.
+ *
+ * We can also actually take the mm lock for writing if we
+ * need to extend the vma, which helps the VM layer a lot.
+ */
+struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
+                       unsigned long addr, struct pt_regs *regs)
+{
+       struct vm_area_struct *vma;
+
+       if (!get_mmap_lock_carefully(mm, regs))
+               return NULL;
+
+       vma = find_vma(mm, addr);
+       if (likely(vma && (vma->vm_start <= addr)))
+               return vma;
+
+       /*
+        * Well, dang. We might still be successful, but only
+        * if we can extend a vma to do so.
+        */
+       if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) {
+               mmap_read_unlock(mm);
+               return NULL;
+       }
+
+       /*
+        * We can try to upgrade the mmap lock atomically,
+        * in which case we can continue to use the vma
+        * we already looked up.
+        *
+        * Otherwise we'll have to drop the mmap lock and
+        * re-take it, and also look up the vma again,
+        * re-checking it.
+        */
+       if (!mmap_upgrade_trylock(mm)) {
+               if (!upgrade_mmap_lock_carefully(mm, regs))
+                       return NULL;
+
+               vma = find_vma(mm, addr);
+               if (!vma)
+                       goto fail;
+               if (vma->vm_start <= addr)
+                       goto success;
+               if (!(vma->vm_flags & VM_GROWSDOWN))
+                       goto fail;
+       }
+
+       if (expand_stack_locked(vma, addr))
+               goto fail;
+
+success:
+       mmap_write_downgrade(mm);
+       return vma;
+
+fail:
+       mmap_write_unlock(mm);
+       return NULL;
+}
+#endif
+
  #ifndef __PAGETABLE_P4D_FOLDED
  /*
   * Allocate p4d page table.
@@ -5517,6 +5655,14 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf,
         if (mmap_read_lock_killable(mm))
                 return 0;
  
+       /* We might need to expand the stack to access it */
+       vma = vma_lookup(mm, addr);
+       if (!vma) {
+               vma = expand_stack(mm, addr);
+               if (!vma)
+                       return 0;
+       }
+
         /* ignore errors, just check how much was successfully transferred */
         while (len) {
                 int bytes, ret, offset;