WIP: update tizen_qemu_defconfig
diff --git a/mm/mmap.c b/mm/mmap.c
index 74a84eb..41a240b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -226,8 +226,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
                /* Search one past newbrk */
                mas_set(&mas, newbrk);
                brkvma = mas_find(&mas, oldbrk);
-               BUG_ON(brkvma == NULL);
-               if (brkvma->vm_start >= oldbrk)
+               if (!brkvma || brkvma->vm_start >= oldbrk)
                        goto out; /* mapping intersects with an existing non-brk vma. */
                /*
                 * mm->brk must be protected by write mmap_lock.
@@ -768,7 +767,8 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
        }
        if (end != vma->vm_end) {
                if (vma->vm_end > end) {
-                       if (!insert || (insert->vm_start != end)) {
+                       if ((vma->vm_end + adjust_next != end) &&
+                           (!insert || (insert->vm_start != end))) {
                                vma_mas_szero(&mas, end, vma->vm_end);
                                mas_reset(&mas);
                                VM_WARN_ON(insert &&
@@ -1525,6 +1525,10 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
        if (vma_soft_dirty_enabled(vma) && !is_vm_hugetlb_page(vma))
                return 1;
 
+       /* Do we need write faults for uffd-wp tracking? */
+       if (userfaultfd_wp(vma))
+               return 1;
+
        /* Specialty mapping? */
        if (vm_flags & VM_PFNMAP)
                return 0;
@@ -1562,7 +1566,8 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
  */
 static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 {
-       unsigned long length, gap;
+       unsigned long length, gap, low_limit;
+       struct vm_area_struct *tmp;
 
        MA_STATE(mas, &current->mm->mm_mt, 0, 0);
 
@@ -1571,12 +1576,29 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
        if (length < info->length)
                return -ENOMEM;
 
-       if (mas_empty_area(&mas, info->low_limit, info->high_limit - 1,
-                                 length))
+       low_limit = info->low_limit;
+retry:
+       if (mas_empty_area(&mas, low_limit, info->high_limit - 1, length))
                return -ENOMEM;
 
        gap = mas.index;
        gap += (info->align_offset - gap) & info->align_mask;
+       tmp = mas_next(&mas, ULONG_MAX);
+       if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
+               if (vm_start_gap(tmp) < gap + length - 1) {
+                       low_limit = tmp->vm_end;
+                       mas_reset(&mas);
+                       goto retry;
+               }
+       } else {
+               tmp = mas_prev(&mas, 0);
+               if (tmp && vm_end_gap(tmp) > gap) {
+                       low_limit = vm_end_gap(tmp);
+                       mas_reset(&mas);
+                       goto retry;
+               }
+       }
+
        return gap;
 }
 
@@ -1592,7 +1614,8 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
  */
 static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 {
-       unsigned long length, gap;
+       unsigned long length, gap, high_limit, gap_end;
+       struct vm_area_struct *tmp;
 
        MA_STATE(mas, &current->mm->mm_mt, 0, 0);
        /* Adjust search length to account for worst case alignment overhead */
@@ -1600,12 +1623,31 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
        if (length < info->length)
                return -ENOMEM;
 
-       if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1,
+       high_limit = info->high_limit;
+retry:
+       if (mas_empty_area_rev(&mas, info->low_limit, high_limit - 1,
                                length))
                return -ENOMEM;
 
        gap = mas.last + 1 - info->length;
        gap -= (gap - info->align_offset) & info->align_mask;
+       gap_end = mas.last;
+       tmp = mas_next(&mas, ULONG_MAX);
+       if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
+               if (vm_start_gap(tmp) <= gap_end) {
+                       high_limit = vm_start_gap(tmp);
+                       mas_reset(&mas);
+                       goto retry;
+               }
+       } else {
+               tmp = mas_prev(&mas, 0);
+               if (tmp && vm_end_gap(tmp) > gap) {
+                       high_limit = tmp->vm_start;
+                       mas_reset(&mas);
+                       goto retry;
+               }
+       }
+
        return gap;
 }
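
The alignment arithmetic above is terse, so here is a small self-contained sketch (userspace C with made-up values, not taken from the patch) of what the two expressions compute: unmapped_area() rounds the candidate gap up to the next address whose offset within align_mask equals align_offset, while unmapped_area_topdown() rounds it down to the previous such address. Both rely on align_mask + 1 being a power of two.

#include <stdio.h>

int main(void)
{
	unsigned long align_mask = 0xffffUL;	/* e.g. 64 KiB alignment minus one */
	unsigned long align_offset = 0x1000UL;	/* wanted offset inside that alignment */
	unsigned long gap = 0x12345678UL;	/* illustrative candidate from the tree walk */
	unsigned long up = gap, down = gap;

	/* unmapped_area(): round up; afterwards (up & align_mask) == align_offset */
	up += (align_offset - up) & align_mask;

	/* unmapped_area_topdown(): round down; afterwards (down & align_mask) == align_offset */
	down -= (down - align_offset) & align_mask;

	/* prints: gap=0x12345678 up=0x12351000 down=0x12341000 */
	printf("gap=%#lx up=%#lx down=%#lx\n", gap, up, down);
	return 0;
}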
 
@@ -1779,9 +1821,6 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
                 */
                pgoff = 0;
                get_area = shmem_get_unmapped_area;
-       } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
-               /* Ensures that larger anonymous mappings are THP aligned. */
-               get_area = thp_get_unmapped_area;
        }
 
        addr = get_area(file, addr, len, pgoff, flags);
@@ -1907,7 +1946,7 @@ static int acct_stack_growth(struct vm_area_struct *vma,
  * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
  * vma is the last one with address > vma->vm_end.  Have to extend vma.
  */
-int expand_upwards(struct vm_area_struct *vma, unsigned long address)
+static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
        struct mm_struct *mm = vma->vm_mm;
        struct vm_area_struct *next;
@@ -1998,6 +2037,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 
 /*
  * vma is the first one with address < vma->vm_start.  Have to extend vma.
+ * mmap_lock held for writing.
  */
 int expand_downwards(struct vm_area_struct *vma, unsigned long address)
 {
@@ -2006,16 +2046,20 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
        struct vm_area_struct *prev;
        int error = 0;
 
+       if (!(vma->vm_flags & VM_GROWSDOWN))
+               return -EFAULT;
+
        address &= PAGE_MASK;
-       if (address < mmap_min_addr)
+       if (address < mmap_min_addr || address < FIRST_USER_ADDRESS)
                return -EPERM;
 
        /* Enforce stack_guard_gap */
        prev = mas_prev(&mas, 0);
        /* Check that both stack segments have the same anon_vma? */
-       if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
-                       vma_is_accessible(prev)) {
-               if (address - prev->vm_end < stack_guard_gap)
+       if (prev) {
+               if (!(prev->vm_flags & VM_GROWSDOWN) &&
+                   vma_is_accessible(prev) &&
+                   (address - prev->vm_end < stack_guard_gap))
                        return -ENOMEM;
        }
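
For reference, the restructured guard-gap test above refuses a downward expansion when the proposed new stack start would land within stack_guard_gap bytes of an accessible, non-VM_GROWSDOWN mapping directly below it; stack_guard_gap defaults to 256 pages earlier in this file (1 MiB with 4 KiB pages). A tiny standalone sketch of that arithmetic, using hypothetical addresses:

#include <stdio.h>

#define PAGE_SHIFT 12				/* assume 4 KiB pages */
static unsigned long stack_guard_gap = 256UL << PAGE_SHIFT;

int main(void)
{
	unsigned long prev_vm_end = 0x7f0000000000UL;	/* end of the mapping below the stack */
	unsigned long address = prev_vm_end + (255UL << PAGE_SHIFT);	/* proposed new stack start */

	/* mirrors the check in expand_downwards() */
	if (address - prev_vm_end < stack_guard_gap)
		printf("-ENOMEM: only %#lx bytes of gap, need %#lx\n",
		       address - prev_vm_end, stack_guard_gap);
	else
		printf("expansion allowed\n");
	return 0;
}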
 
@@ -2094,13 +2138,12 @@ static int __init cmdline_parse_stack_guard_gap(char *p)
 __setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
 
 #ifdef CONFIG_STACK_GROWSUP
-int expand_stack(struct vm_area_struct *vma, unsigned long address)
+int expand_stack_locked(struct vm_area_struct *vma, unsigned long address)
 {
        return expand_upwards(vma, address);
 }
 
-struct vm_area_struct *
-find_extend_vma(struct mm_struct *mm, unsigned long addr)
+struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, unsigned long addr)
 {
        struct vm_area_struct *vma, *prev;
 
@@ -2108,20 +2151,23 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
        vma = find_vma_prev(mm, addr, &prev);
        if (vma && (vma->vm_start <= addr))
                return vma;
-       if (!prev || expand_stack(prev, addr))
+       if (!prev)
+               return NULL;
+       if (expand_stack_locked(prev, addr))
                return NULL;
        if (prev->vm_flags & VM_LOCKED)
                populate_vma_page_range(prev, addr, prev->vm_end, NULL);
        return prev;
 }
 #else
-int expand_stack(struct vm_area_struct *vma, unsigned long address)
+int expand_stack_locked(struct vm_area_struct *vma, unsigned long address)
 {
+       if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
+               return -EINVAL;
        return expand_downwards(vma, address);
 }
 
-struct vm_area_struct *
-find_extend_vma(struct mm_struct *mm, unsigned long addr)
+struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, unsigned long addr)
 {
        struct vm_area_struct *vma;
        unsigned long start;
@@ -2132,10 +2178,8 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
                return NULL;
        if (vma->vm_start <= addr)
                return vma;
-       if (!(vma->vm_flags & VM_GROWSDOWN))
-               return NULL;
        start = vma->vm_start;
-       if (expand_stack(vma, addr))
+       if (expand_stack_locked(vma, addr))
                return NULL;
        if (vma->vm_flags & VM_LOCKED)
                populate_vma_page_range(vma, addr, start, NULL);
@@ -2143,7 +2187,91 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 }
 #endif
 
-EXPORT_SYMBOL_GPL(find_extend_vma);
+/*
+ * IA64 has some horrid mapping rules: it can expand both up and down,
+ * but with various special rules.
+ *
+ * We'll get rid of this architecture eventually, so the ugliness is
+ * temporary.
+ */
+#ifdef CONFIG_IA64
+static inline bool vma_expand_ok(struct vm_area_struct *vma, unsigned long addr)
+{
+       return REGION_NUMBER(addr) == REGION_NUMBER(vma->vm_start) &&
+               REGION_OFFSET(addr) < RGN_MAP_LIMIT;
+}
+
+/*
+ * IA64 stacks grow down, but there's a special register backing store
+ * that can grow up. Only sequentially, though, so the new address must
+ * match vm_end.
+ */
+static inline int vma_expand_up(struct vm_area_struct *vma, unsigned long addr)
+{
+       if (!vma_expand_ok(vma, addr))
+               return -EFAULT;
+       if (vma->vm_end != (addr & PAGE_MASK))
+               return -EFAULT;
+       return expand_upwards(vma, addr);
+}
+
+static inline bool vma_expand_down(struct vm_area_struct *vma, unsigned long addr)
+{
+       if (!vma_expand_ok(vma, addr))
+               return -EFAULT;
+       return expand_downwards(vma, addr);
+}
+
+#elif defined(CONFIG_STACK_GROWSUP)
+
+#define vma_expand_up(vma,addr) expand_upwards(vma, addr)
+#define vma_expand_down(vma, addr) (-EFAULT)
+
+#else
+
+#define vma_expand_up(vma,addr) (-EFAULT)
+#define vma_expand_down(vma, addr) expand_downwards(vma, addr)
+
+#endif
+
+/*
+ * expand_stack(): legacy interface for page faulting. Don't use unless
+ * you have to.
+ *
+ * This is called with the mm locked for reading, drops the lock, takes
+ * the lock for writing, tries to look up a vma again, expands it if
+ * necessary, and downgrades the lock to reading again.
+ *
+ * If no vma is found or it can't be expanded, it returns NULL and has
+ * dropped the lock.
+ */
+struct vm_area_struct *expand_stack(struct mm_struct *mm, unsigned long addr)
+{
+       struct vm_area_struct *vma, *prev;
+
+       mmap_read_unlock(mm);
+       if (mmap_write_lock_killable(mm))
+               return NULL;
+
+       vma = find_vma_prev(mm, addr, &prev);
+       if (vma && vma->vm_start <= addr)
+               goto success;
+
+       if (prev && !vma_expand_up(prev, addr)) {
+               vma = prev;
+               goto success;
+       }
+
+       if (vma && !vma_expand_down(vma, addr))
+               goto success;
+
+       mmap_write_unlock(mm);
+       return NULL;
+
+success:
+       mmap_write_downgrade(mm);
+       return vma;
+}
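
The block comment above expand_stack() spells out its locking contract: it is entered with mmap_lock held for reading, trades it for the write lock to redo the lookup and expand, and returns either a vma with the lock downgraded back to read mode or NULL with the lock dropped. A minimal caller sketch under those assumptions (the helper name is hypothetical, not something this patch adds):

#include <linux/mm.h>

/* Hypothetical fault-path helper, only to illustrate the contract above. */
static struct vm_area_struct *fault_find_vma(struct mm_struct *mm,
					     unsigned long addr)
{
	struct vm_area_struct *vma;

	mmap_read_lock(mm);
	vma = find_vma(mm, addr);
	if (vma && vma->vm_start <= addr)
		return vma;		/* already mapped; read lock still held */

	/*
	 * expand_stack() drops the read lock, retries the lookup under the
	 * write lock, expands if it can, then downgrades to the read lock.
	 * On failure it returns NULL with mmap_lock already released, so
	 * this path must not unlock again.
	 */
	return expand_stack(mm, addr);
}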
 
 /*
  * Ok - we have the memory areas we should free on a maple tree so release them,
@@ -2273,19 +2401,6 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
        return __split_vma(mm, vma, addr, new_below);
 }
 
-static inline int munmap_sidetree(struct vm_area_struct *vma,
-                                  struct ma_state *mas_detach)
-{
-       mas_set_range(mas_detach, vma->vm_start, vma->vm_end - 1);
-       if (mas_store_gfp(mas_detach, vma, GFP_KERNEL))
-               return -ENOMEM;
-
-       if (vma->vm_flags & VM_LOCKED)
-               vma->vm_mm->locked_vm -= vma_pages(vma);
-
-       return 0;
-}
-
 /*
  * do_mas_align_munmap() - munmap the aligned region from @start to @end.
  * @mas: The maple_state, ideally set up to alter the correct tree location.
@@ -2307,8 +2422,9 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
        struct maple_tree mt_detach;
        int count = 0;
        int error = -ENOMEM;
+       unsigned long locked_vm = 0;
        MA_STATE(mas_detach, &mt_detach, 0, 0);
-       mt_init_flags(&mt_detach, MT_FLAGS_LOCK_EXTERN);
+       mt_init_flags(&mt_detach, mas->tree->ma_flags & MT_FLAGS_LOCK_MASK);
        mt_set_external_lock(&mt_detach, &mm->mmap_lock);
 
        if (mas_preallocate(mas, vma, GFP_KERNEL))
@@ -2365,18 +2481,24 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
 
                        mas_set(mas, end);
                        split = mas_prev(mas, 0);
-                       error = munmap_sidetree(split, &mas_detach);
+                       mas_set_range(&mas_detach, split->vm_start, split->vm_end - 1);
+                       error = mas_store_gfp(&mas_detach, split, GFP_KERNEL);
                        if (error)
-                               goto munmap_sidetree_failed;
+                               goto munmap_gather_failed;
+                       if (split->vm_flags & VM_LOCKED)
+                               locked_vm += vma_pages(split);
 
                        count++;
                        if (vma == next)
                                vma = split;
                        break;
                }
-               error = munmap_sidetree(next, &mas_detach);
+               mas_set_range(&mas_detach, next->vm_start, next->vm_end - 1);
+               error = mas_store_gfp(&mas_detach, next, GFP_KERNEL);
                if (error)
-                       goto munmap_sidetree_failed;
+                       goto munmap_gather_failed;
+               if (next->vm_flags & VM_LOCKED)
+                       locked_vm += vma_pages(next);
 
                count++;
 #ifdef CONFIG_DEBUG_VM_MAPLE_TREE
@@ -2425,7 +2547,10 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
                mas_set_range(mas, start, end - 1);
        }
 #endif
+       /* Point of no return */
        mas_store_prealloc(mas, NULL);
+
+       mm->locked_vm -= locked_vm;
        mm->map_count -= count;
        /*
         * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
@@ -2452,7 +2577,7 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
        return downgrade ? 1 : 0;
 
 userfaultfd_error:
-munmap_sidetree_failed:
+munmap_gather_failed:
 end_split_failed:
        __mt_destroy(&mt_detach);
 start_split_failed:
@@ -2949,9 +3074,9 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
         * Expand the existing vma if possible; Note that singular lists do not
         * occur after forking, so the expand will only happen on new VMAs.
         */
-       if (vma &&
-           (!vma->anon_vma || list_is_singular(&vma->anon_vma_chain)) &&
-           ((vma->vm_flags & ~VM_SOFTDIRTY) == flags)) {
+       if (vma && vma->vm_end == addr && !vma_policy(vma) &&
+           can_vma_merge_after(vma, flags, NULL, NULL,
+                               addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) {
                mas_set_range(mas, vma->vm_start, addr + len - 1);
                if (mas_preallocate(mas, vma, GFP_KERNEL))
                        return -ENOMEM;
@@ -3038,11 +3163,6 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
                goto munmap_failed;
 
        vma = mas_prev(&mas, 0);
-       if (!vma || vma->vm_end != addr || vma_policy(vma) ||
-           !can_vma_merge_after(vma, flags, NULL, NULL,
-                                addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL))
-               vma = NULL;
-
        ret = do_brk_flags(&mas, vma, addr, len, flags);
        populate = ((mm->def_flags & VM_LOCKED) != 0);
        mmap_write_unlock(mm);
@@ -3100,6 +3220,7 @@ void exit_mmap(struct mm_struct *mm)
         */
        set_bit(MMF_OOM_SKIP, &mm->flags);
        mmap_write_lock(mm);
+       mt_clear_in_rcu(&mm->mm_mt);
        free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS,
                      USER_PGTABLES_CEILING);
        tlb_finish_mmu(&tlb);