1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  mm/userfaultfd.c
4  *
5  *  Copyright (C) 2015  Red Hat, Inc.
6  */
7
8 #include <linux/mm.h>
9 #include <linux/sched/signal.h>
10 #include <linux/pagemap.h>
11 #include <linux/rmap.h>
12 #include <linux/swap.h>
13 #include <linux/swapops.h>
14 #include <linux/userfaultfd_k.h>
15 #include <linux/mmu_notifier.h>
16 #include <linux/hugetlb.h>
17 #include <linux/shmem_fs.h>
18 #include <asm/tlbflush.h>
19 #include "internal.h"
20
21 static __always_inline
22 struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
23                                     unsigned long dst_start,
24                                     unsigned long len)
25 {
26         /*
27          * Make sure that the dst range is both valid and fully within a
28          * single existing vma.
29          */
30         struct vm_area_struct *dst_vma;
31
32         dst_vma = find_vma(dst_mm, dst_start);
33         if (!dst_vma)
34                 return NULL;
35
36         if (dst_start < dst_vma->vm_start ||
37             dst_start + len > dst_vma->vm_end)
38                 return NULL;
39
40         /*
41          * Check that the vma is registered in uffd; this is required to
42          * enforce the VM_MAYWRITE check done at uffd registration
43          * time.
44          */
45         if (!dst_vma->vm_userfaultfd_ctx.ctx)
46                 return NULL;
47
48         return dst_vma;
49 }
50
51 /*
52  * Install a PTE to map dst_addr (within dst_vma) to page.
53  *
54  * This function handles both MCOPY_ATOMIC_NORMAL and _CONTINUE for both shmem
55  * and anon, and for both shared and private VMAs.
56  */
57 int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
58                              struct vm_area_struct *dst_vma,
59                              unsigned long dst_addr, struct page *page,
60                              bool newly_allocated, bool wp_copy)
61 {
62         int ret;
63         pte_t _dst_pte, *dst_pte;
64         bool writable = dst_vma->vm_flags & VM_WRITE;
65         bool vm_shared = dst_vma->vm_flags & VM_SHARED;
66         bool page_in_cache = page->mapping;
67         spinlock_t *ptl;
68         struct inode *inode;
69         pgoff_t offset, max_off;
70
71         _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
72         if (page_in_cache && !vm_shared)
73                 writable = false;
74         if (writable || !page_in_cache)
75                 _dst_pte = pte_mkdirty(_dst_pte);
76         if (writable) {
77                 if (wp_copy)
78                         _dst_pte = pte_mkuffd_wp(_dst_pte);
79                 else
80                         _dst_pte = pte_mkwrite(_dst_pte);
81         }
82
83         dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
84
85         if (vma_is_shmem(dst_vma)) {
86                 /* serialize against truncate with the page table lock */
87                 inode = dst_vma->vm_file->f_inode;
88                 offset = linear_page_index(dst_vma, dst_addr);
89                 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
90                 ret = -EFAULT;
91                 if (unlikely(offset >= max_off))
92                         goto out_unlock;
93         }
94
95         ret = -EEXIST;
96         if (!pte_none(*dst_pte))
97                 goto out_unlock;
98
99         if (page_in_cache)
100                 page_add_file_rmap(page, false);
101         else
102                 page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
103
104         /*
105          * Must happen after rmap, as mm_counter() checks mapping (via
106          * PageAnon()), which is set by __page_set_anon_rmap().
107          */
108         inc_mm_counter(dst_mm, mm_counter(page));
109
110         if (newly_allocated)
111                 lru_cache_add_inactive_or_unevictable(page, dst_vma);
112
113         set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
114
115         /* No need to invalidate - it was non-present before */
116         update_mmu_cache(dst_vma, dst_addr, dst_pte);
117         ret = 0;
118 out_unlock:
119         pte_unmap_unlock(dst_pte, ptl);
120         return ret;
121 }
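/*
 * A rough summary (an illustration, not an exhaustive table) of the PTE bits
 * chosen at the top of mfill_atomic_install_pte():
 *  - anon page in a VM_WRITE vma: dirty + writable, or dirty + uffd-wp when
 *    wp_copy is requested;
 *  - page-cache page in a shared VM_WRITE vma: same as the anon case;
 *  - page-cache page in a private vma: read-only and clean, so a later write
 *    fault takes the normal CoW path;
 *  - any page in a !VM_WRITE vma: read-only.
 */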
122
123 static int mcopy_atomic_pte(struct mm_struct *dst_mm,
124                             pmd_t *dst_pmd,
125                             struct vm_area_struct *dst_vma,
126                             unsigned long dst_addr,
127                             unsigned long src_addr,
128                             struct page **pagep,
129                             bool wp_copy)
130 {
131         void *page_kaddr;
132         int ret;
133         struct page *page;
134
135         if (!*pagep) {
136                 ret = -ENOMEM;
137                 page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
138                 if (!page)
139                         goto out;
140
141                 page_kaddr = kmap_atomic(page);
142                 ret = copy_from_user(page_kaddr,
143                                      (const void __user *) src_addr,
144                                      PAGE_SIZE);
145                 kunmap_atomic(page_kaddr);
146
147                 /* fallback to copy_from_user outside mmap_lock */
148                 if (unlikely(ret)) {
149                         ret = -ENOENT;
150                         *pagep = page;
151                         /* don't free the page */
152                         goto out;
153                 }
154
155                 flush_dcache_page(page);
156         } else {
157                 page = *pagep;
158                 *pagep = NULL;
159         }
160
161         /*
162          * The memory barrier inside __SetPageUptodate makes sure that
163          * preceding stores to the page contents become visible before
164          * the set_pte_at() write.
165          */
166         __SetPageUptodate(page);
167
168         ret = -ENOMEM;
169         if (mem_cgroup_charge(page, dst_mm, GFP_KERNEL))
170                 goto out_release;
171
172         ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
173                                        page, true, wp_copy);
174         if (ret)
175                 goto out_release;
176 out:
177         return ret;
178 out_release:
179         put_page(page);
180         goto out;
181 }
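/*
 * Note on the -ENOENT path above: copy_from_user() may fail while mmap_lock
 * is held (for instance the source page still has to be faulted in), so the
 * freshly allocated page is handed back through *pagep and the caller
 * (__mcopy_atomic) repeats the copy without mmap_lock before retrying.
 */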
182
183 static int mfill_zeropage_pte(struct mm_struct *dst_mm,
184                               pmd_t *dst_pmd,
185                               struct vm_area_struct *dst_vma,
186                               unsigned long dst_addr)
187 {
188         pte_t _dst_pte, *dst_pte;
189         spinlock_t *ptl;
190         int ret;
191         pgoff_t offset, max_off;
192         struct inode *inode;
193
194         _dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
195                                          dst_vma->vm_page_prot));
196         dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
197         if (dst_vma->vm_file) {
198                 /* the shmem MAP_PRIVATE case requires checking the i_size */
199                 inode = dst_vma->vm_file->f_inode;
200                 offset = linear_page_index(dst_vma, dst_addr);
201                 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
202                 ret = -EFAULT;
203                 if (unlikely(offset >= max_off))
204                         goto out_unlock;
205         }
206         ret = -EEXIST;
207         if (!pte_none(*dst_pte))
208                 goto out_unlock;
209         set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
210         /* No need to invalidate - it was non-present before */
211         update_mmu_cache(dst_vma, dst_addr, dst_pte);
212         ret = 0;
213 out_unlock:
214         pte_unmap_unlock(dst_pte, ptl);
215         return ret;
216 }
217
218 /* Handles UFFDIO_CONTINUE for all shmem VMAs (shared or private). */
219 static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
220                                 pmd_t *dst_pmd,
221                                 struct vm_area_struct *dst_vma,
222                                 unsigned long dst_addr,
223                                 bool wp_copy)
224 {
225         struct inode *inode = file_inode(dst_vma->vm_file);
226         pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
227         struct page *page;
228         int ret;
229
230         ret = shmem_getpage(inode, pgoff, &page, SGP_READ);
231         if (ret)
232                 goto out;
233         if (!page) {
234                 ret = -EFAULT;
235                 goto out;
236         }
237
238         ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
239                                        page, false, wp_copy);
240         if (ret)
241                 goto out_release;
242
243         unlock_page(page);
244         ret = 0;
245 out:
246         return ret;
247 out_release:
248         unlock_page(page);
249         put_page(page);
250         goto out;
251 }
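/*
 * A minimal userspace sketch (an illustration, not part of this file) of the
 * UFFDIO_CONTINUE call that ends up here, resolving a minor fault once the
 * page cache already holds the contents; "uffd", "fault_addr" and "page_size"
 * are assumptions of the example:
 *
 *	struct uffdio_continue cont = {
 *		.range = { .start = fault_addr & ~(page_size - 1),
 *			   .len   = page_size },
 *		.mode  = 0,
 *	};
 *	if (ioctl(uffd, UFFDIO_CONTINUE, &cont) == -1)
 *		perror("UFFDIO_CONTINUE");
 *
 * On success cont.mapped reports how many bytes were mapped.
 */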
252
253 static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
254 {
255         pgd_t *pgd;
256         p4d_t *p4d;
257         pud_t *pud;
258
259         pgd = pgd_offset(mm, address);
260         p4d = p4d_alloc(mm, pgd, address);
261         if (!p4d)
262                 return NULL;
263         pud = pud_alloc(mm, p4d, address);
264         if (!pud)
265                 return NULL;
266         /*
267          * Note that this runs whether or not the pmd was missing:
268          * *pmd may already be established, and in turn it may also
269          * be a trans_huge_pmd.
270          */
271         return pmd_alloc(mm, pud, address);
272 }
273
274 #ifdef CONFIG_HUGETLB_PAGE
275 /*
276  * __mcopy_atomic processing for HUGETLB vmas.  Note that this routine is
277  * called with mmap_lock held; it will release mmap_lock before returning.
278  */
279 static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
280                                               struct vm_area_struct *dst_vma,
281                                               unsigned long dst_start,
282                                               unsigned long src_start,
283                                               unsigned long len,
284                                               enum mcopy_atomic_mode mode)
285 {
286         int vm_shared = dst_vma->vm_flags & VM_SHARED;
287         ssize_t err;
288         pte_t *dst_pte;
289         unsigned long src_addr, dst_addr;
290         long copied;
291         struct page *page;
292         unsigned long vma_hpagesize;
293         pgoff_t idx;
294         u32 hash;
295         struct address_space *mapping;
296
297         /*
298          * There is no default zero huge page for all huge page sizes
299          * supported by hugetlb.  A PMD_SIZE zero huge page may exist, as
300          * used by THP.  Since we cannot reliably insert a zero page, this
301          * feature is not supported.
302          */
303         if (mode == MCOPY_ATOMIC_ZEROPAGE) {
304                 mmap_read_unlock(dst_mm);
305                 return -EINVAL;
306         }
307
308         src_addr = src_start;
309         dst_addr = dst_start;
310         copied = 0;
311         page = NULL;
312         vma_hpagesize = vma_kernel_pagesize(dst_vma);
313
314         /*
315          * Validate alignment based on huge page size
316          */
317         err = -EINVAL;
318         if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
319                 goto out_unlock;
320
321 retry:
322         /*
323          * On routine entry dst_vma is set.  If we had to drop mmap_lock and
324          * retry, dst_vma will be set to NULL and we must look it up again.
325          */
326         if (!dst_vma) {
327                 err = -ENOENT;
328                 dst_vma = find_dst_vma(dst_mm, dst_start, len);
329                 if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
330                         goto out_unlock;
331
332                 err = -EINVAL;
333                 if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
334                         goto out_unlock;
335
336                 vm_shared = dst_vma->vm_flags & VM_SHARED;
337         }
338
339         /*
340          * If not shared, ensure the dst_vma has an anon_vma.
341          */
342         err = -ENOMEM;
343         if (!vm_shared) {
344                 if (unlikely(anon_vma_prepare(dst_vma)))
345                         goto out_unlock;
346         }
347
348         while (src_addr < src_start + len) {
349                 BUG_ON(dst_addr >= dst_start + len);
350
351                 /*
352                  * Serialize via i_mmap_rwsem and hugetlb_fault_mutex.
353                  * i_mmap_rwsem ensures the dst_pte remains valid even
354                  * in the case of shared pmds.  The fault mutex prevents
355                  * races with other faulting threads.
356                  */
357                 mapping = dst_vma->vm_file->f_mapping;
358                 i_mmap_lock_read(mapping);
359                 idx = linear_page_index(dst_vma, dst_addr);
360                 hash = hugetlb_fault_mutex_hash(mapping, idx);
361                 mutex_lock(&hugetlb_fault_mutex_table[hash]);
362
363                 err = -ENOMEM;
364                 dst_pte = huge_pte_alloc(dst_mm, dst_vma, dst_addr, vma_hpagesize);
365                 if (!dst_pte) {
366                         mutex_unlock(&hugetlb_fault_mutex_table[hash]);
367                         i_mmap_unlock_read(mapping);
368                         goto out_unlock;
369                 }
370
371                 if (mode != MCOPY_ATOMIC_CONTINUE &&
372                     !huge_pte_none(huge_ptep_get(dst_pte))) {
373                         err = -EEXIST;
374                         mutex_unlock(&hugetlb_fault_mutex_table[hash]);
375                         i_mmap_unlock_read(mapping);
376                         goto out_unlock;
377                 }
378
379                 err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
380                                                dst_addr, src_addr, mode, &page);
381
382                 mutex_unlock(&hugetlb_fault_mutex_table[hash]);
383                 i_mmap_unlock_read(mapping);
384
385                 cond_resched();
386
387                 if (unlikely(err == -ENOENT)) {
388                         mmap_read_unlock(dst_mm);
389                         BUG_ON(!page);
390
391                         err = copy_huge_page_from_user(page,
392                                                 (const void __user *)src_addr,
393                                                 vma_hpagesize / PAGE_SIZE,
394                                                 true);
395                         if (unlikely(err)) {
396                                 err = -EFAULT;
397                                 goto out;
398                         }
399                         mmap_read_lock(dst_mm);
400
401                         dst_vma = NULL;
402                         goto retry;
403                 } else
404                         BUG_ON(page);
405
406                 if (!err) {
407                         dst_addr += vma_hpagesize;
408                         src_addr += vma_hpagesize;
409                         copied += vma_hpagesize;
410
411                         if (fatal_signal_pending(current))
412                                 err = -EINTR;
413                 }
414                 if (err)
415                         break;
416         }
417
418 out_unlock:
419         mmap_read_unlock(dst_mm);
420 out:
421         if (page)
422                 put_page(page);
423         BUG_ON(copied < 0);
424         BUG_ON(err > 0);
425         BUG_ON(!copied && !err);
426         return copied ? copied : err;
427 }
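/*
 * As a concrete example of the -EINVAL cases above: with a 2MB hugetlb
 * mapping, both the destination address and the length passed to UFFDIO_COPY
 * must be 2MB aligned, and UFFDIO_ZEROPAGE is rejected outright since there
 * is no generic zero huge page to insert.
 */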
428 #else /* !CONFIG_HUGETLB_PAGE */
429 /* fail at build time if gcc attempts to use this */
430 extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
431                                       struct vm_area_struct *dst_vma,
432                                       unsigned long dst_start,
433                                       unsigned long src_start,
434                                       unsigned long len,
435                                       enum mcopy_atomic_mode mode);
436 #endif /* CONFIG_HUGETLB_PAGE */
437
438 static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
439                                                 pmd_t *dst_pmd,
440                                                 struct vm_area_struct *dst_vma,
441                                                 unsigned long dst_addr,
442                                                 unsigned long src_addr,
443                                                 struct page **page,
444                                                 enum mcopy_atomic_mode mode,
445                                                 bool wp_copy)
446 {
447         ssize_t err;
448
449         if (mode == MCOPY_ATOMIC_CONTINUE) {
450                 return mcontinue_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
451                                             wp_copy);
452         }
453
454         /*
455          * The normal page fault path for a shmem will invoke the
456          * fault, fill the hole in the file and COW it right away. The
457          * result generates plain anonymous memory. So when we are
458          * asked to fill a hole in a MAP_PRIVATE shmem mapping, we'll
459          * generate anonymous memory directly without actually filling
460          * the hole. For the MAP_PRIVATE case the robustness check
461          * only happens in the pagetable (to verify it's still none)
462          * and not in the radix tree.
463          */
464         if (!(dst_vma->vm_flags & VM_SHARED)) {
465                 if (mode == MCOPY_ATOMIC_NORMAL)
466                         err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
467                                                dst_addr, src_addr, page,
468                                                wp_copy);
469                 else
470                         err = mfill_zeropage_pte(dst_mm, dst_pmd,
471                                                  dst_vma, dst_addr);
472         } else {
473                 VM_WARN_ON_ONCE(wp_copy);
474                 err = shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
475                                              dst_addr, src_addr,
476                                              mode != MCOPY_ATOMIC_NORMAL,
477                                              page);
478         }
479
480         return err;
481 }
482
483 static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
484                                               unsigned long dst_start,
485                                               unsigned long src_start,
486                                               unsigned long len,
487                                               enum mcopy_atomic_mode mcopy_mode,
488                                               atomic_t *mmap_changing,
489                                               __u64 mode)
490 {
491         struct vm_area_struct *dst_vma;
492         ssize_t err;
493         pmd_t *dst_pmd;
494         unsigned long src_addr, dst_addr;
495         long copied;
496         struct page *page;
497         bool wp_copy;
498
499         /*
500          * Sanitize the command parameters:
501          */
502         BUG_ON(dst_start & ~PAGE_MASK);
503         BUG_ON(len & ~PAGE_MASK);
504
505         /* Does the address range wrap, or is the span zero-sized? */
506         BUG_ON(src_start + len <= src_start);
507         BUG_ON(dst_start + len <= dst_start);
508
509         src_addr = src_start;
510         dst_addr = dst_start;
511         copied = 0;
512         page = NULL;
513 retry:
514         mmap_read_lock(dst_mm);
515
516         /*
517          * If memory mappings are changing because of a non-cooperative
518          * operation (e.g. mremap) running in parallel, bail out and
519          * request the user to retry later.
520          */
521         err = -EAGAIN;
522         if (mmap_changing && atomic_read(mmap_changing))
523                 goto out_unlock;
524
525         /*
526          * Make sure the vma is not shared, and that the dst range is
527          * both valid and fully within a single existing vma.
528          */
529         err = -ENOENT;
530         dst_vma = find_dst_vma(dst_mm, dst_start, len);
531         if (!dst_vma)
532                 goto out_unlock;
533
534         err = -EINVAL;
535         /*
536          * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
537          * it will overwrite vm_ops, so vma_is_anonymous must return false.
538          */
539         if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
540             dst_vma->vm_flags & VM_SHARED))
541                 goto out_unlock;
542
543         /*
544          * validate 'mode' now that we know the dst_vma: don't allow
545          * a wrprotect copy if the userfaultfd didn't register as WP.
546          */
547         wp_copy = mode & UFFDIO_COPY_MODE_WP;
548         if (wp_copy && !(dst_vma->vm_flags & VM_UFFD_WP))
549                 goto out_unlock;
550
551         /*
552          * If this is a HUGETLB vma, pass off to the appropriate routine.
553          */
554         if (is_vm_hugetlb_page(dst_vma))
555                 return  __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
556                                                 src_start, len, mcopy_mode);
557
558         if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
559                 goto out_unlock;
560         if (!vma_is_shmem(dst_vma) && mcopy_mode == MCOPY_ATOMIC_CONTINUE)
561                 goto out_unlock;
562
563         /*
564          * Ensure the dst_vma has an anon_vma or this page
565          * would get a NULL anon_vma when moved into the
566          * dst_vma.
567          */
568         err = -ENOMEM;
569         if (!(dst_vma->vm_flags & VM_SHARED) &&
570             unlikely(anon_vma_prepare(dst_vma)))
571                 goto out_unlock;
572
573         while (src_addr < src_start + len) {
574                 pmd_t dst_pmdval;
575
576                 BUG_ON(dst_addr >= dst_start + len);
577
578                 dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
579                 if (unlikely(!dst_pmd)) {
580                         err = -ENOMEM;
581                         break;
582                 }
583
584                 dst_pmdval = pmd_read_atomic(dst_pmd);
585                 /*
586                  * If the dst_pmd is mapped as a THP, don't
587                  * override it and just be strict.
588                  */
589                 if (unlikely(pmd_trans_huge(dst_pmdval))) {
590                         err = -EEXIST;
591                         break;
592                 }
593                 if (unlikely(pmd_none(dst_pmdval)) &&
594                     unlikely(__pte_alloc(dst_mm, dst_pmd))) {
595                         err = -ENOMEM;
596                         break;
597                 }
598                 /* If a huge pmd materialized from under us, fail */
599                 if (unlikely(pmd_trans_huge(*dst_pmd))) {
600                         err = -EFAULT;
601                         break;
602                 }
603
604                 BUG_ON(pmd_none(*dst_pmd));
605                 BUG_ON(pmd_trans_huge(*dst_pmd));
606
607                 err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
608                                        src_addr, &page, mcopy_mode, wp_copy);
609                 cond_resched();
610
611                 if (unlikely(err == -ENOENT)) {
612                         void *page_kaddr;
613
614                         mmap_read_unlock(dst_mm);
615                         BUG_ON(!page);
616
617                         page_kaddr = kmap(page);
618                         err = copy_from_user(page_kaddr,
619                                              (const void __user *) src_addr,
620                                              PAGE_SIZE);
621                         kunmap(page);
622                         if (unlikely(err)) {
623                                 err = -EFAULT;
624                                 goto out;
625                         }
626                         flush_dcache_page(page);
627                         goto retry;
628                 } else
629                         BUG_ON(page);
630
631                 if (!err) {
632                         dst_addr += PAGE_SIZE;
633                         src_addr += PAGE_SIZE;
634                         copied += PAGE_SIZE;
635
636                         if (fatal_signal_pending(current))
637                                 err = -EINTR;
638                 }
639                 if (err)
640                         break;
641         }
642
643 out_unlock:
644         mmap_read_unlock(dst_mm);
645 out:
646         if (page)
647                 put_page(page);
648         BUG_ON(copied < 0);
649         BUG_ON(err > 0);
650         BUG_ON(!copied && !err);
651         return copied ? copied : err;
652 }
653
654 ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
655                      unsigned long src_start, unsigned long len,
656                      atomic_t *mmap_changing, __u64 mode)
657 {
658         return __mcopy_atomic(dst_mm, dst_start, src_start, len,
659                               MCOPY_ATOMIC_NORMAL, mmap_changing, mode);
660 }
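/*
 * A minimal userspace sketch (an illustration, not part of this file) of the
 * UFFDIO_COPY ioctl that reaches mcopy_atomic(); "uffd", "buf", "fault_addr"
 * and "page_size" are assumptions of the example:
 *
 *	struct uffdio_copy copy = {
 *		.dst  = fault_addr & ~(page_size - 1),
 *		.src  = (unsigned long)buf,
 *		.len  = page_size,
 *		.mode = 0,
 *	};
 *	if (ioctl(uffd, UFFDIO_COPY, &copy) == -1)
 *		perror("UFFDIO_COPY");
 *
 * copy.copy reports the number of bytes copied; setting UFFDIO_COPY_MODE_WP
 * in .mode maps the page write-protected (the wp_copy case above).
 */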
661
662 ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
663                        unsigned long len, atomic_t *mmap_changing)
664 {
665         return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE,
666                               mmap_changing, 0);
667 }
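/*
 * The matching userspace call for the zeropage path is UFFDIO_ZEROPAGE; a
 * minimal sketch with the same assumed names as above:
 *
 *	struct uffdio_zeropage zp = {
 *		.range = { .start = fault_addr & ~(page_size - 1),
 *			   .len   = page_size },
 *		.mode  = 0,
 *	};
 *	if (ioctl(uffd, UFFDIO_ZEROPAGE, &zp) == -1)
 *		perror("UFFDIO_ZEROPAGE");
 */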
668
669 ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
670                        unsigned long len, atomic_t *mmap_changing)
671 {
672         return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_CONTINUE,
673                               mmap_changing, 0);
674 }
675
676 int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
677                         unsigned long len, bool enable_wp,
678                         atomic_t *mmap_changing)
679 {
680         struct vm_area_struct *dst_vma;
681         pgprot_t newprot;
682         int err;
683
684         /*
685          * Sanitize the command parameters:
686          */
687         BUG_ON(start & ~PAGE_MASK);
688         BUG_ON(len & ~PAGE_MASK);
689
690         /* Does the address range wrap, or is the span zero-sized? */
691         BUG_ON(start + len <= start);
692
693         mmap_read_lock(dst_mm);
694
695         /*
696          * If memory mappings are changing because of a non-cooperative
697          * operation (e.g. mremap) running in parallel, bail out and
698          * request the user to retry later.
699          */
700         err = -EAGAIN;
701         if (mmap_changing && atomic_read(mmap_changing))
702                 goto out_unlock;
703
704         err = -ENOENT;
705         dst_vma = find_dst_vma(dst_mm, start, len);
706         /*
707          * Make sure the vma is not shared, and that the dst range is
708          * both valid and fully within a single existing vma.
709          */
710         if (!dst_vma || (dst_vma->vm_flags & VM_SHARED))
711                 goto out_unlock;
712         if (!userfaultfd_wp(dst_vma))
713                 goto out_unlock;
714         if (!vma_is_anonymous(dst_vma))
715                 goto out_unlock;
716
717         if (enable_wp)
718                 newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
719         else
720                 newprot = vm_get_page_prot(dst_vma->vm_flags);
721
722         change_protection(dst_vma, start, start + len, newprot,
723                           enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);
724
725         err = 0;
726 out_unlock:
727         mmap_read_unlock(dst_mm);
728         return err;
729 }
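/*
 * A minimal userspace sketch (an illustration, not part of this file) of the
 * UFFDIO_WRITEPROTECT ioctl served by mwriteprotect_range(); "uffd", "addr"
 * and "len" are assumptions of the example:
 *
 *	struct uffdio_writeprotect wp = {
 *		.range = { .start = addr, .len = len },
 *		.mode  = UFFDIO_WRITEPROTECT_MODE_WP,
 *	};
 *	if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp) == -1)
 *		perror("UFFDIO_WRITEPROTECT");
 *
 * Clearing UFFDIO_WRITEPROTECT_MODE_WP in .mode removes the write protection
 * again (the !enable_wp case above).
 */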