mm/userfaultfd.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  *  mm/userfaultfd.c
   4  *
   5  *  Copyright (C) 2015  Red Hat, Inc.
   6  */
   7
   8 #include <linux/mm.h>
   9 #include <linux/sched/signal.h>
  10 #include <linux/pagemap.h>
  11 #include <linux/rmap.h>
  12 #include <linux/swap.h>
  13 #include <linux/swapops.h>
  14 #include <linux/userfaultfd_k.h>
  15 #include <linux/mmu_notifier.h>
  16 #include <linux/hugetlb.h>
  17 #include <linux/shmem_fs.h>
  18 #include <asm/tlbflush.h>
  19 #include "internal.h"
  20
  21 static __always_inline
  22 struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
  23                                     unsigned long dst_start,
  24                                     unsigned long len)
  25 {
  26         /*
  27          * Make sure that the dst range is both valid and fully within a
  28          * single existing vma.
  29          */
  30         struct vm_area_struct *dst_vma;
  31
  32         dst_vma = find_vma(dst_mm, dst_start);
  33         if (!dst_vma)
  34                 return NULL;
  35
  36         if (dst_start < dst_vma->vm_start ||
  37             dst_start + len > dst_vma->vm_end)
  38                 return NULL;
  39
  40         /*
  41          * Check the vma is registered in uffd, this is required to
  42          * enforce the VM_MAYWRITE check done at uffd registration
  43          * time.
  44          */
  45         if (!dst_vma->vm_userfaultfd_ctx.ctx)
  46                 return NULL;
  47
  48         return dst_vma;
  49 }
  50
  51 /*
  52  * Install PTEs, to map dst_addr (within dst_vma) to page.
  53  *
  54  * This function handles both MCOPY_ATOMIC_NORMAL and _CONTINUE for both shmem
  55  * and anon, and for both shared and private VMAs.
  56  */
  57 int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
  58                              struct vm_area_struct *dst_vma,
  59                              unsigned long dst_addr, struct page *page,
  60                              bool newly_allocated, bool wp_copy)
  61 {
  62         int ret;
  63         pte_t _dst_pte, *dst_pte;
  64         bool writable = dst_vma->vm_flags & VM_WRITE;
  65         bool vm_shared = dst_vma->vm_flags & VM_SHARED;
  66         bool page_in_cache = page->mapping;
  67         spinlock_t *ptl;
  68         struct inode *inode;
  69         pgoff_t offset, max_off;
  70
  71         _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
  72         if (page_in_cache && !vm_shared)
  73                 writable = false;
  74         if (writable || !page_in_cache)
  75                 _dst_pte = pte_mkdirty(_dst_pte);
  76         if (writable) {
  77                 if (wp_copy)
  78                         _dst_pte = pte_mkuffd_wp(_dst_pte);
  79                 else
  80                         _dst_pte = pte_mkwrite(_dst_pte);
  81         }
  82
  83         dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
  84
  85         if (vma_is_shmem(dst_vma)) {
  86                 /* serialize against truncate with the page table lock */
  87                 inode = dst_vma->vm_file->f_inode;
  88                 offset = linear_page_index(dst_vma, dst_addr);
  89                 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
  90                 ret = -EFAULT;
  91                 if (unlikely(offset >= max_off))
  92                         goto out_unlock;
  93         }
  94
  95         ret = -EEXIST;
  96         if (!pte_none(*dst_pte))
  97                 goto out_unlock;
  98
  99         if (page_in_cache)
 100                 page_add_file_rmap(page, false);
 101         else
 102                 page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
 103
 104         /*
 105          * Must happen after rmap, as mm_counter() checks mapping (via
 106          * PageAnon()), which is set by __page_set_anon_rmap().
 107          */
 108         inc_mm_counter(dst_mm, mm_counter(page));
 109
 110         if (newly_allocated)
 111                 lru_cache_add_inactive_or_unevictable(page, dst_vma);
 112
 113         set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
 114
 115         /* No need to invalidate - it was non-present before */
 116         update_mmu_cache(dst_vma, dst_addr, dst_pte);
 117         ret = 0;
 118 out_unlock:
 119         pte_unmap_unlock(dst_pte, ptl);
 120         return ret;
 121 }
 122
 123 static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 124                             pmd_t *dst_pmd,
 125                             struct vm_area_struct *dst_vma,
 126                             unsigned long dst_addr,
 127                             unsigned long src_addr,
 128                             struct page **pagep,
 129                             bool wp_copy)
 130 {
 131         void *page_kaddr;
 132         int ret;
 133         struct page *page;
 134
 135         if (!*pagep) {
 136                 ret = -ENOMEM;
 137                 page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
 138                 if (!page)
 139                         goto out;
 140
 141                 page_kaddr = kmap_atomic(page);
 142                 ret = copy_from_user(page_kaddr,
 143                                      (const void __user *) src_addr,
 144                                      PAGE_SIZE);
 145                 kunmap_atomic(page_kaddr);
 146
 147                 /* fallback to copy_from_user outside mmap_lock */
 148                 if (unlikely(ret)) {
 149                         ret = -ENOENT;
 150                         *pagep = page;
 151                         /* don't free the page */
 152                         goto out;
 153                 }
 154
 155                 flush_dcache_page(page);
 156         } else {
 157                 page = *pagep;
 158                 *pagep = NULL;
 159         }
 160
 161         /*
 162          * The memory barrier inside __SetPageUptodate makes sure that
 163          * preceding stores to the page contents become visible before
 164          * the set_pte_at() write.
 165          */
 166         __SetPageUptodate(page);
 167
 168         ret = -ENOMEM;
 169         if (mem_cgroup_charge(page, dst_mm, GFP_KERNEL))
 170                 goto out_release;
 171
 172         ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
 173                                        page, true, wp_copy);
 174         if (ret)
 175                 goto out_release;
 176 out:
 177         return ret;
 178 out_release:
 179         put_page(page);
 180         goto out;
 181 }
 182
 183 static int mfill_zeropage_pte(struct mm_struct *dst_mm,
 184                               pmd_t *dst_pmd,
 185                               struct vm_area_struct *dst_vma,
 186                               unsigned long dst_addr)
 187 {
 188         pte_t _dst_pte, *dst_pte;
 189         spinlock_t *ptl;
 190         int ret;
 191         pgoff_t offset, max_off;
 192         struct inode *inode;
 193
 194         _dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
 195                                          dst_vma->vm_page_prot));
 196         dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
 197         if (dst_vma->vm_file) {
 198                 /* the shmem MAP_PRIVATE case requires checking the i_size */
 199                 inode = dst_vma->vm_file->f_inode;
 200                 offset = linear_page_index(dst_vma, dst_addr);
 201                 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
 202                 ret = -EFAULT;
 203                 if (unlikely(offset >= max_off))
 204                         goto out_unlock;
 205         }
 206         ret = -EEXIST;
 207         if (!pte_none(*dst_pte))
 208                 goto out_unlock;
 209         set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
 210         /* No need to invalidate - it was non-present before */
 211         update_mmu_cache(dst_vma, dst_addr, dst_pte);
 212         ret = 0;
 213 out_unlock:
 214         pte_unmap_unlock(dst_pte, ptl);
 215         return ret;
 216 }
 217
 218 /* Handles UFFDIO_CONTINUE for all shmem VMAs (shared or private). */
 219 static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
 220                                 pmd_t *dst_pmd,
 221                                 struct vm_area_struct *dst_vma,
 222                                 unsigned long dst_addr,
 223                                 bool wp_copy)
 224 {
 225         struct inode *inode = file_inode(dst_vma->vm_file);
 226         pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
 227         struct page *page;
 228         int ret;
 229
 230         ret = shmem_getpage(inode, pgoff, &page, SGP_NOALLOC);
 231         /* Our caller expects us to return -EFAULT if we failed to find page. */
 232         if (ret == -ENOENT)
 233                 ret = -EFAULT;
 234         if (ret)
 235                 goto out;
 236         if (!page) {
 237                 ret = -EFAULT;
 238                 goto out;
 239         }
 240
 241         ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
 242                                        page, false, wp_copy);
 243         if (ret)
 244                 goto out_release;
 245
 246         unlock_page(page);
 247         ret = 0;
 248 out:
 249         return ret;
 250 out_release:
 251         unlock_page(page);
 252         put_page(page);
 253         goto out;
 254 }
 255
 256 static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
 257 {
 258         pgd_t *pgd;
 259         p4d_t *p4d;
 260         pud_t *pud;
 261
 262         pgd = pgd_offset(mm, address);
 263         p4d = p4d_alloc(mm, pgd, address);
 264         if (!p4d)
 265                 return NULL;
 266         pud = pud_alloc(mm, p4d, address);
 267         if (!pud)
 268                 return NULL;
 269         /*
 270          * Note that we didn't run this because the pmd was
 271          * missing, the *pmd may be already established and in
 272          * turn it may also be a trans_huge_pmd.
 273          */
 274         return pmd_alloc(mm, pud, address);
 275 }
 276
 277 #ifdef CONFIG_HUGETLB_PAGE
 278 /*
 279  * __mcopy_atomic processing for HUGETLB vmas.  Note that this routine is
 280  * called with mmap_lock held, it will release mmap_lock before returning.
 281  */
 282 static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 283                                               struct vm_area_struct *dst_vma,
 284                                               unsigned long dst_start,
 285                                               unsigned long src_start,
 286                                               unsigned long len,
 287                                               enum mcopy_atomic_mode mode)
 288 {
 289         int vm_shared = dst_vma->vm_flags & VM_SHARED;
 290         ssize_t err;
 291         pte_t *dst_pte;
 292         unsigned long src_addr, dst_addr;
 293         long copied;
 294         struct page *page;
 295         unsigned long vma_hpagesize;
 296         pgoff_t idx;
 297         u32 hash;
 298         struct address_space *mapping;
 299
 300         /*
 301          * There is no default zero huge page for all huge page sizes as
 302          * supported by hugetlb.  A PMD_SIZE huge pages may exist as used
 303          * by THP.  Since we can not reliably insert a zero page, this
 304          * feature is not supported.
 305          */
 306         if (mode == MCOPY_ATOMIC_ZEROPAGE) {
 307                 mmap_read_unlock(dst_mm);
 308                 return -EINVAL;
 309         }
 310
 311         src_addr = src_start;
 312         dst_addr = dst_start;
 313         copied = 0;
 314         page = NULL;
 315         vma_hpagesize = vma_kernel_pagesize(dst_vma);
 316
 317         /*
 318          * Validate alignment based on huge page size
 319          */
 320         err = -EINVAL;
 321         if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
 322                 goto out_unlock;
 323
 324 retry:
 325         /*
 326          * On routine entry dst_vma is set.  If we had to drop mmap_lock and
 327          * retry, dst_vma will be set to NULL and we must lookup again.
 328          */
 329         if (!dst_vma) {
 330                 err = -ENOENT;
 331                 dst_vma = find_dst_vma(dst_mm, dst_start, len);
 332                 if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
 333                         goto out_unlock;
 334
 335                 err = -EINVAL;
 336                 if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
 337                         goto out_unlock;
 338
 339                 vm_shared = dst_vma->vm_flags & VM_SHARED;
 340         }
 341
 342         /*
 343          * If not shared, ensure the dst_vma has a anon_vma.
 344          */
 345         err = -ENOMEM;
 346         if (!vm_shared) {
 347                 if (unlikely(anon_vma_prepare(dst_vma)))
 348                         goto out_unlock;
 349         }
 350
 351         while (src_addr < src_start + len) {
 352                 BUG_ON(dst_addr >= dst_start + len);
 353
 354                 /*
 355                  * Serialize via i_mmap_rwsem and hugetlb_fault_mutex.
 356                  * i_mmap_rwsem ensures the dst_pte remains valid even
 357                  * in the case of shared pmds.  fault mutex prevents
 358                  * races with other faulting threads.
 359                  */
 360                 mapping = dst_vma->vm_file->f_mapping;
 361                 i_mmap_lock_read(mapping);
 362                 idx = linear_page_index(dst_vma, dst_addr);
 363                 hash = hugetlb_fault_mutex_hash(mapping, idx);
 364                 mutex_lock(&hugetlb_fault_mutex_table[hash]);
 365
 366                 err = -ENOMEM;
 367                 dst_pte = huge_pte_alloc(dst_mm, dst_vma, dst_addr, vma_hpagesize);
 368                 if (!dst_pte) {
 369                         mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 370                         i_mmap_unlock_read(mapping);
 371                         goto out_unlock;
 372                 }
 373
 374                 if (mode != MCOPY_ATOMIC_CONTINUE &&
 375                     !huge_pte_none(huge_ptep_get(dst_pte))) {
 376                         err = -EEXIST;
 377                         mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 378                         i_mmap_unlock_read(mapping);
 379                         goto out_unlock;
 380                 }
 381
 382                 err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
 383                                                dst_addr, src_addr, mode, &page);
 384
 385                 mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 386                 i_mmap_unlock_read(mapping);
 387
 388                 cond_resched();
 389
 390                 if (unlikely(err == -ENOENT)) {
 391                         mmap_read_unlock(dst_mm);
 392                         BUG_ON(!page);
 393
 394                         err = copy_huge_page_from_user(page,
 395                                                 (const void __user *)src_addr,
 396                                                 vma_hpagesize / PAGE_SIZE,
 397                                                 true);
 398                         if (unlikely(err)) {
 399                                 err = -EFAULT;
 400                                 goto out;
 401                         }
 402                         mmap_read_lock(dst_mm);
 403
 404                         dst_vma = NULL;
 405                         goto retry;
 406                 } else
 407                         BUG_ON(page);
 408
 409                 if (!err) {
 410                         dst_addr += vma_hpagesize;
 411                         src_addr += vma_hpagesize;
 412                         copied += vma_hpagesize;
 413
 414                         if (fatal_signal_pending(current))
 415                                 err = -EINTR;
 416                 }
 417                 if (err)
 418                         break;
 419         }
 420
 421 out_unlock:
 422         mmap_read_unlock(dst_mm);
 423 out:
 424         if (page)
 425                 put_page(page);
 426         BUG_ON(copied < 0);
 427         BUG_ON(err > 0);
 428         BUG_ON(!copied && !err);
 429         return copied ? copied : err;
 430 }
 431 #else /* !CONFIG_HUGETLB_PAGE */
 432 /* fail at build time if gcc attempts to use this */
 433 extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 434                                       struct vm_area_struct *dst_vma,
 435                                       unsigned long dst_start,
 436                                       unsigned long src_start,
 437                                       unsigned long len,
 438                                       enum mcopy_atomic_mode mode);
 439 #endif /* CONFIG_HUGETLB_PAGE */
 440
 441 static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
 442                                                 pmd_t *dst_pmd,
 443                                                 struct vm_area_struct *dst_vma,
 444                                                 unsigned long dst_addr,
 445                                                 unsigned long src_addr,
 446                                                 struct page **page,
 447                                                 enum mcopy_atomic_mode mode,
 448                                                 bool wp_copy)
 449 {
 450         ssize_t err;
 451
 452         if (mode == MCOPY_ATOMIC_CONTINUE) {
 453                 return mcontinue_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
 454                                             wp_copy);
 455         }
 456
 457         /*
 458          * The normal page fault path for a shmem will invoke the
 459          * fault, fill the hole in the file and COW it right away. The
 460          * result generates plain anonymous memory. So when we are
 461          * asked to fill an hole in a MAP_PRIVATE shmem mapping, we'll
 462          * generate anonymous memory directly without actually filling
 463          * the hole. For the MAP_PRIVATE case the robustness check
 464          * only happens in the pagetable (to verify it's still none)
 465          * and not in the radix tree.
 466          */
 467         if (!(dst_vma->vm_flags & VM_SHARED)) {
 468                 if (mode == MCOPY_ATOMIC_NORMAL)
 469                         err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
 470                                                dst_addr, src_addr, page,
 471                                                wp_copy);
 472                 else
 473                         err = mfill_zeropage_pte(dst_mm, dst_pmd,
 474                                                  dst_vma, dst_addr);
 475         } else {
 476                 VM_WARN_ON_ONCE(wp_copy);
 477                 err = shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
 478                                              dst_addr, src_addr,
 479                                              mode != MCOPY_ATOMIC_NORMAL,
 480                                              page);
 481         }
 482
 483         return err;
 484 }
 485
 486 static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 487                                               unsigned long dst_start,
 488                                               unsigned long src_start,
 489                                               unsigned long len,
 490                                               enum mcopy_atomic_mode mcopy_mode,
 491                                               atomic_t *mmap_changing,
 492                                               __u64 mode)
 493 {
 494         struct vm_area_struct *dst_vma;
 495         ssize_t err;
 496         pmd_t *dst_pmd;
 497         unsigned long src_addr, dst_addr;
 498         long copied;
 499         struct page *page;
 500         bool wp_copy;
 501
 502         /*
 503          * Sanitize the command parameters:
 504          */
 505         BUG_ON(dst_start & ~PAGE_MASK);
 506         BUG_ON(len & ~PAGE_MASK);
 507
 508         /* Does the address range wrap, or is the span zero-sized? */
 509         BUG_ON(src_start + len <= src_start);
 510         BUG_ON(dst_start + len <= dst_start);
 511
 512         src_addr = src_start;
 513         dst_addr = dst_start;
 514         copied = 0;
 515         page = NULL;
 516 retry:
 517         mmap_read_lock(dst_mm);
 518
 519         /*
 520          * If memory mappings are changing because of non-cooperative
 521          * operation (e.g. mremap) running in parallel, bail out and
 522          * request the user to retry later
 523          */
 524         err = -EAGAIN;
 525         if (mmap_changing && atomic_read(mmap_changing))
 526                 goto out_unlock;
 527
 528         /*
 529          * Make sure the vma is not shared, that the dst range is
 530          * both valid and fully within a single existing vma.
 531          */
 532         err = -ENOENT;
 533         dst_vma = find_dst_vma(dst_mm, dst_start, len);
 534         if (!dst_vma)
 535                 goto out_unlock;
 536
 537         err = -EINVAL;
 538         /*
 539          * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
 540          * it will overwrite vm_ops, so vma_is_anonymous must return false.
 541          */
 542         if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
 543             dst_vma->vm_flags & VM_SHARED))
 544                 goto out_unlock;
 545
 546         /*
 547          * validate 'mode' now that we know the dst_vma: don't allow
 548          * a wrprotect copy if the userfaultfd didn't register as WP.
 549          */
 550         wp_copy = mode & UFFDIO_COPY_MODE_WP;
 551         if (wp_copy && !(dst_vma->vm_flags & VM_UFFD_WP))
 552                 goto out_unlock;
 553
 554         /*
 555          * If this is a HUGETLB vma, pass off to appropriate routine
 556          */
 557         if (is_vm_hugetlb_page(dst_vma))
 558                 return  __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
 559                                                 src_start, len, mcopy_mode);
 560
 561         if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
 562                 goto out_unlock;
 563         if (!vma_is_shmem(dst_vma) && mcopy_mode == MCOPY_ATOMIC_CONTINUE)
 564                 goto out_unlock;
 565
 566         /*
 567          * Ensure the dst_vma has a anon_vma or this page
 568          * would get a NULL anon_vma when moved in the
 569          * dst_vma.
 570          */
 571         err = -ENOMEM;
 572         if (!(dst_vma->vm_flags & VM_SHARED) &&
 573             unlikely(anon_vma_prepare(dst_vma)))
 574                 goto out_unlock;
 575
 576         while (src_addr < src_start + len) {
 577                 pmd_t dst_pmdval;
 578
 579                 BUG_ON(dst_addr >= dst_start + len);
 580
 581                 dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
 582                 if (unlikely(!dst_pmd)) {
 583                         err = -ENOMEM;
 584                         break;
 585                 }
 586
 587                 dst_pmdval = pmd_read_atomic(dst_pmd);
 588                 /*
 589                  * If the dst_pmd is mapped as THP don't
 590                  * override it and just be strict.
 591                  */
 592                 if (unlikely(pmd_trans_huge(dst_pmdval))) {
 593                         err = -EEXIST;
 594                         break;
 595                 }
 596                 if (unlikely(pmd_none(dst_pmdval)) &&
 597                     unlikely(__pte_alloc(dst_mm, dst_pmd))) {
 598                         err = -ENOMEM;
 599                         break;
 600                 }
 601                 /* If an huge pmd materialized from under us fail */
 602                 if (unlikely(pmd_trans_huge(*dst_pmd))) {
 603                         err = -EFAULT;
 604                         break;
 605                 }
 606
 607                 BUG_ON(pmd_none(*dst_pmd));
 608                 BUG_ON(pmd_trans_huge(*dst_pmd));
 609
 610                 err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
 611                                        src_addr, &page, mcopy_mode, wp_copy);
 612                 cond_resched();
 613
 614                 if (unlikely(err == -ENOENT)) {
 615                         void *page_kaddr;
 616
 617                         mmap_read_unlock(dst_mm);
 618                         BUG_ON(!page);
 619
 620                         page_kaddr = kmap(page);
 621                         err = copy_from_user(page_kaddr,
 622                                              (const void __user *) src_addr,
 623                                              PAGE_SIZE);
 624                         kunmap(page);
 625                         if (unlikely(err)) {
 626                                 err = -EFAULT;
 627                                 goto out;
 628                         }
 629                         flush_dcache_page(page);
 630                         goto retry;
 631                 } else
 632                         BUG_ON(page);
 633
 634                 if (!err) {
 635                         dst_addr += PAGE_SIZE;
 636                         src_addr += PAGE_SIZE;
 637                         copied += PAGE_SIZE;
 638
 639                         if (fatal_signal_pending(current))
 640                                 err = -EINTR;
 641                 }
 642                 if (err)
 643                         break;
 644         }
 645
 646 out_unlock:
 647         mmap_read_unlock(dst_mm);
 648 out:
 649         if (page)
 650                 put_page(page);
 651         BUG_ON(copied < 0);
 652         BUG_ON(err > 0);
 653         BUG_ON(!copied && !err);
 654         return copied ? copied : err;
 655 }
 656
 657 ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
 658                      unsigned long src_start, unsigned long len,
 659                      atomic_t *mmap_changing, __u64 mode)
 660 {
 661         return __mcopy_atomic(dst_mm, dst_start, src_start, len,
 662                               MCOPY_ATOMIC_NORMAL, mmap_changing, mode);
 663 }
 664
 665 ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
 666                        unsigned long len, atomic_t *mmap_changing)
 667 {
 668         return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE,
 669                               mmap_changing, 0);
 670 }
 671
 672 ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
 673                        unsigned long len, atomic_t *mmap_changing)
 674 {
 675         return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_CONTINUE,
 676                               mmap_changing, 0);
 677 }
 678
 679 int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
 680                         unsigned long len, bool enable_wp,
 681                         atomic_t *mmap_changing)
 682 {
 683         struct vm_area_struct *dst_vma;
 684         pgprot_t newprot;
 685         int err;
 686
 687         /*
 688          * Sanitize the command parameters:
 689          */
 690         BUG_ON(start & ~PAGE_MASK);
 691         BUG_ON(len & ~PAGE_MASK);
 692
 693         /* Does the address range wrap, or is the span zero-sized? */
 694         BUG_ON(start + len <= start);
 695
 696         mmap_read_lock(dst_mm);
 697
 698         /*
 699          * If memory mappings are changing because of non-cooperative
 700          * operation (e.g. mremap) running in parallel, bail out and
 701          * request the user to retry later
 702          */
 703         err = -EAGAIN;
 704         if (mmap_changing && atomic_read(mmap_changing))
 705                 goto out_unlock;
 706
 707         err = -ENOENT;
 708         dst_vma = find_dst_vma(dst_mm, start, len);
 709         /*
 710          * Make sure the vma is not shared, that the dst range is
 711          * both valid and fully within a single existing vma.
 712          */
 713         if (!dst_vma || (dst_vma->vm_flags & VM_SHARED))
 714                 goto out_unlock;
 715         if (!userfaultfd_wp(dst_vma))
 716                 goto out_unlock;
 717         if (!vma_is_anonymous(dst_vma))
 718                 goto out_unlock;
 719
 720         if (enable_wp)
 721                 newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
 722         else
 723                 newprot = vm_get_page_prot(dst_vma->vm_flags);
 724
 725         change_protection(dst_vma, start, start + len, newprot,
 726                           enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);
 727
 728         err = 0;
 729 out_unlock:
 730         mmap_read_unlock(dst_mm);
 731         return err;
 732 }