KVM: s390: fix race in gmap_make_secure()
authorClaudio Imbrenda <imbrenda@linux.ibm.com>
Fri, 28 Apr 2023 09:27:53 +0000 (11:27 +0200)
committerClaudio Imbrenda <imbrenda@linux.ibm.com>
Thu, 4 May 2023 16:26:27 +0000 (18:26 +0200)
Fix a potential race in gmap_make_secure() and remove the last user of
follow_page() without FOLL_GET.

The old code is locking something it doesn't have a reference to, and
as explained by Jason and David in this discussion:
https://lore.kernel.org/linux-mm/Y9J4P%2FRNvY1Ztn0Q@nvidia.com/
it can lead to all kind of bad things, including the page getting
unmapped (MADV_DONTNEED), freed, reallocated as a larger folio and the
unlock_page() would target the wrong bit.
There is also another race with the FOLL_WRITE, which could race
between the follow_page() and the get_locked_pte().

The main point is to remove the last use of follow_page() without
FOLL_GET or FOLL_PIN, removing the races can be considered a nice
bonus.

Link: https://lore.kernel.org/linux-mm/Y9J4P%2FRNvY1Ztn0Q@nvidia.com/
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Fixes: 214d9bbcd3a6 ("s390/mm: provide memory management functions for protected KVM guests")
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Message-Id: <20230428092753.27913-2-imbrenda@linux.ibm.com>

arch/s390/kernel/uv.c

index 9f18a4a..cb2ee06 100644 (file)
@@ -192,21 +192,10 @@ static int expected_page_refs(struct page *page)
        return res;
 }
 
-static int make_secure_pte(pte_t *ptep, unsigned long addr,
-                          struct page *exp_page, struct uv_cb_header *uvcb)
+static int make_page_secure(struct page *page, struct uv_cb_header *uvcb)
 {
-       pte_t entry = READ_ONCE(*ptep);
-       struct page *page;
        int expected, cc = 0;
 
-       if (!pte_present(entry))
-               return -ENXIO;
-       if (pte_val(entry) & _PAGE_INVALID)
-               return -ENXIO;
-
-       page = pte_page(entry);
-       if (page != exp_page)
-               return -ENXIO;
        if (PageWriteback(page))
                return -EAGAIN;
        expected = expected_page_refs(page);
@@ -304,17 +293,18 @@ again:
                goto out;
 
        rc = -ENXIO;
-       page = follow_page(vma, uaddr, FOLL_WRITE);
-       if (IS_ERR_OR_NULL(page))
-               goto out;
-
-       lock_page(page);
        ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
-       if (should_export_before_import(uvcb, gmap->mm))
-               uv_convert_from_secure(page_to_phys(page));
-       rc = make_secure_pte(ptep, uaddr, page, uvcb);
+       if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
+               page = pte_page(*ptep);
+               rc = -EAGAIN;
+               if (trylock_page(page)) {
+                       if (should_export_before_import(uvcb, gmap->mm))
+                               uv_convert_from_secure(page_to_phys(page));
+                       rc = make_page_secure(page, uvcb);
+                       unlock_page(page);
+               }
+       }
        pte_unmap_unlock(ptep, ptelock);
-       unlock_page(page);
 out:
        mmap_read_unlock(gmap->mm);