KVM: arm64: Unify handling THP backed host memory

author Suzuki K Poulose <suzuki.poulose@arm.com>

Thu, 7 May 2020 12:35:46 +0000 (20:35 +0800)

committer Marc Zyngier <maz@kernel.org>

Sat, 16 May 2020 14:05:02 +0000 (15:05 +0100)
author Suzuki K Poulose <suzuki.poulose@arm.com>
Thu, 7 May 2020 12:35:46 +0000 (20:35 +0800)
committer Marc Zyngier <maz@kernel.org>
Sat, 16 May 2020 14:05:02 +0000 (15:05 +0100)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c

index ccb44e7d30d913cc7b60a300038efcf90a41dcaa..66eb8e3f6e8c67de4fc080127078c459806cbf8d 100644 (file)
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1375,47 +1375,6 @@ out:
         return ret;
  }
  
-static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
-{
-       kvm_pfn_t pfn = *pfnp;
-       gfn_t gfn = *ipap >> PAGE_SHIFT;
-
-       if (kvm_is_transparent_hugepage(pfn)) {
-               unsigned long mask;
-               /*
-                * The address we faulted on is backed by a transparent huge
-                * page.  However, because we map the compound huge page and
-                * not the individual tail page, we need to transfer the
-                * refcount to the head page.  We have to be careful that the
-                * THP doesn't start to split while we are adjusting the
-                * refcounts.
-                *
-                * We are sure this doesn't happen, because mmu_notifier_retry
-                * was successful and we are holding the mmu_lock, so if this
-                * THP is trying to split, it will be blocked in the mmu
-                * notifier before touching any of the pages, specifically
-                * before being able to call __split_huge_page_refcount().
-                *
-                * We can therefore safely transfer the refcount from PG_tail
-                * to PG_head and switch the pfn from a tail page to the head
-                * page accordingly.
-                */
-               mask = PTRS_PER_PMD - 1;
-               VM_BUG_ON((gfn & mask) != (pfn & mask));
-               if (pfn & mask) {
-                       *ipap &= PMD_MASK;
-                       kvm_release_pfn_clean(pfn);
-                       pfn &= ~mask;
-                       kvm_get_pfn(pfn);
-                       *pfnp = pfn;
-               }
-
-               return true;
-       }
-
-       return false;
-}
-
  /**
   * stage2_wp_ptes - write protect PMD range
   * @pmd:       pointer to pmd entry
@@ -1663,6 +1622,59 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
                (hva & ~(map_size - 1)) + map_size <= uaddr_end;
  }
  
+/*
+ * Check if the given hva is backed by a transparent huge page (THP) and
+ * whether it can be mapped using block mapping in stage2. If so, adjust
+ * the stage2 PFN and IPA accordingly. Only PMD_SIZE THPs are currently
+ * supported. This will need to be updated to support other THP sizes.
+ *
+ * Returns the size of the mapping.
+ */
+static unsigned long
+transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
+                           unsigned long hva, kvm_pfn_t *pfnp,
+                           phys_addr_t *ipap)
+{
+       kvm_pfn_t pfn = *pfnp;
+
+       /*
+        * Make sure the adjustment is done only for THP pages. Also make
+        * sure that the HVA and IPA are sufficiently aligned and that the
+        * block map is contained within the memslot.
+        */
+       if (kvm_is_transparent_hugepage(pfn) &&
+           fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
+               /*
+                * The address we faulted on is backed by a transparent huge
+                * page.  However, because we map the compound huge page and
+                * not the individual tail page, we need to transfer the
+                * refcount to the head page.  We have to be careful that the
+                * THP doesn't start to split while we are adjusting the
+                * refcounts.
+                *
+                * We are sure this doesn't happen, because mmu_notifier_retry
+                * was successful and we are holding the mmu_lock, so if this
+                * THP is trying to split, it will be blocked in the mmu
+                * notifier before touching any of the pages, specifically
+                * before being able to call __split_huge_page_refcount().
+                *
+                * We can therefore safely transfer the refcount from PG_tail
+                * to PG_head and switch the pfn from a tail page to the head
+                * page accordingly.
+                */
+               *ipap &= PMD_MASK;
+               kvm_release_pfn_clean(pfn);
+               pfn &= ~(PTRS_PER_PMD - 1);
+               kvm_get_pfn(pfn);
+               *pfnp = pfn;
+
+               return PMD_SIZE;
+       }
+
+       /* Use page mapping if we cannot use block mapping. */
+       return PAGE_SIZE;
+}
+
  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                           struct kvm_memory_slot *memslot, unsigned long hva,
                           unsigned long fault_status)
@@ -1776,20 +1788,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         if (mmu_notifier_retry(kvm, mmu_seq))
                 goto out_unlock;
  
-       if (vma_pagesize == PAGE_SIZE && !force_pte) {
-               /*
-                * Only PMD_SIZE transparent hugepages(THP) are
-                * currently supported. This code will need to be
-                * updated to support other THP sizes.
-                *
-                * Make sure the host VA and the guest IPA are sufficiently
-                * aligned and that the block is contained within the memslot.
-                */
-               if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
-                   transparent_hugepage_adjust(&pfn, &fault_ipa))
-                       vma_pagesize = PMD_SIZE;
-       }
-
+       /*
+        * If we are not forced to use page mapping, check if we are
+        * backed by a THP and thus use block mapping if possible.
+        */
+       if (vma_pagesize == PAGE_SIZE && !force_pte)
+               vma_pagesize = transparent_hugepage_adjust(memslot, hva,
+                                                          &pfn, &fault_ipa);
         if (writable)
                 kvm_set_pfn_dirty(pfn);
author	Suzuki K Poulose <suzuki.poulose@arm.com>
	Thu, 7 May 2020 12:35:46 +0000 (20:35 +0800)
committer	Marc Zyngier <maz@kernel.org>
	Sat, 16 May 2020 14:05:02 +0000 (15:05 +0100)