Merge branch kvm-arm64/mmu/mte into kvmarm-master/next
author     Marc Zyngier <maz@kernel.org>
           Tue, 22 Jun 2021 14:09:34 +0000 (15:09 +0100)
committer  Marc Zyngier <maz@kernel.org>
           Tue, 22 Jun 2021 14:09:34 +0000 (15:09 +0100)
KVM/arm64 support for MTE (the Memory Tagging Extension), courtesy of Steven Price.
It allows the guest to use memory tagging, and offers
a new userspace API to save/restore the tags.

* kvm-arm64/mmu/mte:
  KVM: arm64: Document MTE capability and ioctl
  KVM: arm64: Add ioctl to fetch/store tags in a guest
  KVM: arm64: Expose KVM_ARM_CAP_MTE
  KVM: arm64: Save/restore MTE registers
  KVM: arm64: Introduce MTE VM feature
  arm64: mte: Sync tags for pages where PTE is untagged

Signed-off-by: Marc Zyngier <maz@kernel.org>
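
For reference, a minimal userspace sketch of how a VMM might drive the new
API, assuming an arm64 host running a kernel with this series and a 4KiB
page size. The constant and structure names (KVM_CAP_ARM_MTE,
KVM_ARM_MTE_COPY_TAGS, KVM_ARM_TAGS_FROM_GUEST, struct kvm_arm_copy_mte_tags)
are the ones introduced by the series; the guest IPA and the near-absence of
error handling are purely illustrative.

        /*
         * Sketch only: enable MTE for a guest and save the tags of one
         * page of guest memory, e.g. ahead of migration.
         */
        #include <fcntl.h>
        #include <stdio.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        int main(void)
        {
                int kvm = open("/dev/kvm", O_RDWR);
                int vm = ioctl(kvm, KVM_CREATE_VM, 0);

                /* The capability must be enabled before any vCPU is created. */
                struct kvm_enable_cap cap = { .cap = KVM_CAP_ARM_MTE };
                if (ioctl(vm, KVM_ENABLE_CAP, &cap))
                        perror("KVM_ENABLE_CAP(KVM_CAP_ARM_MTE)");

                /* ... create vCPUs, set up memslots, run the guest ... */

                /*
                 * Save the tags for one 4KiB page of guest IPA space; the
                 * buffer holds one tag byte per 16-byte granule (length / 16).
                 */
                unsigned char tags[4096 / 16];
                struct kvm_arm_copy_mte_tags copy = {
                        .guest_ipa = 0x80000000,        /* hypothetical IPA */
                        .length = 4096,
                        .addr = tags,
                        .flags = KVM_ARM_TAGS_FROM_GUEST,
                };
                if (ioctl(vm, KVM_ARM_MTE_COPY_TAGS, &copy) < 0)
                        perror("KVM_ARM_MTE_COPY_TAGS");

                return 0;
        }

To restore tags on the destination of a migration, the same ioctl is issued
with flags set to KVM_ARM_TAGS_TO_GUEST and the buffer filled with the
previously saved tags.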
arch/arm64/include/asm/kvm_host.h
arch/arm64/kvm/arm.c
arch/arm64/kvm/mmu.c

Simple merge
Simple merge
@@@ -824,35 -822,45 +824,74 @@@ transparent_hugepage_adjust(struct kvm_
        return PAGE_SIZE;
  }
  
 +static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
 +{
 +      unsigned long pa;
 +
 +      if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP))
 +              return huge_page_shift(hstate_vma(vma));
 +
 +      if (!(vma->vm_flags & VM_PFNMAP))
 +              return PAGE_SHIFT;
 +
 +      VM_BUG_ON(is_vm_hugetlb_page(vma));
 +
 +      pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);
 +
 +#ifndef __PAGETABLE_PMD_FOLDED
 +      if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) &&
 +          ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start &&
 +          ALIGN(hva, PUD_SIZE) <= vma->vm_end)
 +              return PUD_SHIFT;
 +#endif
 +
 +      if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) &&
 +          ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
 +          ALIGN(hva, PMD_SIZE) <= vma->vm_end)
 +              return PMD_SHIFT;
 +
 +      return PAGE_SHIFT;
 +}
 +
+ /*
+  * The page will be mapped in stage 2 as Normal Cacheable, so the VM will be
+  * able to see the page's tags and therefore they must be initialised first. If
+  * PG_mte_tagged is set, tags have already been initialised.
+  *
+  * The race in the test/set of the PG_mte_tagged flag is handled by:
+  * - preventing VM_SHARED mappings in a memslot with MTE, which stops two VMs
+  *   racing to sanitise the same page
+  * - mmap_lock protects between a VM faulting a page in and the VMM performing
+  *   an mprotect() to add VM_MTE
+  */
+ static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
+                            unsigned long size)
+ {
+       unsigned long i, nr_pages = size >> PAGE_SHIFT;
+       struct page *page;
+
+       if (!kvm_has_mte(kvm))
+               return 0;
+
+       /*
+        * pfn_to_online_page() is used to reject ZONE_DEVICE pages
+        * that may not support tags.
+        */
+       page = pfn_to_online_page(pfn);
+       if (!page)
+               return -EFAULT;
+
+       for (i = 0; i < nr_pages; i++, page++) {
+               if (!test_bit(PG_mte_tagged, &page->flags)) {
+                       mte_clear_page_tags(page_address(page));
+                       set_bit(PG_mte_tagged, &page->flags);
+               }
+       }
+
+       return 0;
+ }
  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                          struct kvm_memory_slot *memslot, unsigned long hva,
                          unsigned long fault_status)
                return -EFAULT;
        }
  
 -      if (is_vm_hugetlb_page(vma))
 -              vma_shift = huge_page_shift(hstate_vma(vma));
 -      else
 -              vma_shift = PAGE_SHIFT;
 -
 -      if (logging_active ||
 -          (vma->vm_flags & VM_PFNMAP)) {
 +      /*
 +       * logging_active is guaranteed to never be true for VM_PFNMAP
 +       * memslots.
 +       */
 +      if (logging_active) {
                force_pte = true;
                vma_shift = PAGE_SHIFT;
 +      } else {
 +              vma_shift = get_vma_page_shift(vma, hva);
        }
  
+       shared = (vma->vm_flags & VM_SHARED);
+
        switch (vma_shift) {
  #ifndef __PAGETABLE_PMD_FOLDED
        case PUD_SHIFT:
         * If we are not forced to use page mapping, check if we are
         * backed by a THP and thus use block mapping if possible.
         */
 -      if (vma_pagesize == PAGE_SIZE && !force_pte)
 +      if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
                vma_pagesize = transparent_hugepage_adjust(memslot, hva,
                                                           &pfn, &fault_ipa);
 -      if (writable)
 -              prot |= KVM_PGTABLE_PROT_W;
 -      if (fault_status != FSC_PERM && !device) {
++      if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
+               /* Check the VMM hasn't introduced a new VM_SHARED VMA */
 -              if (kvm_has_mte(kvm) && shared) {
++              if (!shared)
++                      ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
++              else
+                       ret = -EFAULT;
 -                      goto out_unlock;
 -              }
 -              ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
+               if (ret)
+                       goto out_unlock;
 -
 -              clean_dcache_guest_page(pfn, vma_pagesize);
+       }
 -      if (exec_fault) {
 +      if (writable)
 +              prot |= KVM_PGTABLE_PROT_W;
 +
 +      if (exec_fault)
                prot |= KVM_PGTABLE_PROT_X;
 -              invalidate_icache_guest_page(pfn, vma_pagesize);
 -      }
  
        if (device)
                prot |= KVM_PGTABLE_PROT_DEVICE;
@@@ -1212,11 -1227,17 +1266,15 @@@ bool kvm_set_spte_gfn(struct kvm *kvm, 
  
        WARN_ON(range->end - range->start != 1);
  
+       ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE);
+       if (ret)
+               return false;
        /*
 -       * We've moved a page around, probably through CoW, so let's treat it
 -       * just like a translation fault and clean the cache to the PoC.
 -       */
 -      clean_dcache_guest_page(pfn, PAGE_SIZE);
 -
 -      /*
 +       * We've moved a page around, probably through CoW, so let's treat
 +       * it just like a translation fault and let the map handler clean
 +       * the cache to the PoC.
 +       *
         * The MMU notifiers will have unmapped a huge PMD before calling
         * ->change_pte() (which in turn calls kvm_set_spte_gfn()) and
         * therefore we never need to clear out a huge PMD through this
@@@ -1414,7 -1438,28 +1472,15 @@@ int kvm_arch_prepare_memory_region(stru
                if (!vma)
                        break;
  
 -              /*
 -               * Take the intersection of this VMA with the memory region
 -               */
 -              vm_start = max(hva, vma->vm_start);
 -              vm_end = min(reg_end, vma->vm_end);
 -
+               /*
+                * VM_SHARED mappings are not allowed with MTE to avoid races
+                * when updating the PG_mte_tagged page flag, see
+                * sanitise_mte_tags for more details.
+                */
+               if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED)
+                       return -EINVAL;
                if (vma->vm_flags & VM_PFNMAP) {
 -                      gpa_t gpa = mem->guest_phys_addr +
 -                                  (vm_start - mem->userspace_addr);
 -                      phys_addr_t pa;
 -
 -                      pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
 -                      pa += vm_start - vma->vm_start;
 -
                        /* IO region dirty page logging not allowed */
                        if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
                                ret = -EINVAL;