Merge branch kvm-arm64/mmu/mte into kvmarm-master/next
author     Marc Zyngier <maz@kernel.org>
           Tue, 22 Jun 2021 14:09:34 +0000 (15:09 +0100)
committer  Marc Zyngier <maz@kernel.org>
           Tue, 22 Jun 2021 14:09:34 +0000 (15:09 +0100)
KVM/arm64 support for MTE (the Memory Tagging Extension), courtesy of Steven Price.
It allows the guest to use memory tagging, and offers
a new userspace API to save/restore the tags.

* kvm-arm64/mmu/mte:
  KVM: arm64: Document MTE capability and ioctl
  KVM: arm64: Add ioctl to fetch/store tags in a guest
  KVM: arm64: Expose KVM_ARM_CAP_MTE
  KVM: arm64: Save/restore MTE registers
  KVM: arm64: Introduce MTE VM feature
  arm64: mte: Sync tags for pages where PTE is untagged

Signed-off-by: Marc Zyngier <maz@kernel.org>
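
For reference, a minimal userspace sketch of how a VMM might drive the new
API, assuming an arm64 host running a kernel with this series and a 4KiB
page size. The constant and structure names (KVM_CAP_ARM_MTE,
KVM_ARM_MTE_COPY_TAGS, KVM_ARM_TAGS_FROM_GUEST, struct kvm_arm_copy_mte_tags)
are the ones introduced by the series; the guest IPA and the near-absence of
error handling are purely illustrative.

        /*
         * Sketch only: enable MTE for a guest and save the tags of one
         * page of guest memory, e.g. ahead of migration.
         */
        #include <fcntl.h>
        #include <stdio.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        int main(void)
        {
                int kvm = open("/dev/kvm", O_RDWR);
                int vm = ioctl(kvm, KVM_CREATE_VM, 0);

                /* The capability must be enabled before any vCPU is created. */
                struct kvm_enable_cap cap = { .cap = KVM_CAP_ARM_MTE };
                if (ioctl(vm, KVM_ENABLE_CAP, &cap))
                        perror("KVM_ENABLE_CAP(KVM_CAP_ARM_MTE)");

                /* ... create vCPUs, set up memslots, run the guest ... */

                /*
                 * Save the tags for one 4KiB page of guest IPA space; the
                 * buffer holds one tag byte per 16-byte granule (length / 16).
                 */
                unsigned char tags[4096 / 16];
                struct kvm_arm_copy_mte_tags copy = {
                        .guest_ipa = 0x80000000,        /* hypothetical IPA */
                        .length = 4096,
                        .addr = tags,
                        .flags = KVM_ARM_TAGS_FROM_GUEST,
                };
                if (ioctl(vm, KVM_ARM_MTE_COPY_TAGS, &copy) < 0)
                        perror("KVM_ARM_MTE_COPY_TAGS");

                return 0;
        }

To restore tags on the destination of a migration, the same ioctl is issued
with flags set to KVM_ARM_TAGS_TO_GUEST and the buffer filled with the
previously saved tags.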
arch/arm64/include/asm/kvm_host.h
arch/arm64/kvm/arm.c
arch/arm64/kvm/mmu.c

Simple merge
Simple merge
@@@ -824,35 -822,45 +824,74 @@@ transparent_hugepage_adjust(struct kvm_
        return PAGE_SIZE;
  }
  
 +static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
 +{
 +      unsigned long pa;
 +
 +      if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP))
 +              return huge_page_shift(hstate_vma(vma));
 +
 +      if (!(vma->vm_flags & VM_PFNMAP))
 +              return PAGE_SHIFT;
 +
 +      VM_BUG_ON(is_vm_hugetlb_page(vma));
 +
 +      pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);
 +
 +#ifndef __PAGETABLE_PMD_FOLDED
 +      if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) &&
 +          ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start &&
 +          ALIGN(hva, PUD_SIZE) <= vma->vm_end)
 +              return PUD_SHIFT;
 +#endif
 +
 +      if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) &&
 +          ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
 +          ALIGN(hva, PMD_SIZE) <= vma->vm_end)
 +              return PMD_SHIFT;
 +
 +      return PAGE_SHIFT;
 +}
 +
+ /*
+  * The page will be mapped in stage 2 as Normal Cacheable, so the VM will be
+  * able to see the page's tags and therefore they must be initialised first. If
+  * PG_mte_tagged is set, tags have already been initialised.
+  *
+  * The race in the test/set of the PG_mte_tagged flag is handled by:
+  * - preventing VM_SHARED mappings in a memslot with MTE, which stops two VMs
+  *   racing to sanitise the same page
+  * - mmap_lock protects between a VM faulting a page in and the VMM performing
+  *   an mprotect() to add VM_MTE
+  */
+ static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
+                            unsigned long size)
+ {
+       unsigned long i, nr_pages = size >> PAGE_SHIFT;
+       struct page *page;
+
+       if (!kvm_has_mte(kvm))
+               return 0;
+
+       /*
+        * pfn_to_online_page() is used to reject ZONE_DEVICE pages
+        * that may not support tags.
+        */
+       page = pfn_to_online_page(pfn);
+       if (!page)
+               return -EFAULT;
+
+       for (i = 0; i < nr_pages; i++, page++) {
+               if (!test_bit(PG_mte_tagged, &page->flags)) {
+                       mte_clear_page_tags(page_address(page));
+                       set_bit(PG_mte_tagged, &page->flags);
+               }
+       }
+
+       return 0;
+ }
  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                          struct kvm_memory_slot *memslot, unsigned long hva,
                          unsigned long fault_status)
                return -EFAULT;
        }
  
 -      if (is_vm_hugetlb_page(vma))
 -              vma_shift = huge_page_shift(hstate_vma(vma));
 -      else
 -              vma_shift = PAGE_SHIFT;
 -
 -      if (logging_active ||
 -          (vma->vm_flags & VM_PFNMAP)) {
 +      /*
 +       * logging_active is guaranteed to never be true for VM_PFNMAP
 +       * memslots.
 +       */
 +      if (logging_active) {
                force_pte = true;
                vma_shift = PAGE_SHIFT;
 +      } else {
 +              vma_shift = get_vma_page_shift(vma, hva);
        }
  
+       shared = (vma->vm_flags & VM_SHARED);
+
        switch (vma_shift) {
  #ifndef __PAGETABLE_PMD_FOLDED
        case PUD_SHIFT:
         * If we are not forced to use page mapping, check if we are
         * backed by a THP and thus use block mapping if possible.
         */
 -      if (vma_pagesize == PAGE_SIZE && !force_pte)
 +      if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
                vma_pagesize = transparent_hugepage_adjust(memslot, hva,
                                                           &pfn, &fault_ipa);
 -      if (writable)
 -              prot |= KVM_PGTABLE_PROT_W;
 -      if (fault_status != FSC_PERM && !device) {
++      if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
+               /* Check the VMM hasn't introduced a new VM_SHARED VMA */
 -              if (kvm_has_mte(kvm) && shared) {
++              if (!shared)
++                      ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
++              else
+                       ret = -EFAULT;
 -                      goto out_unlock;
 -              }
 -              ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
+               if (ret)
+                       goto out_unlock;
 -
 -              clean_dcache_guest_page(pfn, vma_pagesize);
+       }
 -      if (exec_fault) {
 +      if (writable)
 +              prot |= KVM_PGTABLE_PROT_W;
 +
 +      if (exec_fault)
                prot |= KVM_PGTABLE_PROT_X;
 -              invalidate_icache_guest_page(pfn, vma_pagesize);
 -      }
  
        if (device)
                prot |= KVM_PGTABLE_PROT_DEVICE;
@@@ -1212,11 -1227,17 +1266,15 @@@ bool kvm_set_spte_gfn(struct kvm *kvm, 
  
        WARN_ON(range->end - range->start != 1);
  
+       ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE);
+       if (ret)
+               return false;
        /*
 -       * We've moved a page around, probably through CoW, so let's treat it
 -       * just like a translation fault and clean the cache to the PoC.
 -       */
 -      clean_dcache_guest_page(pfn, PAGE_SIZE);
 -
 -      /*
 +       * We've moved a page around, probably through CoW, so let's treat
 +       * it just like a translation fault and let the map handler clean
 +       * the cache to the PoC.
 +       *
         * The MMU notifiers will have unmapped a huge PMD before calling
         * ->change_pte() (which in turn calls kvm_set_spte_gfn()) and
         * therefore we never need to clear out a huge PMD through this
@@@ -1414,7 -1438,28 +1472,15 @@@ int kvm_arch_prepare_memory_region(stru
                if (!vma)
                        break;
  
 -              /*
 -               * Take the intersection of this VMA with the memory region
 -               */
 -              vm_start = max(hva, vma->vm_start);
 -              vm_end = min(reg_end, vma->vm_end);
 -
+               /*
+                * VM_SHARED mappings are not allowed with MTE to avoid races
+                * when updating the PG_mte_tagged page flag, see
+                * sanitise_mte_tags for more details.
+                */
+               if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED)
+                       return -EINVAL;
                if (vma->vm_flags & VM_PFNMAP) {
 -                      gpa_t gpa = mem->guest_phys_addr +
 -                                  (vm_start - mem->userspace_addr);
 -                      phys_addr_t pa;
 -
 -                      pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
 -                      pa += vm_start - vma->vm_start;
 -
                        /* IO region dirty page logging not allowed */
                        if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
                                ret = -EINVAL;