return PAGE_SIZE;
}
+static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
+{
+ unsigned long pa;
+
+ if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP))
+ return huge_page_shift(hstate_vma(vma));
+
+ if (!(vma->vm_flags & VM_PFNMAP))
+ return PAGE_SHIFT;
+
+ VM_BUG_ON(is_vm_hugetlb_page(vma));
+
+ pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);
+
+#ifndef __PAGETABLE_PMD_FOLDED
+ if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) &&
+ ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start &&
+ ALIGN(hva, PUD_SIZE) <= vma->vm_end)
+ return PUD_SHIFT;
+#endif
+
+ if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) &&
+ ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
+ ALIGN(hva, PMD_SIZE) <= vma->vm_end)
+ return PMD_SHIFT;
+
+ return PAGE_SHIFT;
+}
+
+ /*
+ * The page will be mapped in stage 2 as Normal Cacheable, so the VM will be
+ * able to see the page's tags and therefore they must be initialised first. If
+ * PG_mte_tagged is set, tags have already been initialised.
+ *
+ * The race in the test/set of the PG_mte_tagged flag is handled by:
+ * - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs
+ * racing to santise the same page
+ * - mmap_lock protects between a VM faulting a page in and the VMM performing
+ * an mprotect() to add VM_MTE
+ */
+ static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
+ unsigned long size)
+ {
+ unsigned long i, nr_pages = size >> PAGE_SHIFT;
+ struct page *page;
+
+ if (!kvm_has_mte(kvm))
+ return 0;
+
+ /*
+ * pfn_to_online_page() is used to reject ZONE_DEVICE pages
+ * that may not support tags.
+ */
+ page = pfn_to_online_page(pfn);
+
+ if (!page)
+ return -EFAULT;
+
+ for (i = 0; i < nr_pages; i++, page++) {
+ if (!test_bit(PG_mte_tagged, &page->flags)) {
+ mte_clear_page_tags(page_address(page));
+ set_bit(PG_mte_tagged, &page->flags);
+ }
+ }
+
+ return 0;
+ }
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
return -EFAULT;
}
- if (is_vm_hugetlb_page(vma))
- vma_shift = huge_page_shift(hstate_vma(vma));
- else
- vma_shift = PAGE_SHIFT;
-
- if (logging_active ||
- (vma->vm_flags & VM_PFNMAP)) {
+ /*
+ * logging_active is guaranteed to never be true for VM_PFNMAP
+ * memslots.
+ */
+ if (logging_active) {
force_pte = true;
vma_shift = PAGE_SHIFT;
+ } else {
+ vma_shift = get_vma_page_shift(vma, hva);
}
+ shared = (vma->vm_flags & VM_PFNMAP);
+
switch (vma_shift) {
#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SHIFT:
* If we are not forced to use page mapping, check if we are
* backed by a THP and thus use block mapping if possible.
*/
- if (vma_pagesize == PAGE_SIZE && !force_pte)
+ if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
vma_pagesize = transparent_hugepage_adjust(memslot, hva,
&pfn, &fault_ipa);
- if (writable)
- prot |= KVM_PGTABLE_PROT_W;
+
- if (fault_status != FSC_PERM && !device) {
++ if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
+ /* Check the VMM hasn't introduced a new VM_SHARED VMA */
- if (kvm_has_mte(kvm) && shared) {
++ if (!shared)
++ ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
++ else
+ ret = -EFAULT;
- goto out_unlock;
- }
- ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
+ if (ret)
+ goto out_unlock;
-
- clean_dcache_guest_page(pfn, vma_pagesize);
+ }
+
- if (exec_fault) {
+ if (writable)
+ prot |= KVM_PGTABLE_PROT_W;
+
+ if (exec_fault)
prot |= KVM_PGTABLE_PROT_X;
- invalidate_icache_guest_page(pfn, vma_pagesize);
- }
if (device)
prot |= KVM_PGTABLE_PROT_DEVICE;
WARN_ON(range->end - range->start != 1);
+ ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE);
+ if (ret)
+ return false;
+
/*
- * We've moved a page around, probably through CoW, so let's treat it
- * just like a translation fault and clean the cache to the PoC.
- */
- clean_dcache_guest_page(pfn, PAGE_SIZE);
-
- /*
+ * We've moved a page around, probably through CoW, so let's treat
+ * it just like a translation fault and the map handler will clean
+ * the cache to the PoC.
+ *
* The MMU notifiers will have unmapped a huge PMD before calling
* ->change_pte() (which in turn calls kvm_set_spte_gfn()) and
* therefore we never need to clear out a huge PMD through this
if (!vma)
break;
- /*
- * Take the intersection of this VMA with the memory region
- */
- vm_start = max(hva, vma->vm_start);
- vm_end = min(reg_end, vma->vm_end);
-
+ /*
+ * VM_SHARED mappings are not allowed with MTE to avoid races
+ * when updating the PG_mte_tagged page flag, see
+ * sanitise_mte_tags for more details.
+ */
+ if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED)
+ return -EINVAL;
+
if (vma->vm_flags & VM_PFNMAP) {
- gpa_t gpa = mem->guest_phys_addr +
- (vm_start - mem->userspace_addr);
- phys_addr_t pa;
-
- pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
- pa += vm_start - vma->vm_start;
-
/* IO region dirty page logging not allowed */
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
ret = -EINVAL;