Merge tag 'kvmarm-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm...
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c (platform/kernel/linux-starfive.git)
index 95dae02..aa740a9 100644
 
 #define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX        GENMASK(4, 2)
 #define KVM_PTE_LEAF_ATTR_LO_S1_AP     GENMASK(7, 6)
-#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO  3
-#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW  1
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO          \
+       ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; })
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW          \
+       ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; })
 #define KVM_PTE_LEAF_ATTR_LO_S1_SH     GENMASK(9, 8)
 #define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS  3
 #define KVM_PTE_LEAF_ATTR_LO_S1_AF     BIT(10)
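The RO/RW encodings above are no longer compile-time constants: with hVHE (ARM64_KVM_HVHE) the hypervisor's stage-1 tables use the EL2&0 translation regime, where AP[1] selects EL0 access instead of being treated as 1, so the read-only and read-write encodings differ from the EL2-only regime. A rough sketch of how these macros get consumed, loosely following hyp_set_prot_attr() and not part of this hunk:

	/* Illustrative only: the AP encoding is now picked at runtime. */
	u64 attr = 0;
	u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW :
					       KVM_PTE_LEAF_ATTR_LO_S1_AP_RO;

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);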
@@ -34,7 +36,7 @@
 #define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS  3
 #define KVM_PTE_LEAF_ATTR_LO_S2_AF     BIT(10)
 
-#define KVM_PTE_LEAF_ATTR_HI           GENMASK(63, 51)
+#define KVM_PTE_LEAF_ATTR_HI           GENMASK(63, 50)
 
 #define KVM_PTE_LEAF_ATTR_HI_SW                GENMASK(58, 55)
 
@@ -42,6 +44,8 @@
 
 #define KVM_PTE_LEAF_ATTR_HI_S2_XN     BIT(54)
 
+#define KVM_PTE_LEAF_ATTR_HI_S1_GP     BIT(50)
+
 #define KVM_PTE_LEAF_ATTR_S2_PERMS     (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
                                         KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
                                         KVM_PTE_LEAF_ATTR_HI_S2_XN)
@@ -63,6 +67,16 @@ struct kvm_pgtable_walk_data {
        const u64                       end;
 };
 
+static bool kvm_pgtable_walk_skip_bbm_tlbi(const struct kvm_pgtable_visit_ctx *ctx)
+{
+       return unlikely(ctx->flags & KVM_PGTABLE_WALK_SKIP_BBM_TLBI);
+}
+
+static bool kvm_pgtable_walk_skip_cmo(const struct kvm_pgtable_visit_ctx *ctx)
+{
+       return unlikely(ctx->flags & KVM_PGTABLE_WALK_SKIP_CMO);
+}
+
 static bool kvm_phys_is_valid(u64 phys)
 {
        return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX));
@@ -386,6 +400,9 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
 
                if (device)
                        return -EINVAL;
+
+               if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti())
+                       attr |= KVM_PTE_LEAF_ATTR_HI_S1_GP;
        } else {
                attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
        }
@@ -623,10 +640,18 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
 #ifdef CONFIG_ARM64_HW_AFDBM
        /*
         * Enable the Hardware Access Flag management, unconditionally
-        * on all CPUs. The features is RES0 on CPUs without the support
-        * and must be ignored by the CPUs.
+        * on all CPUs. In systems that have asymmetric support for the feature
+        * this allows KVM to leverage hardware support on the subset of cores
+        * that implement the feature.
+        *
+        * The architecture requires VTCR_EL2.HA to be RES0 (thus ignored by
+        * hardware) on implementations that do not advertise support for the
+        * feature. As such, setting HA unconditionally is safe, unless you
+        * happen to be running on a design that has unadvertised support for
+        * HAFDBS. Here be dragons.
         */
-       vtcr |= VTCR_EL2_HA;
+       if (!cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
+               vtcr |= VTCR_EL2_HA;
 #endif /* CONFIG_ARM64_HW_AFDBM */
 
        /* Set the vmid bits */
@@ -755,14 +780,17 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
        if (!stage2_try_set_pte(ctx, KVM_INVALID_PTE_LOCKED))
                return false;
 
-       /*
-        * Perform the appropriate TLB invalidation based on the evicted pte
-        * value (if any).
-        */
-       if (kvm_pte_table(ctx->old, ctx->level))
-               kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
-       else if (kvm_pte_valid(ctx->old))
-               kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
+       if (!kvm_pgtable_walk_skip_bbm_tlbi(ctx)) {
+               /*
+                * Perform the appropriate TLB invalidation based on the
+                * evicted pte value (if any).
+                */
+               if (kvm_pte_table(ctx->old, ctx->level))
+                       kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+               else if (kvm_pte_valid(ctx->old))
+                       kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
+                                    ctx->addr, ctx->level);
+       }
 
        if (stage2_pte_is_counted(ctx->old))
                mm_ops->put_page(ctx->ptep);
@@ -869,11 +897,13 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
                return -EAGAIN;
 
        /* Perform CMOs before installation of the guest stage-2 PTE */
-       if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new))
+       if (!kvm_pgtable_walk_skip_cmo(ctx) && mm_ops->dcache_clean_inval_poc &&
+           stage2_pte_cacheable(pgt, new))
                mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops),
-                                               granule);
+                                              granule);
 
-       if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
+       if (!kvm_pgtable_walk_skip_cmo(ctx) && mm_ops->icache_inval_pou &&
+           stage2_pte_executable(new))
                mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);
 
        stage2_make_pte(ctx, new);
@@ -895,7 +925,7 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx,
        if (ret)
                return ret;
 
-       mm_ops->free_removed_table(childp, ctx->level);
+       mm_ops->free_unlinked_table(childp, ctx->level);
        return 0;
 }
 
@@ -940,7 +970,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
  * The TABLE_PRE callback runs for table entries on the way down, looking
  * for table entries which we could conceivably replace with a block entry
  * for this mapping. If it finds one it replaces the entry and calls
- * kvm_pgtable_mm_ops::free_removed_table() to tear down the detached table.
+ * kvm_pgtable_mm_ops::free_unlinked_table() to tear down the detached table.
  *
  * Otherwise, the LEAF callback performs the mapping at the existing leaves
  * instead.
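For context, the map path enables both of these visits when it walks, roughly as in the existing kvm_pgtable_stage2_map() setup (a sketch, not something this diff adds):

	struct kvm_pgtable_walker walker = {
		.cb	= stage2_map_walker,
		.flags	= KVM_PGTABLE_WALK_TABLE_PRE |
			  KVM_PGTABLE_WALK_LEAF,
		.arg	= &map_data,
	};

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);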
@@ -1209,7 +1239,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
                                       KVM_PGTABLE_WALK_HANDLE_FAULT |
                                       KVM_PGTABLE_WALK_SHARED);
        if (!ret)
-               kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
+               kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level);
        return ret;
 }
 
@@ -1242,6 +1272,162 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
        return kvm_pgtable_walk(pgt, addr, size, &walker);
 }
 
+kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
+                                             u64 phys, u32 level,
+                                             enum kvm_pgtable_prot prot,
+                                             void *mc, bool force_pte)
+{
+       struct stage2_map_data map_data = {
+               .phys           = phys,
+               .mmu            = pgt->mmu,
+               .memcache       = mc,
+               .force_pte      = force_pte,
+       };
+       struct kvm_pgtable_walker walker = {
+               .cb             = stage2_map_walker,
+               .flags          = KVM_PGTABLE_WALK_LEAF |
+                                 KVM_PGTABLE_WALK_SKIP_BBM_TLBI |
+                                 KVM_PGTABLE_WALK_SKIP_CMO,
+               .arg            = &map_data,
+       };
+       /*
+        * The input address (.addr) is irrelevant for walking an
+        * unlinked table. Construct an ambiguous IA range to map
+        * kvm_granule_size(level) worth of memory.
+        */
+       struct kvm_pgtable_walk_data data = {
+               .walker = &walker,
+               .addr   = 0,
+               .end    = kvm_granule_size(level),
+       };
+       struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
+       kvm_pte_t *pgtable;
+       int ret;
+
+       if (!IS_ALIGNED(phys, kvm_granule_size(level)))
+               return ERR_PTR(-EINVAL);
+
+       ret = stage2_set_prot_attr(pgt, prot, &map_data.attr);
+       if (ret)
+               return ERR_PTR(ret);
+
+       pgtable = mm_ops->zalloc_page(mc);
+       if (!pgtable)
+               return ERR_PTR(-ENOMEM);
+
+       ret = __kvm_pgtable_walk(&data, mm_ops, (kvm_pteref_t)pgtable,
+                                level + 1);
+       if (ret) {
+               kvm_pgtable_stage2_free_unlinked(mm_ops, pgtable, level);
+               mm_ops->put_page(pgtable);
+               return ERR_PTR(ret);
+       }
+
+       return pgtable;
+}
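A hedged usage sketch for the new helper; pgt, mc and phys are assumed to come from the caller and are not part of this patch:

	kvm_pte_t *childp;

	/*
	 * Subtree for a level-2 block (2MiB with 4KiB granules), broken
	 * all the way down to PTEs.
	 */
	childp = kvm_pgtable_stage2_create_unlinked(pgt, phys, 2,
						    KVM_PGTABLE_PROT_R |
						    KVM_PGTABLE_PROT_W,
						    mc, true);
	if (IS_ERR(childp))
		return PTR_ERR(childp);

	/*
	 * The subtree is not yet reachable by hardware; if splicing it
	 * into the live table fails, undo with
	 * kvm_pgtable_stage2_free_unlinked(pgt->mm_ops, childp, 2).
	 */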
+
+/*
+ * Get the number of page-tables needed to replace a block with a
+ * fully populated tree up to the PTE entries. Note that @level is
+ * interpreted as in "level @level entry".
+ */
+static int stage2_block_get_nr_page_tables(u32 level)
+{
+       switch (level) {
+       case 1:
+               return PTRS_PER_PTE + 1;
+       case 2:
+               return 1;
+       case 3:
+               return 0;
+       default:
+               WARN_ON_ONCE(level < KVM_PGTABLE_MIN_BLOCK_LEVEL ||
+                            level >= KVM_PGTABLE_MAX_LEVELS);
+               return -EINVAL;
+       };
+}
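To make the counts concrete, assuming 4KiB granules (PTRS_PER_PTE == 512): a level-1 block spans 1GiB and needs one level-2 table plus 512 level-3 tables, i.e. PTRS_PER_PTE + 1 pages; a level-2 block (2MiB) needs only the single page of PTEs that maps it; level 3 is already PTE-granular. As an illustrative check (not in the patch):

	WARN_ON(stage2_block_get_nr_page_tables(1) != PTRS_PER_PTE + 1);
	WARN_ON(stage2_block_get_nr_page_tables(2) != 1);
	WARN_ON(stage2_block_get_nr_page_tables(3) != 0);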
+
+static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
+                              enum kvm_pgtable_walk_flags visit)
+{
+       struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
+       struct kvm_mmu_memory_cache *mc = ctx->arg;
+       struct kvm_s2_mmu *mmu;
+       kvm_pte_t pte = ctx->old, new, *childp;
+       enum kvm_pgtable_prot prot;
+       u32 level = ctx->level;
+       bool force_pte;
+       int nr_pages;
+       u64 phys;
+
+       /* No huge-pages exist at the last level */
+       if (level == KVM_PGTABLE_MAX_LEVELS - 1)
+               return 0;
+
+       /* We only split valid block mappings */
+       if (!kvm_pte_valid(pte))
+               return 0;
+
+       nr_pages = stage2_block_get_nr_page_tables(level);
+       if (nr_pages < 0)
+               return nr_pages;
+
+       if (mc->nobjs >= nr_pages) {
+               /* Build a tree mapped down to the PTE granularity. */
+               force_pte = true;
+       } else {
+               /*
+                * Don't force PTEs, so create_unlinked() below does
+                * not populate the tree up to the PTE level. The
+                * consequence is that the call will require a single
+                * page of level 2 entries at level 1, or a single
+                * page of PTEs at level 2. If we are at level 1, the
+                * PTEs will be created recursively.
+                */
+               force_pte = false;
+               nr_pages = 1;
+       }
+
+       if (mc->nobjs < nr_pages)
+               return -ENOMEM;
+
+       mmu = container_of(mc, struct kvm_s2_mmu, split_page_cache);
+       phys = kvm_pte_to_phys(pte);
+       prot = kvm_pgtable_stage2_pte_prot(pte);
+
+       childp = kvm_pgtable_stage2_create_unlinked(mmu->pgt, phys,
+                                                   level, prot, mc, force_pte);
+       if (IS_ERR(childp))
+               return PTR_ERR(childp);
+
+       if (!stage2_try_break_pte(ctx, mmu)) {
+               kvm_pgtable_stage2_free_unlinked(mm_ops, childp, level);
+               mm_ops->put_page(childp);
+               return -EAGAIN;
+       }
+
+       /*
+        * Note, the contents of the page table are guaranteed to be made
+        * visible before the new PTE is assigned because stage2_make_pte()
+        * writes the PTE using smp_store_release().
+        */
+       new = kvm_init_table_pte(childp, mm_ops);
+       stage2_make_pte(ctx, new);
+       dsb(ishst);
+       return 0;
+}
+
+int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+                            struct kvm_mmu_memory_cache *mc)
+{
+       struct kvm_pgtable_walker walker = {
+               .cb     = stage2_split_walker,
+               .flags  = KVM_PGTABLE_WALK_LEAF,
+               .arg    = mc,
+       };
+
+       return kvm_pgtable_walk(pgt, addr, size, &walker);
+}
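The memcache handed to the walker must be the split_page_cache embedded in the owning kvm_s2_mmu, since stage2_split_walker() recovers the MMU via container_of(). A hedged sketch of a caller, loosely modelled on how eager page splitting is expected to drive it; the capacity value and error handling are assumptions:

	struct kvm_mmu_memory_cache *cache = &mmu->split_page_cache;
	int ret;

	/* Pre-fill the cache so the walker never runs dry mid-split. */
	ret = __kvm_mmu_topup_memory_cache(cache, capacity, capacity);
	if (ret)
		return ret;

	/* Split block mappings in [addr, addr + size) into smaller ones. */
	return kvm_pgtable_stage2_split(mmu->pgt, addr, size, cache);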
 
 int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
                              struct kvm_pgtable_mm_ops *mm_ops,
@@ -1311,7 +1497,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
        pgt->pgd = NULL;
 }
 
-void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
+void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
 {
        kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
        struct kvm_pgtable_walker walker = {