Merge branch kvm-arm64/pkvm-vcpu-state into kvmarm-master/next
author		Marc Zyngier <maz@kernel.org>
		Mon, 5 Dec 2022 14:30:49 +0000 (14:30 +0000)
committer	Marc Zyngier <maz@kernel.org>
		Mon, 5 Dec 2022 14:37:23 +0000 (14:37 +0000)
* kvm-arm64/pkvm-vcpu-state: (25 commits)
  : .
  : Large drop of pKVM patches from Will Deacon and co, adding
  : a private vm/vcpu state at EL2, managed independently from
  : the EL1 state. From the cover letter:
  :
  : "This is version six of the pKVM EL2 state series, extending the pKVM
  : hypervisor code so that it can dynamically instantiate and manage VM
  : data structures without the host being able to access them directly.
  : These structures consist of a hyp VM, a set of hyp vCPUs and the stage-2
  : page-table for the MMU. The pages used to hold the hypervisor structures
  : are returned to the host when the VM is destroyed."
  : .
  KVM: arm64: Use the pKVM hyp vCPU structure in handle___kvm_vcpu_run()
  KVM: arm64: Don't unnecessarily map host kernel sections at EL2
  KVM: arm64: Explicitly map 'kvm_vgic_global_state' at EL2
  KVM: arm64: Maintain a copy of 'kvm_arm_vmid_bits' at EL2
  KVM: arm64: Unmap 'kvm_arm_hyp_percpu_base' from the host
  KVM: arm64: Return guest memory from EL2 via dedicated teardown memcache
  KVM: arm64: Instantiate guest stage-2 page-tables at EL2
  KVM: arm64: Consolidate stage-2 initialisation into a single function
  KVM: arm64: Add generic hyp_memcache helpers
  KVM: arm64: Provide I-cache invalidation by virtual address at EL2
  KVM: arm64: Initialise hypervisor copies of host symbols unconditionally
  KVM: arm64: Add per-cpu fixmap infrastructure at EL2
  KVM: arm64: Instantiate pKVM hypervisor VM and vCPU structures from EL1
  KVM: arm64: Add infrastructure to create and track pKVM instances at EL2
  KVM: arm64: Rename 'host_kvm' to 'host_mmu'
  KVM: arm64: Add hyp_spinlock_t static initializer
  KVM: arm64: Include asm/kvm_mmu.h in nvhe/mem_protect.h
  KVM: arm64: Add helpers to pin memory shared with the hypervisor at EL2
  KVM: arm64: Prevent the donation of no-map pages
  KVM: arm64: Implement do_donate() helper for donating memory
  ...

Signed-off-by: Marc Zyngier <maz@kernel.org>
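
A recurring building block in the shortlog above is the per-cpu EL2 fixmap
("KVM: arm64: Add per-cpu fixmap infrastructure at EL2"), which lets the
hypervisor temporarily map an arbitrary page by physical address without
allocating any page-table memory. As a rough, hypothetical sketch only
(hyp_scrub_donated_page() below is not part of the series; it merely assumes
the hyp_fixmap_map()/hyp_fixmap_unmap() behaviour visible in the
arch/arm64/kvm/hyp/nvhe/mm.c hunk further down), EL2 code could use it along
these lines:

	/*
	 * Hypothetical example, not from the series: zero a page at EL2 by
	 * installing its physical address in this CPU's fixmap slot, which
	 * hyp_fixmap_map() returns as a hypervisor VA, then invalidating
	 * the slot again with hyp_fixmap_unmap().
	 */
	static void hyp_scrub_donated_page(phys_addr_t phys)
	{
		void *va = hyp_fixmap_map(phys);

		memset(va, 0, PAGE_SIZE);
		hyp_fixmap_unmap();
	}

Keeping the slot's page-table page refcount permanently elevated, as the
mm.c hunk below notes, is what makes this usable without ever needing to
allocate memory at EL2.
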
arch/arm64/include/asm/kvm_arm.h
arch/arm64/include/asm/kvm_pgtable.h
arch/arm64/kvm/arm.c
arch/arm64/kvm/hyp/nvhe/mem_protect.c
arch/arm64/kvm/hyp/nvhe/mm.c
arch/arm64/kvm/hyp/nvhe/setup.c
arch/arm64/kvm/hyp/pgtable.c
arch/arm64/kvm/mmu.c

arch/arm64/include/asm/kvm_arm.h
Simple merge

arch/arm64/include/asm/kvm_pgtable.h
Simple merge

arch/arm64/kvm/arm.c
@@@ -2043,9 -2071,11 +2060,11 @@@ static int init_hyp_mode(void
                }
  
                /* Prepare the CPU initialization parameters */
 -              cpu_prepare_hyp_mode(cpu);
 +              cpu_prepare_hyp_mode(cpu, hyp_va_bits);
        }
  
+       kvm_hyp_init_symbols();
        if (is_protected_kvm_enabled()) {
                init_cpu_logical_map();
  

arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@@ -79,11 -91,6 +91,11 @@@ static void host_s2_put_page(void *addr
        hyp_put_page(&host_s2_pool, addr);
  }
  
-       kvm_pgtable_stage2_free_removed(&host_kvm.mm_ops, addr, level);
 +static void host_s2_free_removed_table(void *addr, u32 level)
 +{
++      kvm_pgtable_stage2_free_removed(&host_mmu.mm_ops, addr, level);
 +}
 +
  static int prepare_s2_pool(void *pgt_pool_base)
  {
        unsigned long nr_pages, pfn;
        if (ret)
                return ret;
  
-       host_kvm.mm_ops = (struct kvm_pgtable_mm_ops) {
+       host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
                .zalloc_pages_exact = host_s2_zalloc_pages_exact,
                .zalloc_page = host_s2_zalloc_page,
 +              .free_removed_table = host_s2_free_removed_table,
                .phys_to_virt = hyp_phys_to_virt,
                .virt_to_phys = hyp_virt_to_phys,
                .page_count = hyp_page_count,
@@@ -256,8 -397,8 +403,8 @@@ static bool range_is_memory(u64 start, 
  static inline int __host_stage2_idmap(u64 start, u64 end,
                                      enum kvm_pgtable_prot prot)
  {
-       return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start,
+       return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
 -                                    prot, &host_s2_pool);
 +                                    prot, &host_s2_pool, 0);
  }
  
  /*
@@@ -423,15 -565,18 +571,15 @@@ struct check_walk_data 
        enum pkvm_page_state    (*get_page_state)(kvm_pte_t pte);
  };
  
 -static int __check_page_state_visitor(u64 addr, u64 end, u32 level,
 -                                    kvm_pte_t *ptep,
 -                                    enum kvm_pgtable_walk_flags flag,
 -                                    void * const arg)
 +static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
 +                                    enum kvm_pgtable_walk_flags visit)
  {
 -      struct check_walk_data *d = arg;
 -      kvm_pte_t pte = *ptep;
 +      struct check_walk_data *d = ctx->arg;
  
-       if (kvm_pte_valid(ctx->old) && !addr_is_memory(kvm_pte_to_phys(ctx->old)))
 -      if (kvm_pte_valid(pte) && !addr_is_allowed_memory(kvm_pte_to_phys(pte)))
++      if (kvm_pte_valid(ctx->old) && !addr_is_allowed_memory(kvm_pte_to_phys(ctx->old)))
                return -EINVAL;
  
 -      return d->get_page_state(pte) == d->desired ? 0 : -EPERM;
 +      return d->get_page_state(ctx->old) == d->desired ? 0 : -EPERM;
  }
  
  static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,

arch/arm64/kvm/hyp/nvhe/mm.c
@@@ -189,6 -219,103 +219,102 @@@ int hyp_map_vectors(void
        return 0;
  }
  
 -static int __create_fixmap_slot_cb(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 -                                 enum kvm_pgtable_walk_flags flag,
 -                                 void * const arg)
+ void *hyp_fixmap_map(phys_addr_t phys)
+ {
+       struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots);
+       kvm_pte_t pte, *ptep = slot->ptep;
+       pte = *ptep;
+       pte &= ~kvm_phys_to_pte(KVM_PHYS_INVALID);
+       pte |= kvm_phys_to_pte(phys) | KVM_PTE_VALID;
+       WRITE_ONCE(*ptep, pte);
+       dsb(ishst);
+       return (void *)slot->addr;
+ }
+
+ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
+ {
+       kvm_pte_t *ptep = slot->ptep;
+       u64 addr = slot->addr;
+       WRITE_ONCE(*ptep, *ptep & ~KVM_PTE_VALID);
+       /*
+        * Irritatingly, the architecture requires that we use inner-shareable
+        * broadcast TLB invalidation here in case another CPU speculates
+        * through our fixmap and decides to create an "amalgamation of the
+        * values held in the TLB" due to the apparent lack of a
+        * break-before-make sequence.
+        *
+        * https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03
+        */
+       dsb(ishst);
+       __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), (KVM_PGTABLE_MAX_LEVELS - 1));
+       dsb(ish);
+       isb();
+ }
+
+ void hyp_fixmap_unmap(void)
+ {
+       fixmap_clear_slot(this_cpu_ptr(&fixmap_slots));
+ }
 -      struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)arg);
++static int __create_fixmap_slot_cb(const struct kvm_pgtable_visit_ctx *ctx,
++                                 enum kvm_pgtable_walk_flags visit)
+ {
 -      if (!kvm_pte_valid(*ptep) || level != KVM_PGTABLE_MAX_LEVELS - 1)
++      struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)ctx->arg);
 -      slot->addr = addr;
 -      slot->ptep = ptep;
++      if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_MAX_LEVELS - 1)
+               return -EINVAL;
++      slot->addr = ctx->addr;
++      slot->ptep = ctx->ptep;
+       /*
+        * Clear the PTE, but keep the page-table page refcount elevated to
+        * prevent it from ever being freed. This lets us manipulate the PTEs
+        * by hand safely without ever needing to allocate memory.
+        */
+       fixmap_clear_slot(slot);
+       return 0;
+ }
+
+ static int create_fixmap_slot(u64 addr, u64 cpu)
+ {
+       struct kvm_pgtable_walker walker = {
+               .cb     = __create_fixmap_slot_cb,
+               .flags  = KVM_PGTABLE_WALK_LEAF,
+               .arg = (void *)cpu,
+       };
+       return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker);
+ }
+
+ int hyp_create_pcpu_fixmap(void)
+ {
+       unsigned long addr, i;
+       int ret;
+       for (i = 0; i < hyp_nr_cpus; i++) {
+               ret = pkvm_alloc_private_va_range(PAGE_SIZE, &addr);
+               if (ret)
+                       return ret;
+               ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PAGE_SIZE,
+                                         __hyp_pa(__hyp_bss_start), PAGE_HYP);
+               if (ret)
+                       return ret;
+               ret = create_fixmap_slot(addr, i);
+               if (ret)
+                       return ret;
+       }
+       return 0;
+ }
  int hyp_create_idmap(u32 hyp_va_bits)
  {
        unsigned long start, end;

arch/arm64/kvm/hyp/nvhe/setup.c
@@@ -186,30 -189,23 +189,20 @@@ static void hpool_put_page(void *addr
        hyp_put_page(&hpool, addr);
  }
  
- static int finalize_host_mappings_walker(const struct kvm_pgtable_visit_ctx *ctx,
-                                        enum kvm_pgtable_walk_flags visit)
 -static int fix_host_ownership_walker(u64 addr, u64 end, u32 level,
 -                                   kvm_pte_t *ptep,
 -                                   enum kvm_pgtable_walk_flags flag,
 -                                   void * const arg)
++static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
++                                   enum kvm_pgtable_walk_flags visit)
  {
-       struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
        enum kvm_pgtable_prot prot;
        enum pkvm_page_state state;
 -      kvm_pte_t pte = *ptep;
        phys_addr_t phys;
  
 -      if (!kvm_pte_valid(pte))
 +      if (!kvm_pte_valid(ctx->old))
                return 0;
  
-       /*
-        * Fix-up the refcount for the page-table pages as the early allocator
-        * was unable to access the hyp_vmemmap and so the buddy allocator has
-        * initialised the refcount to '1'.
-        */
-       mm_ops->get_page(ctx->ptep);
-       if (visit != KVM_PGTABLE_WALK_LEAF)
-               return 0;
 -      if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
 +      if (ctx->level != (KVM_PGTABLE_MAX_LEVELS - 1))
                return -EINVAL;
  
 -      phys = kvm_pte_to_phys(pte);
 +      phys = kvm_pte_to_phys(ctx->old);
        if (!addr_is_memory(phys))
                return -EINVAL;
  
        /*
         * Adjust the host stage-2 mappings to match the ownership attributes
         * configured in the hypervisor stage-1.
         */
 -      state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
 +      state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(ctx->old));
        switch (state) {
        case PKVM_PAGE_OWNED:
-               return host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id);
+               return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP);
        case PKVM_PAGE_SHARED_OWNED:
                prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED);
                break;
        return host_stage2_idmap_locked(phys, PAGE_SIZE, prot);
  }
  
- static int finalize_host_mappings(void)
 -static int fix_hyp_pgtable_refcnt_walker(u64 addr, u64 end, u32 level,
 -                                       kvm_pte_t *ptep,
 -                                       enum kvm_pgtable_walk_flags flag,
 -                                       void * const arg)
++static int fix_hyp_pgtable_refcnt_walker(const struct kvm_pgtable_visit_ctx *ctx,
++                                       enum kvm_pgtable_walk_flags visit)
+ {
 -      struct kvm_pgtable_mm_ops *mm_ops = arg;
 -      kvm_pte_t pte = *ptep;
 -
+       /*
+        * Fix-up the refcount for the page-table pages as the early allocator
+        * was unable to access the hyp_vmemmap and so the buddy allocator has
+        * initialised the refcount to '1'.
+        */
 -      if (kvm_pte_valid(pte))
 -              mm_ops->get_page(ptep);
++      if (kvm_pte_valid(ctx->old))
++              ctx->mm_ops->get_page(ctx->ptep);
+       return 0;
+ }
+
+ static int fix_host_ownership(void)
  {
        struct kvm_pgtable_walker walker = {
-               .cb     = finalize_host_mappings_walker,
-               .flags  = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
+               .cb     = fix_host_ownership_walker,
+               .flags  = KVM_PGTABLE_WALK_LEAF,
        };
        int i, ret;
  

arch/arm64/kvm/hyp/pgtable.c
@@@ -1217,18 -1188,29 +1205,27 @@@ int __kvm_pgtable_stage2_init(struct kv
        return 0;
  }
  
 -static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 -                            enum kvm_pgtable_walk_flags flag,
 -                            void * const arg)
+ size_t kvm_pgtable_stage2_pgd_size(u64 vtcr)
+ {
+       u32 ia_bits = VTCR_EL2_IPA(vtcr);
+       u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
+       u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
+       return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
+ }
+
 +static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
 +                            enum kvm_pgtable_walk_flags visit)
  {
 -      struct kvm_pgtable_mm_ops *mm_ops = arg;
 -      kvm_pte_t pte = *ptep;
 +      struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
  
 -      if (!stage2_pte_is_counted(pte))
 +      if (!stage2_pte_is_counted(ctx->old))
                return 0;
  
 -      mm_ops->put_page(ptep);
 +      mm_ops->put_page(ctx->ptep);
  
 -      if (kvm_pte_table(pte, level))
 -              mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
 +      if (kvm_pte_table(ctx->old, ctx->level))
 +              mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops));
  
        return 0;
  }

arch/arm64/kvm/mmu.c
Simple merge
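
Many of the conflicts resolved above stem from one side of the merge
reworking the page-table walker callbacks to take a single
const struct kvm_pgtable_visit_ctx * instead of the old
(addr, end, level, ptep, flag, arg) argument list, while the pKVM side adds
new walkers. The sketch below is illustrative only (count_valid_leaves() is
hypothetical), but it follows the walker pattern visible in the hunks above,
using only the ctx fields shown there (ctx->old, ctx->arg) and the
kvm_pgtable_walk() call used by create_fixmap_slot():

	/*
	 * Hypothetical walker: count valid leaf PTEs in [addr, addr + size)
	 * using the visitor-context callback style seen in the hunks above.
	 */
	static int count_valid_leaf_cb(const struct kvm_pgtable_visit_ctx *ctx,
				       enum kvm_pgtable_walk_flags visit)
	{
		unsigned long *count = ctx->arg;

		if (kvm_pte_valid(ctx->old))
			(*count)++;

		return 0;
	}

	static int count_valid_leaves(struct kvm_pgtable *pgt, u64 addr,
				      u64 size, unsigned long *count)
	{
		struct kvm_pgtable_walker walker = {
			.cb	= count_valid_leaf_cb,
			.flags	= KVM_PGTABLE_WALK_LEAF,
			.arg	= count,
		};

		return kvm_pgtable_walk(pgt, addr, size, &walker);
	}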