KVM: arm64: Implement do_share() helper for sharing memory
authorWill Deacon <will@kernel.org>
Wed, 15 Dec 2021 16:12:27 +0000 (16:12 +0000)
committerMarc Zyngier <maz@kernel.org>
Thu, 16 Dec 2021 12:58:57 +0000 (12:58 +0000)
By default, protected KVM isolates memory pages so that they are
accessible only to their owner: be it the host kernel, the hypervisor
at EL2 or (in future) the guest. Establishing shared-memory regions
between these components therefore involves a transition for each page
so that the owner can share memory with a borrower under a certain set
of permissions.

Introduce a do_share() helper for safely sharing a memory region between
two components. Currently, only host-to-hyp sharing is implemented, but
the code is easily extended to handle other combinations and the
permission checks for each component are reusable.

Reviewed-by: Andrew Walbran <qwandor@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20211215161232.1480836-11-qperret@google.com
arch/arm64/kvm/hyp/nvhe/mem_protect.c

index 757dfefe3aeb85e120c8e4431c8a294bcdc72901..e612fd9d89755112657a8a5fc317bc3fd785efd7 100644 (file)
@@ -471,3 +471,240 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
        ret = host_stage2_idmap(addr);
        BUG_ON(ret && ret != -EAGAIN);
 }
+
+/* This corresponds to locking order */
+enum pkvm_component_id {
+       PKVM_ID_HOST,
+       PKVM_ID_HYP,
+};
+
+struct pkvm_mem_transition {
+       u64                             nr_pages;
+
+       struct {
+               enum pkvm_component_id  id;
+               /* Address in the initiator's address space */
+               u64                     addr;
+
+               union {
+                       struct {
+                               /* Address in the completer's address space */
+                               u64     completer_addr;
+                       } host;
+               };
+       } initiator;
+
+       struct {
+               enum pkvm_component_id  id;
+       } completer;
+};
+
+struct pkvm_mem_share {
+       const struct pkvm_mem_transition        tx;
+       const enum kvm_pgtable_prot             completer_prot;
+};
+
+struct check_walk_data {
+       enum pkvm_page_state    desired;
+       enum pkvm_page_state    (*get_page_state)(kvm_pte_t pte);
+};
+
+static int __check_page_state_visitor(u64 addr, u64 end, u32 level,
+                                     kvm_pte_t *ptep,
+                                     enum kvm_pgtable_walk_flags flag,
+                                     void * const arg)
+{
+       struct check_walk_data *d = arg;
+       kvm_pte_t pte = *ptep;
+
+       if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte)))
+               return -EINVAL;
+
+       return d->get_page_state(pte) == d->desired ? 0 : -EPERM;
+}
+
+static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
+                                 struct check_walk_data *data)
+{
+       struct kvm_pgtable_walker walker = {
+               .cb     = __check_page_state_visitor,
+               .arg    = data,
+               .flags  = KVM_PGTABLE_WALK_LEAF,
+       };
+
+       return kvm_pgtable_walk(pgt, addr, size, &walker);
+}
+
+static enum pkvm_page_state host_get_page_state(kvm_pte_t pte)
+{
+       if (!kvm_pte_valid(pte) && pte)
+               return PKVM_NOPAGE;
+
+       return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
+}
+
+static int __host_check_page_state_range(u64 addr, u64 size,
+                                        enum pkvm_page_state state)
+{
+       struct check_walk_data d = {
+               .desired        = state,
+               .get_page_state = host_get_page_state,
+       };
+
+       hyp_assert_lock_held(&host_kvm.lock);
+       return check_page_state_range(&host_kvm.pgt, addr, size, &d);
+}
+
+static int __host_set_page_state_range(u64 addr, u64 size,
+                                      enum pkvm_page_state state)
+{
+       enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state);
+
+       return host_stage2_idmap_locked(addr, size, prot);
+}
+
+static int host_request_owned_transition(u64 *completer_addr,
+                                        const struct pkvm_mem_transition *tx)
+{
+       u64 size = tx->nr_pages * PAGE_SIZE;
+       u64 addr = tx->initiator.addr;
+
+       *completer_addr = tx->initiator.host.completer_addr;
+       return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
+}
+
+static int host_initiate_share(u64 *completer_addr,
+                              const struct pkvm_mem_transition *tx)
+{
+       u64 size = tx->nr_pages * PAGE_SIZE;
+       u64 addr = tx->initiator.addr;
+
+       *completer_addr = tx->initiator.host.completer_addr;
+       return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
+}
+
+static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
+{
+       if (!kvm_pte_valid(pte))
+               return PKVM_NOPAGE;
+
+       return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
+}
+
+static int __hyp_check_page_state_range(u64 addr, u64 size,
+                                       enum pkvm_page_state state)
+{
+       struct check_walk_data d = {
+               .desired        = state,
+               .get_page_state = hyp_get_page_state,
+       };
+
+       hyp_assert_lock_held(&pkvm_pgd_lock);
+       return check_page_state_range(&pkvm_pgtable, addr, size, &d);
+}
+
+static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
+{
+       return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
+                tx->initiator.id != PKVM_ID_HOST);
+}
+
+static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx,
+                        enum kvm_pgtable_prot perms)
+{
+       u64 size = tx->nr_pages * PAGE_SIZE;
+
+       if (perms != PAGE_HYP)
+               return -EPERM;
+
+       if (__hyp_ack_skip_pgtable_check(tx))
+               return 0;
+
+       return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
+}
+
+static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
+                             enum kvm_pgtable_prot perms)
+{
+       void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
+       enum kvm_pgtable_prot prot;
+
+       prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED);
+       return pkvm_create_mappings_locked(start, end, prot);
+}
+
+static int check_share(struct pkvm_mem_share *share)
+{
+       const struct pkvm_mem_transition *tx = &share->tx;
+       u64 completer_addr;
+       int ret;
+
+       switch (tx->initiator.id) {
+       case PKVM_ID_HOST:
+               ret = host_request_owned_transition(&completer_addr, tx);
+               break;
+       default:
+               ret = -EINVAL;
+       }
+
+       if (ret)
+               return ret;
+
+       switch (tx->completer.id) {
+       case PKVM_ID_HYP:
+               ret = hyp_ack_share(completer_addr, tx, share->completer_prot);
+               break;
+       default:
+               ret = -EINVAL;
+       }
+
+       return ret;
+}
+
+static int __do_share(struct pkvm_mem_share *share)
+{
+       const struct pkvm_mem_transition *tx = &share->tx;
+       u64 completer_addr;
+       int ret;
+
+       switch (tx->initiator.id) {
+       case PKVM_ID_HOST:
+               ret = host_initiate_share(&completer_addr, tx);
+               break;
+       default:
+               ret = -EINVAL;
+       }
+
+       if (ret)
+               return ret;
+
+       switch (tx->completer.id) {
+       case PKVM_ID_HYP:
+               ret = hyp_complete_share(completer_addr, tx, share->completer_prot);
+               break;
+       default:
+               ret = -EINVAL;
+       }
+
+       return ret;
+}
+
+/*
+ * do_share():
+ *
+ * The page owner grants access to another component with a given set
+ * of permissions.
+ *
+ * Initiator: OWNED    => SHARED_OWNED
+ * Completer: NOPAGE   => SHARED_BORROWED
+ */
+static int do_share(struct pkvm_mem_share *share)
+{
+       int ret;
+
+       ret = check_share(share);
+       if (ret)
+               return ret;
+
+       return WARN_ON(__do_share(share));
+}