Merge branch 'kvm-guest-sev-migration' into kvm-master
author Paolo Bonzini <pbonzini@redhat.com>
Thu, 11 Nov 2021 12:40:26 +0000 (07:40 -0500)
committer Paolo Bonzini <pbonzini@redhat.com>
Thu, 11 Nov 2021 12:40:26 +0000 (07:40 -0500)
Add guest API and guest kernel support for SEV live migration.

Introduces a new hypercall to notify the host of changes to the page
encryption status.  If a page is encrypted, it must be migrated
through the SEV firmware or a helper VM sharing the key.  If a page is
not encrypted, it can be migrated normally by userspace.  The new
hypercall is invoked through paravirt_ops.

Conflicts: sev_active() replaced by cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT).

arch/x86/include/asm/kvm_para.h
arch/x86/include/asm/mem_encrypt.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/set_memory.h
arch/x86/kernel/kvm.c
arch/x86/kernel/paravirt.c
arch/x86/mm/mem_encrypt.c
arch/x86/mm/pat/set_memory.c
include/linux/efi.h

index 6929987..56935eb 100644 (file)
@@ -83,6 +83,18 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
        return ret;
 }
 
+static inline long kvm_sev_hypercall3(unsigned int nr, unsigned long p1,
+                                     unsigned long p2, unsigned long p3)
+{
+       long ret;
+       /* Three-argument hypercall that always uses the "vmmcall" instruction. */
+       asm volatile("vmmcall"
+                    : "=a"(ret)                          /* result comes back in the "a" register */
+                    : "a"(nr), "b"(p1), "c"(p2), "d"(p3) /* hypercall number, then the three arguments */
+                    : "memory");
+       return ret;
+}
+
 #ifdef CONFIG_KVM_GUEST
 void kvmclock_init(void);
 void kvmclock_disable(void);
index 2d4f5c1..e2c6f43 100644 (file)
@@ -44,6 +44,8 @@ void __init sme_enable(struct boot_params *bp);
 
 int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
 int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
+void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages,
+                                           bool enc);
 
 void __init mem_encrypt_free_decrypted_mem(void);
 
@@ -78,6 +80,8 @@ static inline int __init
 early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; }
 static inline int __init
 early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
+static inline void __init
+early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) {}
 
 static inline void mem_encrypt_free_decrypted_mem(void) { }
 
index cebec95..21c4a69 100644 (file)
@@ -97,6 +97,12 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
        PVOP_VCALL1(mmu.exit_mmap, mm);
 }
 
+static inline void notify_page_enc_status_changed(unsigned long pfn,
+                                                 int npages, bool enc)
+{      /* Forwards (pfn, npages, enc) unchanged to the mmu.notify_page_enc_status_changed pv op. */
+       PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc);
+}
+
 #ifdef CONFIG_PARAVIRT_XXL
 static inline void load_sp0(unsigned long sp0)
 {
index fc1151e..a69012e 100644 (file)
@@ -168,6 +168,7 @@ struct pv_mmu_ops {
 
        /* Hook for intercepting the destruction of an mm_struct. */
        void (*exit_mmap)(struct mm_struct *mm);
+       void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc);
 
 #ifdef CONFIG_PARAVIRT_XXL
        struct paravirt_callee_save read_cr2;
index 43fa081..8726175 100644 (file)
@@ -83,6 +83,7 @@ int set_pages_rw(struct page *page, int numpages);
 int set_direct_map_invalid_noflush(struct page *page);
 int set_direct_map_default_noflush(struct page *page);
 bool kernel_page_present(struct page *page);
+void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc);
 
 extern int kernel_set_to_readonly;
 
index 8863d19..41e2965 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/swait.h>
 #include <linux/syscore_ops.h>
 #include <linux/cc_platform.h>
+#include <linux/efi.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
 #include <asm/traps.h>
@@ -41,6 +42,7 @@
 #include <asm/ptrace.h>
 #include <asm/reboot.h>
 #include <asm/svm.h>
+#include <asm/e820/api.h>
 
 DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
 
@@ -434,6 +436,8 @@ static void kvm_guest_cpu_offline(bool shutdown)
        kvm_disable_steal_time();
        if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
                wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+       if (kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
+               wrmsrl(MSR_KVM_MIGRATION_CONTROL, 0);
        kvm_pv_disable_apf();
        if (!shutdown)
                apf_task_wake_all();
@@ -548,6 +552,55 @@ static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
        __send_ipi_mask(local_mask, vector);
 }
 
+static int __init setup_efi_kvm_sev_migration(void)
+{
+       efi_char16_t efi_sev_live_migration_enabled[] = L"SevLiveMigrationEnabled";
+       efi_guid_t efi_variable_guid = AMD_SEV_MEM_ENCRYPT_GUID;
+       efi_status_t status;
+       unsigned long size;
+       bool enabled;
+       /* Set KVM_MIGRATION_READY only if the SevLiveMigrationEnabled EFI variable is non-zero. */
+       if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) ||
+           !kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
+               return 0;
+       /* Non-EFI boots stop here; kvm_init_platform() writes the MSR itself in that case. */
+       if (!efi_enabled(EFI_BOOT))
+               return 0;
+       /* efi.get_variable() below is only called when runtime services are enabled. */
+       if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
+               pr_info("%s : EFI runtime services are not enabled\n", __func__);
+               return 0;
+       }
+
+       size = sizeof(enabled);
+
+       /* Get variable contents into buffer */
+       status = efi.get_variable(efi_sev_live_migration_enabled,
+                                 &efi_variable_guid, NULL, &size, &enabled);
+
+       if (status == EFI_NOT_FOUND) {
+               pr_info("%s : EFI live migration variable not found\n", __func__);
+               return 0;
+       }
+
+       if (status != EFI_SUCCESS) {
+               pr_info("%s : EFI variable retrieval failed\n", __func__);
+               return 0;
+       }
+       /* A zero value means disabled: MSR_KVM_MIGRATION_CONTROL is left untouched. */
+       if (enabled == 0) {
+               pr_info("%s: live migration disabled in EFI\n", __func__);
+               return 0;
+       }
+
+       pr_info("%s : live migration enabled in EFI\n", __func__);
+       wrmsrl(MSR_KVM_MIGRATION_CONTROL, KVM_MIGRATION_READY);
+
+       return 1; /* 1 only on this path; every early exit above returns 0 */
+}
+
+late_initcall(setup_efi_kvm_sev_migration);
+
 /*
  * Set the IPI entry points
  */
@@ -806,8 +859,62 @@ static bool __init kvm_msi_ext_dest_id(void)
        return kvm_para_has_feature(KVM_FEATURE_MSI_EXT_DEST_ID);
 }
 
+static void kvm_sev_hc_page_enc_status(unsigned long pfn, int npages, bool enc)
+{      /* Range start is pfn << PAGE_SHIFT, count is npages; flags carry enc plus the 4K page-size bit. */
+       kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, pfn << PAGE_SHIFT, npages,
+                          KVM_MAP_GPA_RANGE_ENC_STAT(enc) | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
+}
+
 static void __init kvm_init_platform(void)
 {
+       if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) &&
+           kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) {
+               unsigned long nr_pages;
+               int i;
+               /* Route page encryption status notifications to the KVM_HC_MAP_GPA_RANGE hypercall. */
+               pv_ops.mmu.notify_page_enc_status_changed =
+                       kvm_sev_hc_page_enc_status;
+
+               /*
+                * Reset the host's shared pages list related to kernel
+                * specific page encryption status settings before we load a
+                * new kernel by kexec. Reset the page encryption status
+                * during early boot instead of just before kexec to avoid SMP
+                * races during kvm_pv_guest_cpu_reboot().
+                * NOTE: We cannot reset the complete shared pages list
+                * here as we need to retain the UEFI/OVMF firmware
+                * specific settings.
+                */
+
+               for (i = 0; i < e820_table->nr_entries; i++) {
+                       struct e820_entry *entry = &e820_table->entries[i];
+                       /* Only E820_TYPE_RAM entries are reported. */
+                       if (entry->type != E820_TYPE_RAM)
+                               continue;
+
+                       nr_pages = DIV_ROUND_UP(entry->size, PAGE_SIZE);
+                       /* Mark the entry's whole range, rounded up to 4K pages, as encrypted. */
+                       kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, entry->addr,
+                                      nr_pages,
+                                      KVM_MAP_GPA_RANGE_ENCRYPTED | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
+               }
+
+               /*
+                * Ensure that _bss_decrypted section is marked as decrypted in the
+                * shared pages list.
+                */
+               nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted,
+                                       PAGE_SIZE);
+               early_set_mem_enc_dec_hypercall((unsigned long)__start_bss_decrypted,
+                                               nr_pages, 0);
+
+               /*
+                * If not booted using EFI, enable Live migration support.
+                */
+               if (!efi_enabled(EFI_BOOT))
+                       wrmsrl(MSR_KVM_MIGRATION_CONTROL,
+                              KVM_MIGRATION_READY);
+       }
        kvmclock_init();
        x86_platform.apic_post_init = kvm_apic_init;
 }
index 7157c2d..7f7636a 100644 (file)
@@ -337,6 +337,7 @@ struct paravirt_patch_template pv_ops = {
                        (void (*)(struct mmu_gather *, void *))tlb_remove_page,
 
        .mmu.exit_mmap          = paravirt_nop,
+       .mmu.notify_page_enc_status_changed     = paravirt_nop,
 
 #ifdef CONFIG_PARAVIRT_XXL
        .mmu.read_cr2           = __PV_IS_CALLEE_SAVE(pv_native_read_cr2),
index 23d54b8..3548730 100644 (file)
@@ -229,28 +229,75 @@ void __init sev_setup_arch(void)
        swiotlb_adjust_size(size);
 }
 
-static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
+static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
 {
-       pgprot_t old_prot, new_prot;
-       unsigned long pfn, pa, size;
-       pte_t new_pte;
+       unsigned long pfn = 0;
+       pgprot_t prot;
 
        switch (level) {
        case PG_LEVEL_4K:
                pfn = pte_pfn(*kpte);
-               old_prot = pte_pgprot(*kpte);
+               prot = pte_pgprot(*kpte);
                break;
        case PG_LEVEL_2M:
                pfn = pmd_pfn(*(pmd_t *)kpte);
-               old_prot = pmd_pgprot(*(pmd_t *)kpte);
+               prot = pmd_pgprot(*(pmd_t *)kpte);
                break;
        case PG_LEVEL_1G:
                pfn = pud_pfn(*(pud_t *)kpte);
-               old_prot = pud_pgprot(*(pud_t *)kpte);
+               prot = pud_pgprot(*(pud_t *)kpte);
                break;
        default:
-               return;
+               WARN_ONCE(1, "Invalid level for kpte\n");
+               return 0;
+       }
+
+       if (ret_prot)
+               *ret_prot = prot;
+
+       return pfn;
+}
+
+/*
+ * Report to the hypervisor, one mapping at a time, that npages pages at vaddr
+ * are now encrypted (enc) or decrypted (!enc). No-op without CONFIG_PARAVIRT.
+ */
+void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc)
+{
+#ifdef CONFIG_PARAVIRT
+       /* Widen before shifting: an int shift overflows for ranges of 2G and up. */
+       unsigned long vaddr_end = vaddr + ((unsigned long)npages << PAGE_SHIFT);
+
+       while (vaddr < vaddr_end) {
+               unsigned long psize, pmask, pfn;
+               pte_t *kpte;
+               int level;
+
+               kpte = lookup_address(vaddr, &level);
+               if (!kpte || pte_none(*kpte)) {
+                       WARN_ONCE(1, "kpte lookup for vaddr\n");
+                       return;
+               }
+               pfn = pg_level_to_pfn(level, kpte, NULL);
+               psize = page_level_size(level);
+               pmask = page_level_mask(level);
+
+               /* A zero pfn is not reported, but vaddr must still advance. */
+               if (pfn)
+                       notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT, enc);
+               vaddr = (vaddr & pmask) + psize;
        }
+#endif
+}
+
+static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
+{
+       pgprot_t old_prot, new_prot;
+       unsigned long pfn, pa, size;
+       pte_t new_pte;
+
+       pfn = pg_level_to_pfn(level, kpte, &old_prot);
+       if (!pfn)
+               return;
 
        new_prot = old_prot;
        if (enc)
@@ -286,12 +333,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
 static int __init early_set_memory_enc_dec(unsigned long vaddr,
                                           unsigned long size, bool enc)
 {
-       unsigned long vaddr_end, vaddr_next;
+       unsigned long vaddr_end, vaddr_next, start;
        unsigned long psize, pmask;
        int split_page_size_mask;
        int level, ret;
        pte_t *kpte;
 
+       start = vaddr;
        vaddr_next = vaddr;
        vaddr_end = vaddr + size;
 
@@ -346,6 +394,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
 
        ret = 0;
 
+       notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
 out:
        __flush_tlb_all();
        return ret;
@@ -361,6 +410,11 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
        return early_set_memory_enc_dec(vaddr, size, true);
 }
 
+void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
+{      /* __init wrapper: passes its arguments straight to notify_range_enc_status_changed(). */
+       notify_range_enc_status_changed(vaddr, npages, enc);
+}
+
 /* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
 bool force_dma_unencrypted(struct device *dev)
 {
index 934dc5b..b407211 100644 (file)
@@ -2023,6 +2023,12 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
         */
        cpa_flush(&cpa, 0);
 
+       /*
+        * Notify hypervisor that a given memory range is mapped encrypted
+        * or decrypted.
+        */
+       notify_range_enc_status_changed(addr, numpages, enc);
+
        return ret;
 }
 
index 6b5d36b..dbd39b2 100644 (file)
@@ -362,6 +362,7 @@ void efi_native_runtime_setup(void);
 
 /* OEM GUIDs */
 #define DELLEMC_EFI_RCI2_TABLE_GUID            EFI_GUID(0x2d9f28a2, 0xa886, 0x456a,  0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55)
+#define AMD_SEV_MEM_ENCRYPT_GUID               EFI_GUID(0x0cf29b71, 0x9e51, 0x433a,  0xa3, 0xb7, 0x81, 0xf3, 0xab, 0x16, 0xb8, 0x75)
 
 typedef struct {
        efi_guid_t guid;