From c9b5d98b25161a7ebee6ea59d6424dd9f33c1b99 Mon Sep 17 00:00:00 2001 From: Ankur Arora Date: Fri, 2 Jun 2017 17:06:01 -0700 Subject: [PATCH] xen/vcpu: Handle xen_vcpu_setup() failure in hotplug The hypercall VCPUOP_register_vcpu_info can fail. This failure is handled by making per_cpu(xen_vcpu, cpu) point to its shared_info slot and those without one (cpu >= MAX_VIRT_CPUS) be NULL. For PVH/PVHVM, this is not enough, because we also need to pull these VCPUs out of circulation. Fix for PVH/PVHVM: on registration failure in the cpuhp prepare callback (xen_cpu_up_prepare_hvm()), return an error to the cpuhp state-machine so it can fail the CPU init. Fix for PV: the registration happens before smp_init(), so, in the failure case we clamp setup_max_cpus and limit the number of VCPUs that smp_init() will bring-up to MAX_VIRT_CPUS. This is functionally correct but it makes the code a bit simpler if we get rid of this explicit clamping: for VCPUs that don't have valid xen_vcpu, fail the CPU init in the cpuhp prepare callback (xen_cpu_up_prepare_pv()). Reviewed-by: Boris Ostrovsky Signed-off-by: Ankur Arora Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 46 +++++++++++++++++++++++++------------------- arch/x86/xen/enlighten_hvm.c | 9 +++++---- arch/x86/xen/enlighten_pv.c | 14 +++++++++++++- arch/x86/xen/xen-ops.h | 2 +- 4 files changed, 45 insertions(+), 26 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 276cc21..0e7ef69 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -106,8 +106,10 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int), return rc >= 0 ? 0 : rc; } -static void xen_vcpu_setup_restore(int cpu) +static int xen_vcpu_setup_restore(int cpu) { + int rc = 0; + /* Any per_cpu(xen_vcpu) is stale, so reset it */ xen_vcpu_info_reset(cpu); @@ -117,8 +119,10 @@ static void xen_vcpu_setup_restore(int cpu) */ if (xen_pv_domain() || (xen_hvm_domain() && cpu_online(cpu))) { - xen_vcpu_setup(cpu); + rc = xen_vcpu_setup(cpu); } + + return rc; } /* @@ -128,7 +132,7 @@ static void xen_vcpu_setup_restore(int cpu) */ void xen_vcpu_restore(void) { - int cpu; + int cpu, rc; for_each_possible_cpu(cpu) { bool other_cpu = (cpu != smp_processor_id()); @@ -148,22 +152,25 @@ void xen_vcpu_restore(void) if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock)) xen_setup_runstate_info(cpu); - xen_vcpu_setup_restore(cpu); - - if (other_cpu && is_up && + rc = xen_vcpu_setup_restore(cpu); + if (rc) + pr_emerg_once("vcpu restore failed for cpu=%d err=%d. " + "System will hang.\n", cpu, rc); + /* + * In case xen_vcpu_setup_restore() fails, do not bring up the + * VCPU. This helps us avoid the resulting OOPS when the VCPU + * accesses pvclock_vcpu_time via xen_vcpu (which is NULL.) + * Note that this does not improve the situation much -- now the + * VM hangs instead of OOPSing -- with the VCPUs that did not + * fail, spinning in stop_machine(), waiting for the failed + * VCPUs to come up. + */ + if (other_cpu && is_up && (rc == 0) && HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)) BUG(); } } -static void clamp_max_cpus(void) -{ -#ifdef CONFIG_SMP - if (setup_max_cpus > MAX_VIRT_CPUS) - setup_max_cpus = MAX_VIRT_CPUS; -#endif -} - void xen_vcpu_info_reset(int cpu) { if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) { @@ -175,7 +182,7 @@ void xen_vcpu_info_reset(int cpu) } } -void xen_vcpu_setup(int cpu) +int xen_vcpu_setup(int cpu) { struct vcpu_register_vcpu_info info; int err; @@ -196,7 +203,7 @@ void xen_vcpu_setup(int cpu) */ if (xen_hvm_domain()) { if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu)) - return; + return 0; } if (xen_have_vcpu_info_placement) { @@ -230,11 +237,10 @@ void xen_vcpu_setup(int cpu) } } - if (!xen_have_vcpu_info_placement) { - if (cpu >= MAX_VIRT_CPUS) - clamp_max_cpus(); + if (!xen_have_vcpu_info_placement) xen_vcpu_info_reset(cpu); - } + + return ((per_cpu(xen_vcpu, cpu) == NULL) ? -ENODEV : 0); } void xen_reboot(int reason) diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index ba1afad..13b5fa1 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -89,7 +89,7 @@ static void xen_hvm_crash_shutdown(struct pt_regs *regs) static int xen_cpu_up_prepare_hvm(unsigned int cpu) { - int rc; + int rc = 0; /* * This can happen if CPU was offlined earlier and @@ -104,7 +104,9 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu); else per_cpu(xen_vcpu_id, cpu) = cpu; - xen_vcpu_setup(cpu); + rc = xen_vcpu_setup(cpu); + if (rc) + return rc; if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock)) xen_setup_timer(cpu); @@ -113,9 +115,8 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) if (rc) { WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n", cpu, rc); - return rc; } - return 0; + return rc; } static int xen_cpu_dead_hvm(unsigned int cpu) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 9a0714d..bc44d21 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -958,7 +958,16 @@ void __ref xen_setup_vcpu_info_placement(void) for_each_possible_cpu(cpu) { /* Set up direct vCPU id mapping for PV guests. */ per_cpu(xen_vcpu_id, cpu) = cpu; - xen_vcpu_setup(cpu); + + /* + * xen_vcpu_setup(cpu) can fail -- in which case it + * falls back to the shared_info version for cpus + * where xen_vcpu_nr(cpu) < MAX_VIRT_CPUS. + * + * xen_cpu_up_prepare_pv() handles the rest by failing + * them in hotplug. + */ + (void) xen_vcpu_setup(cpu); } /* @@ -1432,6 +1441,9 @@ static int xen_cpu_up_prepare_pv(unsigned int cpu) { int rc; + if (per_cpu(xen_vcpu, cpu) == NULL) + return -ENODEV; + xen_setup_timer(cpu); rc = xen_smp_intr_init(cpu); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9082825..0d50044 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -78,7 +78,7 @@ bool xen_vcpu_stolen(int vcpu); extern int xen_have_vcpu_info_placement; -void xen_vcpu_setup(int cpu); +int xen_vcpu_setup(int cpu); void xen_vcpu_info_reset(int cpu); void xen_setup_vcpu_info_placement(void); -- 2.7.4