1 // SPDX-License-Identifier: GPL-2.0
4 * Architecture neutral utility routines for interacting with
5 * Hyper-V. This file is specifically for code that must be
6 * built-in to the kernel image when CONFIG_HYPERV is set
7 * (vs. being in a module) because it is called from architecture
8 * specific code under arch/.
10 * Copyright (C) 2021, Microsoft, Inc.
12 * Author : Michael Kelley <mikelley@microsoft.com>
15 #include <linux/types.h>
16 #include <linux/acpi.h>
17 #include <linux/export.h>
18 #include <linux/bitfield.h>
19 #include <linux/cpumask.h>
20 #include <linux/sched/task_stack.h>
21 #include <linux/panic_notifier.h>
22 #include <linux/ptrace.h>
23 #include <linux/kdebug.h>
24 #include <linux/kmsg_dump.h>
25 #include <linux/slab.h>
26 #include <linux/dma-map-ops.h>
27 #include <asm/hyperv-tlfs.h>
28 #include <asm/mshyperv.h>
31 * hv_root_partition, ms_hyperv and hv_nested are defined here with other
32 * Hyper-V specific globals so they are shared across all architectures and are
33 * built only when CONFIG_HYPERV is defined. But on x86,
34 * ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not
35 * defined, and it uses these three variables. So mark them as __weak
36 * here, allowing for an overriding definition in the module containing
37 * ms_hyperv_init_platform().
39 bool __weak hv_root_partition;
40 EXPORT_SYMBOL_GPL(hv_root_partition);
42 bool __weak hv_nested;
43 EXPORT_SYMBOL_GPL(hv_nested);
45 struct ms_hyperv_info __weak ms_hyperv;
46 EXPORT_SYMBOL_GPL(ms_hyperv);
49 EXPORT_SYMBOL_GPL(hv_vp_index);
52 EXPORT_SYMBOL_GPL(hv_max_vp_index);
54 void * __percpu *hyperv_pcpu_input_arg;
55 EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
57 void * __percpu *hyperv_pcpu_output_arg;
58 EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
60 static void hv_kmsg_dump_unregister(void);
62 static struct ctl_table_header *hv_ctl_table_hdr;
65 * Hyper-V specific initialization and shutdown code that is
66 * common across all architectures. Called from architecture
67 * specific initialization functions.
70 void __init hv_common_free(void)
72 unregister_sysctl_table(hv_ctl_table_hdr);
73 hv_ctl_table_hdr = NULL;
75 if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE)
76 hv_kmsg_dump_unregister();
81 free_percpu(hyperv_pcpu_output_arg);
82 hyperv_pcpu_output_arg = NULL;
84 free_percpu(hyperv_pcpu_input_arg);
85 hyperv_pcpu_input_arg = NULL;
89 * Functions for allocating and freeing memory with size and
90 * alignment HV_HYP_PAGE_SIZE. These functions are needed because
91 * the guest page size may not be the same as the Hyper-V page
92 * size. We depend upon kmalloc() aligning power-of-two size
93 * allocations to the allocation size boundary, so that the
94 * allocated memory appears to Hyper-V as a page of the size
98 void *hv_alloc_hyperv_page(void)
100 BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);
102 if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
103 return (void *)__get_free_page(GFP_KERNEL);
105 return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
107 EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page);
109 void *hv_alloc_hyperv_zeroed_page(void)
111 if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
112 return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
114 return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
116 EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);
118 void hv_free_hyperv_page(void *addr)
120 if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
121 free_page((unsigned long)addr);
125 EXPORT_SYMBOL_GPL(hv_free_hyperv_page);
/* Page handed to Hyper-V with the kmsg contents on panic; NULL when unused. */
static void *hv_panic_page;

/*
 * Boolean to control whether to report panic messages over Hyper-V.
 *
 * It can be set via /proc/sys/kernel/hyperv_record_panic_msg
 */
static int sysctl_record_panic_msg = 1;
137 * sysctl option to allow the user to control whether kmsg data should be
138 * reported to Hyper-V on panic.
140 static struct ctl_table hv_ctl_table[] = {
142 .procname = "hyperv_record_panic_msg",
143 .data = &sysctl_record_panic_msg,
144 .maxlen = sizeof(int),
146 .proc_handler = proc_dointvec_minmax,
147 .extra1 = SYSCTL_ZERO,
153 static int hv_die_panic_notify_crash(struct notifier_block *self,
154 unsigned long val, void *args);
156 static struct notifier_block hyperv_die_report_block = {
157 .notifier_call = hv_die_panic_notify_crash,
160 static struct notifier_block hyperv_panic_report_block = {
161 .notifier_call = hv_die_panic_notify_crash,
165 * The following callback works both as die and panic notifier; its
166 * goal is to provide panic information to the hypervisor unless the
167 * kmsg dumper is used [see hv_kmsg_dump()], which provides more
168 * information but isn't always available.
170 * Notice that both the panic/die report notifiers are registered only
171 * if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set.
173 static int hv_die_panic_notify_crash(struct notifier_block *self,
174 unsigned long val, void *args)
176 struct pt_regs *regs;
179 /* Don't notify Hyper-V unless we have a die oops event or panic. */
180 if (self == &hyperv_panic_report_block) {
182 regs = current_pt_regs();
183 } else { /* die event */
188 regs = ((struct die_args *)args)->regs;
192 * Hyper-V should be notified only once about a panic/die. If we will
193 * be calling hv_kmsg_dump() later with kmsg data, don't do the
196 if (!sysctl_record_panic_msg || !hv_panic_page)
197 hyperv_report_panic(regs, val, is_die);
203 * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg
204 * buffer and call into Hyper-V to transfer the data.
206 static void hv_kmsg_dump(struct kmsg_dumper *dumper,
207 enum kmsg_dump_reason reason)
209 struct kmsg_dump_iter iter;
210 size_t bytes_written;
212 /* We are only interested in panics. */
213 if (reason != KMSG_DUMP_PANIC || !sysctl_record_panic_msg)
217 * Write dump contents to the page. No need to synchronize; panic should
218 * be single-threaded.
220 kmsg_dump_rewind(&iter);
221 kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
226 * P3 to contain the physical address of the panic page & P4 to
227 * contain the size of the panic data in that page. Rest of the
228 * registers are no-op when the NOTIFY_MSG flag is set.
230 hv_set_register(HV_REGISTER_CRASH_P0, 0);
231 hv_set_register(HV_REGISTER_CRASH_P1, 0);
232 hv_set_register(HV_REGISTER_CRASH_P2, 0);
233 hv_set_register(HV_REGISTER_CRASH_P3, virt_to_phys(hv_panic_page));
234 hv_set_register(HV_REGISTER_CRASH_P4, bytes_written);
237 * Let Hyper-V know there is crash data available along with
240 hv_set_register(HV_REGISTER_CRASH_CTL,
241 (HV_CRASH_CTL_CRASH_NOTIFY |
242 HV_CRASH_CTL_CRASH_NOTIFY_MSG));
245 static struct kmsg_dumper hv_kmsg_dumper = {
246 .dump = hv_kmsg_dump,
249 static void hv_kmsg_dump_unregister(void)
251 kmsg_dump_unregister(&hv_kmsg_dumper);
252 unregister_die_notifier(&hyperv_die_report_block);
253 atomic_notifier_chain_unregister(&panic_notifier_list,
254 &hyperv_panic_report_block);
256 hv_free_hyperv_page(hv_panic_page);
257 hv_panic_page = NULL;
260 static void hv_kmsg_dump_register(void)
264 hv_panic_page = hv_alloc_hyperv_zeroed_page();
265 if (!hv_panic_page) {
266 pr_err("Hyper-V: panic message page memory allocation failed\n");
270 ret = kmsg_dump_register(&hv_kmsg_dumper);
272 pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret);
273 hv_free_hyperv_page(hv_panic_page);
274 hv_panic_page = NULL;
278 int __init hv_common_init(void)
282 if (hv_is_isolation_supported())
283 sysctl_record_panic_msg = 0;
286 * Hyper-V expects to get crash register data or kmsg when
287 * crash enlightment is available and system crashes. Set
288 * crash_kexec_post_notifiers to be true to make sure that
289 * calling crash enlightment interface before running kdump
292 if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
293 u64 hyperv_crash_ctl;
295 crash_kexec_post_notifiers = true;
296 pr_info("Hyper-V: enabling crash_kexec_post_notifiers\n");
299 * Panic message recording (sysctl_record_panic_msg)
300 * is enabled by default in non-isolated guests and
301 * disabled by default in isolated guests; the panic
302 * message recording won't be available in isolated
303 * guests should the following registration fail.
305 hv_ctl_table_hdr = register_sysctl("kernel", hv_ctl_table);
306 if (!hv_ctl_table_hdr)
307 pr_err("Hyper-V: sysctl table register error");
310 * Register for panic kmsg callback only if the right
311 * capability is supported by the hypervisor.
313 hyperv_crash_ctl = hv_get_register(HV_REGISTER_CRASH_CTL);
314 if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG)
315 hv_kmsg_dump_register();
317 register_die_notifier(&hyperv_die_report_block);
318 atomic_notifier_chain_register(&panic_notifier_list,
319 &hyperv_panic_report_block);
323 * Allocate the per-CPU state for the hypercall input arg.
324 * If this allocation fails, we will not be able to setup
325 * (per-CPU) hypercall input page and thus this failure is
328 hyperv_pcpu_input_arg = alloc_percpu(void *);
329 BUG_ON(!hyperv_pcpu_input_arg);
331 /* Allocate the per-CPU state for output arg for root */
332 if (hv_root_partition) {
333 hyperv_pcpu_output_arg = alloc_percpu(void *);
334 BUG_ON(!hyperv_pcpu_output_arg);
337 hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
344 for (i = 0; i < num_possible_cpus(); i++)
345 hv_vp_index[i] = VP_INVAL;
351 * Hyper-V specific initialization and die code for
352 * individual CPUs that is common across all architectures.
353 * Called by the CPU hotplug mechanism.
356 int hv_common_cpu_init(unsigned int cpu)
358 void **inputarg, **outputarg;
361 int pgcount = hv_root_partition ? 2 : 1;
363 /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
364 flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
366 inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
369 * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already
370 * allocated if this CPU was previously online and then taken offline
373 *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
377 if (hv_root_partition) {
378 outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
379 *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE;
383 msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX);
385 hv_vp_index[cpu] = msr_vp_index;
387 if (msr_vp_index > hv_max_vp_index)
388 hv_max_vp_index = msr_vp_index;
/*
 * hv_common_cpu_die - per-CPU Hyper-V teardown.
 * @cpu: Linux CPU number going offline (unused)
 *
 * Intentionally does nothing; see the comment in the body.
 *
 * Return: always 0.
 */
int hv_common_cpu_die(unsigned int cpu)
{
	/*
	 * The hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory
	 * is not freed when the CPU goes offline as the hyperv_pcpu_input_arg
	 * may be used by the Hyper-V vPCI driver in reassigning interrupts
	 * as part of the offlining process.  The interrupt reassignment
	 * happens *after* the CPUHP_AP_HYPERV_ONLINE state has run and
	 * called this function.
	 *
	 * If a previously offlined CPU is brought back online again, the
	 * originally allocated memory is reused in hv_common_cpu_init().
	 */

	return 0;
}
410 /* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
411 bool hv_query_ext_cap(u64 cap_query)
414 * The address of the 'hv_extended_cap' variable will be used as an
415 * output parameter to the hypercall below and so it should be
416 * compatible with 'virt_to_phys'. Which means, it's address should be
417 * directly mapped. Use 'static' to keep it compatible; stack variables
418 * can be virtually mapped, making them incompatible with
420 * Hypercall input/output addresses should also be 8-byte aligned.
422 static u64 hv_extended_cap __aligned(8);
423 static bool hv_extended_cap_queried;
427 * Querying extended capabilities is an extended hypercall. Check if the
428 * partition supports extended hypercall, first.
430 if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS))
433 /* Extended capabilities do not change at runtime. */
434 if (hv_extended_cap_queried)
435 return hv_extended_cap & cap_query;
437 status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL,
441 * The query extended capabilities hypercall should not fail under
442 * any normal circumstances. Avoid repeatedly making the hypercall, on
445 hv_extended_cap_queried = true;
446 if (!hv_result_success(status)) {
447 pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n",
452 return hv_extended_cap & cap_query;
454 EXPORT_SYMBOL_GPL(hv_query_ext_cap);
456 void hv_setup_dma_ops(struct device *dev, bool coherent)
459 * Hyper-V does not offer a vIOMMU in the guest
460 * VM, so pass 0/NULL for the IOMMU settings
462 arch_setup_dma_ops(dev, 0, 0, NULL, coherent);
464 EXPORT_SYMBOL_GPL(hv_setup_dma_ops);
466 bool hv_is_hibernation_supported(void)
468 return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4);
470 EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);
473 * Default function to read the Hyper-V reference counter, independent
474 * of whether Hyper-V enlightened clocks/timers are being used. But on
475 * architectures where it is used, Hyper-V enlightenment code in
476 * hyperv_timer.c may override this function.
478 static u64 __hv_read_ref_counter(void)
480 return hv_get_register(HV_REGISTER_TIME_REF_COUNT);
483 u64 (*hv_read_reference_counter)(void) = __hv_read_ref_counter;
484 EXPORT_SYMBOL_GPL(hv_read_reference_counter);
486 /* These __weak functions provide default "no-op" behavior and
487 * may be overridden by architecture specific versions. Architectures
488 * for which the default "no-op" behavior is sufficient can leave
489 * them unimplemented and not be cluttered with a bunch of stub
490 * functions in arch-specific code.
493 bool __weak hv_is_isolation_supported(void)
497 EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
499 bool __weak hv_isolation_type_snp(void)
503 EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
505 void __weak hv_setup_vmbus_handler(void (*handler)(void))
508 EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler);
510 void __weak hv_remove_vmbus_handler(void)
513 EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler);
515 void __weak hv_setup_kexec_handler(void (*handler)(void))
518 EXPORT_SYMBOL_GPL(hv_setup_kexec_handler);
520 void __weak hv_remove_kexec_handler(void)
523 EXPORT_SYMBOL_GPL(hv_remove_kexec_handler);
525 void __weak hv_setup_crash_handler(void (*handler)(struct pt_regs *regs))
528 EXPORT_SYMBOL_GPL(hv_setup_crash_handler);
530 void __weak hv_remove_crash_handler(void)
533 EXPORT_SYMBOL_GPL(hv_remove_crash_handler);
535 void __weak hyperv_cleanup(void)
538 EXPORT_SYMBOL_GPL(hyperv_cleanup);
540 u64 __weak hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
542 return HV_STATUS_INVALID_PARAMETER;
544 EXPORT_SYMBOL_GPL(hv_ghcb_hypercall);