1 // SPDX-License-Identifier: GPL-2.0-only
3 * Machine check injection support.
4 * Copyright 2008 Intel Corporation.
10 * The AMD part (from mce_amd_inj.c): a simple MCE injection facility
11 * for testing different aspects of the RAS code. This driver should be
12 * built as module so that it can be loaded on production kernels for
15 * Copyright (c) 2010-17: Borislav Petkov <bp@alien8.de>
16 * Advanced Micro Devices Inc.
19 #include <linux/cpu.h>
20 #include <linux/debugfs.h>
21 #include <linux/kernel.h>
22 #include <linux/module.h>
23 #include <linux/notifier.h>
24 #include <linux/pci.h>
25 #include <linux/uaccess.h>
27 #include <asm/amd_nb.h>
29 #include <asm/irq_vectors.h>
37 * Collect all the MCi_XXX settings
/*
 * Injection staging state: i_mce accumulates the user-supplied MCA register
 * values written via the debugfs files; dfs_inj is the root of the
 * "mce-inject" debugfs directory created at init.
 */
39 static struct mce i_mce;
40 static struct dentry *dfs_inj;
/* Size of the "flags" read/write buffers: longest option string + NUL. */
42 #define MAX_FLAG_OPT_SIZE 4
/* Injection types (members of enum injection_type; opening line not visible here). */
46 SW_INJ = 0, /* SW injection, simply decode the error */
47 HW_INJ, /* Trigger a #MC */
48 DFR_INT_INJ, /* Trigger Deferred error interrupt */
49 THR_INT_INJ, /* Trigger threshold interrupt */
/* String names for the injection types, exchanged through the "flags" file. */
53 static const char * const flags_options[] = {
61 /* Set default injection to SW_INJ */
62 static enum injection_type inj_type = SW_INJ;
/*
 * Generate inj_<reg>_set(): debugfs setter storing a user value into the
 * corresponding field of the staged struct mce (@data points at i_mce).
 */
64 #define MCE_INJECT_SET(reg) \
65 static int inj_##reg##_set(void *data, u64 val) \
67 struct mce *m = (struct mce *)data; \
73 MCE_INJECT_SET(status);
/* Generate inj_<reg>_get(): debugfs getter reading the staged field back. */
78 #define MCE_INJECT_GET(reg) \
79 static int inj_##reg##_get(void *data, u64 *val) \
81 struct mce *m = (struct mce *)data; \
87 MCE_INJECT_GET(status);
/* debugfs attribute glue: hex-formatted read/write for each MCA register file. */
93 DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
94 DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
95 DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
96 DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
99 static int inj_ipid_set(void *data, u64 val)
101 struct mce *m = (struct mce *)data;
103 if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
104 if (inj_type == SW_INJ)
111 DEFINE_SIMPLE_ATTRIBUTE(ipid_fops, inj_ipid_get, inj_ipid_set, "%llx\n");
/*
 * Reset the staging record and seed the fields describing this machine:
 * CPU vendor, wall-clock timestamp, CPUID(1) signature and microcode
 * revision, so decoded reports carry plausible system information.
 */
113 static void setup_inj_struct(struct mce *m)
115 memset(m, 0, sizeof(struct mce));
117 m->cpuvendor = boot_cpu_data.x86_vendor;
118 m->time = ktime_get_real_seconds();
119 m->cpuid = cpuid_eax(1);
120 m->microcode = boot_cpu_data.microcode;
123 /* Update fake mce registers on current CPU. */
124 static void inject_mce(struct mce *m)
126 struct mce *i = &per_cpu(injectm, m->extcpu);
128 /* Make sure no one reads partially written injectm */
/*
 * Publication protocol: the record is written field-by-field around the
 * ->finished marker so a concurrent reader never sees a half-written
 * injectm entry (see the ordering comments below, from the original).
 */
132 /* First set the fields after finished */
133 i->extcpu = m->extcpu;
135 /* Now write record in order, finished last (except above) */
136 memcpy(i, m, sizeof(struct mce));
137 /* Finally activate it */
/*
 * Run the machine-check poller on this CPU with every bank selected
 * (mask set to all-ones), with interrupts disabled for the duration.
 */
142 static void raise_poll(struct mce *m)
147 memset(&b, 0xff, sizeof(mce_banks_t));
148 local_irq_save(flags);
149 machine_check_poll(0, &b);
150 local_irq_restore(flags);
154 static void raise_exception(struct mce *m, struct pt_regs *pregs)
160 memset(®s, 0, sizeof(struct pt_regs));
165 /* do_machine_check() expects interrupts disabled -- at least */
166 local_irq_save(flags);
167 do_machine_check(pregs);
168 local_irq_restore(flags);
/*
 * mce_inject_cpumask: CPUs still pending injection during a broadcast.
 * mce_inject_mutex: serializes injections arriving via the notifier chain.
 */
172 static cpumask_var_t mce_inject_cpumask;
173 static DEFINE_MUTEX(mce_inject_mutex);
/*
 * NMI handler for NMI-broadcast injection: if this CPU was targeted in
 * mce_inject_cpumask, ack by clearing our bit and raise the exception
 * staged in this CPU's injectm record.
 */
175 static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
177 int cpu = smp_processor_id();
178 struct mce *m = this_cpu_ptr(&injectm);
/* Not a CPU we targeted: decline so other NMI handlers can run. */
179 if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
181 cpumask_clear_cpu(cpu, mce_inject_cpumask);
182 if (m->inject_flags & MCJ_EXCEPTION)
183 raise_exception(m, regs);
/*
 * IPI callback for IRQ-broadcast injection: raise the staged exception on
 * this CPU if it is still pending in mce_inject_cpumask and an exception
 * was requested, clearing our pending bit first.
 */
189 static void mce_irq_ipi(void *info)
191 int cpu = smp_processor_id();
192 struct mce *m = this_cpu_ptr(&injectm);
194 if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
195 m->inject_flags & MCJ_EXCEPTION) {
196 cpumask_clear_cpu(cpu, mce_inject_cpumask);
197 raise_exception(m, NULL);
201 /* Inject mce on current CPU */
202 static int raise_local(void)
204 struct mce *m = this_cpu_ptr(&injectm);
205 int context = MCJ_CTX(m->inject_flags);
/* Exception-style injection: raise a #MC from the requested context. */
209 if (m->inject_flags & MCJ_EXCEPTION) {
210 pr_info("Triggering MCE exception on CPU %d\n", cpu);
214 * Could do more to fake interrupts like
215 * calling irq_enter, but the necessary
216 * machinery isn't exported currently.
219 case MCJ_CTX_PROCESS:
220 raise_exception(m, NULL);
223 pr_info("Invalid MCE context\n");
226 pr_info("MCE exception done on CPU %d\n", cpu);
/* No exception requested, but a status value is staged: poll instead. */
227 } else if (m->status) {
228 pr_info("Starting machine check poll CPU %d\n", cpu);
231 pr_info("Machine check poll done on CPU %d\n", cpu);
/*
 * Top-level injection driver: for broadcast injections, target every other
 * online CPU that has a finished MCJ_CTX_RANDOM record staged, kick them
 * via IRQ or NMI, then wait (bounded) for all of them to ack.
 */
238 static void __maybe_unused raise_mce(struct mce *m)
240 int context = MCJ_CTX(m->inject_flags);
244 if (context == MCJ_CTX_RANDOM)
247 if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
/* Start from all online CPUs, minus ourselves (get_cpu() pins us). */
252 cpumask_copy(mce_inject_cpumask, cpu_online_mask);
253 cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
254 for_each_online_cpu(cpu) {
255 struct mce *mcpu = &per_cpu(injectm, cpu);
/* Drop CPUs with no finished record or a non-RANDOM context staged. */
256 if (!mcpu->finished ||
257 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
258 cpumask_clear_cpu(cpu, mce_inject_cpumask);
260 if (!cpumask_empty(mce_inject_cpumask)) {
261 if (m->inject_flags & MCJ_IRQ_BROADCAST) {
263 * don't wait because mce_irq_ipi is necessary
264 * to be sync with following raise_local
267 smp_call_function_many(mce_inject_cpumask,
268 mce_irq_ipi, NULL, 0);
/* NMI broadcast: poke the targeted CPUs directly via APIC IPI. */
270 } else if (m->inject_flags & MCJ_NMI_BROADCAST)
271 apic->send_IPI_mask(mce_inject_cpumask,
275 while (!cpumask_empty(mce_inject_cpumask)) {
276 if (!time_before(jiffies, start + 2*HZ)) {
277 pr_err("Timeout waiting for mce inject %lx\n",
278 *cpumask_bits(mce_inject_cpumask));
/*
 * Injector-chain notifier: raise the error described by @data, serialized
 * against concurrent injections by mce_inject_mutex.
 */
293 static int mce_inject_raise(struct notifier_block *nb, unsigned long val,
296 struct mce *m = (struct mce *)data;
301 mutex_lock(&mce_inject_mutex);
303 mutex_unlock(&mce_inject_mutex);
/* Registered on the MCE injector chain at module init. */
308 static struct notifier_block inject_nb = {
309 .notifier_call = mce_inject_raise,
313 * Caller needs to make sure this cpu doesn't disappear
314 * from under us, i.e.: get_cpu/put_cpu.
316 static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
321 err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h);
323 pr_err("%s: error reading HWCR\n", __func__);
/* HWCR bit 18 (McStatusWrEn) gates software writes to the MCA status MSRs. */
327 enable ? (l |= BIT(18)) : (l &= ~BIT(18));
329 err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h);
331 pr_err("%s: error writing HWCR\n", __func__);
/*
 * Map a user-supplied string onto an injection_type by prefix-matching it
 * against flags_options[] (strncmp bounded by each option's own length).
 */
336 static int __set_inj(const char *buf)
340 for (i = 0; i < N_INJ_TYPES; i++) {
341 if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
/* debugfs "flags" read: report the currently selected injection type. */
349 static ssize_t flags_read(struct file *filp, char __user *ubuf,
350 size_t cnt, loff_t *ppos)
352 char buf[MAX_FLAG_OPT_SIZE];
355 n = sprintf(buf, "%s\n", flags_options[inj_type]);
357 return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
/*
 * debugfs "flags" write: parse the user string and select the injection
 * type. Rejects empty or oversized input before copying from userspace,
 * strips surrounding whitespace, then delegates to __set_inj().
 */
360 static ssize_t flags_write(struct file *filp, const char __user *ubuf,
361 size_t cnt, loff_t *ppos)
363 char buf[MAX_FLAG_OPT_SIZE], *__buf;
366 if (!cnt || cnt > MAX_FLAG_OPT_SIZE)
369 if (copy_from_user(&buf, ubuf, cnt))
374 /* strip whitespace */
375 __buf = strstrip(buf);
377 err = __set_inj(__buf);
379 pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
/* File operations for the "flags" debugfs entry. */
388 static const struct file_operations flags_fops = {
390 .write = flags_write,
391 .llseek = generic_file_llseek,
395 * On which CPU to inject?
397 MCE_INJECT_GET(extcpu);
/* Validate the target CPU: must be a valid id and currently online. */
399 static int inj_extcpu_set(void *data, u64 val)
401 struct mce *m = (struct mce *)data;
403 if (val >= nr_cpu_ids || !cpu_online(val)) {
404 pr_err("%s: Invalid CPU: %llu\n", __func__, val);
411 DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");
/* SMP callback: raise #MC (exception vector 18) on the CPU this runs on. */
413 static void trigger_mce(void *info)
415 asm volatile("int $18");
/* SMP callback: raise the AMD deferred-error APIC interrupt vector. */
418 static void trigger_dfr_int(void *info)
420 asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR));
/* SMP callback: raise the MCA threshold APIC interrupt vector. */
423 static void trigger_thr_int(void *info)
425 asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
/*
 * Return the id of the first ("node base") core of @node_id, computed as
 * node_id * cores-per-node, where cores-per-node is derived from
 * (max cores * SMT siblings) / nodes-per-socket of the boot CPU.
 */
428 static u32 get_nbc_for_node(int node_id)
430 struct cpuinfo_x86 *c = &boot_cpu_data;
433 cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket();
435 return cores_per_node * node_id;
/*
 * Make sure D18F3x44[NbMcaToMstCpuEn] is set on node @nid (via the AMD
 * northbridge F3 PCI config space) so bank 4 errors are reported on the
 * node base core, setting it ourselves if BIOS has not.
 */
438 static void toggle_nb_mca_mst_cpu(u16 nid)
440 struct amd_northbridge *nb;
445 nb = node_to_amd_nb(nid);
453 err = pci_read_config_dword(F3, NBCFG, &val);
455 pr_err("%s: Error reading F%dx%03x.\n",
456 __func__, PCI_FUNC(F3->devfn), NBCFG);
463 pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n",
467 err = pci_write_config_dword(F3, NBCFG, val);
469 pr_err("%s: Error writing F%dx%03x.\n",
470 __func__, PCI_FUNC(F3->devfn), NBCFG);
/*
 * Runs on the target CPU: write the staged record (@info, copied by value)
 * into the real MCA MSRs. SMCA parts use the SMCA MSR space; deferred-error
 * injections go to the DESTAT/DEADDR pair instead of STATUS/ADDR.
 */
473 static void prepare_msrs(void *info)
475 struct mce m = *(struct mce *)info;
478 wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
480 if (boot_cpu_has(X86_FEATURE_SMCA)) {
481 if (m.inject_flags == DFR_INT_INJ) {
482 wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
483 wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
485 wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
486 wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
489 wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
490 wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
/* Legacy (non-SMCA) MSR space below. */
492 wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
493 wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
494 wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
/*
 * Core injection path, run when the "bank" file is written: finalize the
 * staged i_mce (validity bits, MCG status), prepare the target CPU's MSRs
 * under HWCR[McStatusWrEn], then fire the chosen event — software decode,
 * a real #MC, a deferred-error interrupt or a threshold interrupt.
 */
498 static void do_inject(void)
501 unsigned int cpu = i_mce.extcpu;
504 i_mce.tsc = rdtsc_ordered();
506 i_mce.status |= MCI_STATUS_VAL;
509 i_mce.status |= MCI_STATUS_MISCV;
512 i_mce.status |= MCI_STATUS_SYNDV;
/* Software injection: decode only, no MSR writes are needed. */
514 if (inj_type == SW_INJ) {
519 /* prep MCE global settings for the injection */
520 mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
/* RIPV (restartable) can only be claimed when the error is not PCC. */
522 if (!(i_mce.status & MCI_STATUS_PCC))
523 mcg_status |= MCG_STATUS_RIPV;
526 * Ensure necessary status bits for deferred errors:
527 * - MCx_STATUS[Deferred]: make sure it is a deferred error
528 * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
530 if (inj_type == DFR_INT_INJ) {
531 i_mce.status |= MCI_STATUS_DEFERRED;
532 i_mce.status &= ~MCI_STATUS_UC;
536 * For multi node CPUs, logging and reporting of bank 4 errors happens
537 * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
538 * Fam10h and later BKDGs.
540 if (boot_cpu_has(X86_FEATURE_AMD_DCM) &&
542 boot_cpu_data.x86 < 0x17) {
543 toggle_nb_mca_mst_cpu(topology_die_id(cpu));
544 cpu = get_nbc_for_node(topology_die_id(cpu));
548 if (!cpu_online(cpu))
/*
 * Open the MCA status MSRs for writing around the cross-CPU setup,
 * then close them again once prepare_msrs() has run.
 */
551 toggle_hw_mce_inject(cpu, true);
553 i_mce.mcgstatus = mcg_status;
554 i_mce.inject_flags = inj_type;
555 smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);
557 toggle_hw_mce_inject(cpu, false);
/* Fire the requested event on the target CPU. */
561 smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
564 smp_call_function_single(cpu, trigger_thr_int, NULL, 0);
567 smp_call_function_single(cpu, trigger_mce, NULL, 0);
/*
 * Writing the "bank" file both selects the bank and triggers the injection.
 * The bank number is validated against the target CPU's own bank count
 * (MCG_CAP), and — for hardware injection on SMCA parts — against the
 * bank's IPID, which is zero for unpopulated banks.
 */
576 * This denotes into which bank we're injecting and triggers
577 * the injection, at the same time.
579 static int inj_bank_set(void *data, u64 val)
581 struct mce *m = (struct mce *)data;
585 /* Get bank count on target CPU so we can handle non-uniform values. */
586 rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
587 n_banks = cap & MCG_BANKCNT_MASK;
589 if (val >= n_banks) {
590 pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu);
597 * sw-only injection allows to write arbitrary values into the MCA
598 * registers because it tests only the decoding paths.
600 if (inj_type == SW_INJ)
604 * Read IPID value to determine if a bank is populated on the target
607 if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
610 if (rdmsrl_on_cpu(m->extcpu, MSR_AMD64_SMCA_MCx_IPID(val), &ipid)) {
611 pr_err("Error reading IPID on CPU%d\n", m->extcpu);
616 pr_err("Cannot inject into unpopulated bank %llu\n", val);
/* Injection done (or rejected): clear the staging record for the next one. */
624 /* Reset injection struct */
625 setup_inj_struct(&i_mce);
630 MCE_INJECT_GET(bank);
632 DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
/* Static text served by the debugfs README file, describing every knob. */
634 static const char readme_msg[] =
635 "Description of the files and their usages:\n"
637 "Note1: i refers to the bank number below.\n"
638 "Note2: See respective BKDGs for the exact bit definitions of the files below\n"
639 "as they mirror the hardware registers.\n"
641 "status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
642 "\t attributes of the error which caused the MCE.\n"
644 "misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
645 "\t used for error thresholding purposes and its validity is indicated by\n"
646 "\t MCi_STATUS[MiscV].\n"
648 "synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
649 "\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
651 "addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
652 "\t associated with the error.\n"
654 "cpu:\t The CPU to inject the error on.\n"
656 "bank:\t Specify the bank you want to inject the error into: the number of\n"
657 "\t banks in a processor varies and is family/model-specific, therefore, the\n"
658 "\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
661 "flags:\t Injection type to be performed. Writing to this file will trigger a\n"
662 "\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
663 "\t for AMD processors.\n"
665 "\t Allowed error injection types:\n"
666 "\t - \"sw\": Software error injection. Decode error to a human-readable \n"
667 "\t format only. Safe to use.\n"
668 "\t - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
669 "\t handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
670 "\t is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
671 "\t before injecting.\n"
672 "\t - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n"
673 "\t error APIC interrupt handler to handle the error if the feature is \n"
674 "\t is present in hardware. \n"
675 "\t - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
676 "\t APIC interrupt handler to handle the error. \n"
678 "ipid:\t IPID (AMD-specific)\n"
/* debugfs read handler serving the static README text above. */
682 inj_readme_read(struct file *filp, char __user *ubuf,
683 size_t cnt, loff_t *ppos)
685 return simple_read_from_buffer(ubuf, cnt, ppos,
686 readme_msg, strlen(readme_msg));
689 static const struct file_operations readme_fops = {
690 .read = inj_readme_read,
/*
 * Table of debugfs files: name, file_operations and permissions. README is
 * world-readable; all injection knobs are root read/write only.
 */
693 static struct dfs_node {
695 const struct file_operations *fops;
698 { .name = "status", .fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
699 { .name = "misc", .fops = &misc_fops, .perm = S_IRUSR | S_IWUSR },
700 { .name = "addr", .fops = &addr_fops, .perm = S_IRUSR | S_IWUSR },
701 { .name = "synd", .fops = &synd_fops, .perm = S_IRUSR | S_IWUSR },
702 { .name = "ipid", .fops = &ipid_fops, .perm = S_IRUSR | S_IWUSR },
703 { .name = "bank", .fops = &bank_fops, .perm = S_IRUSR | S_IWUSR },
704 { .name = "flags", .fops = &flags_fops, .perm = S_IRUSR | S_IWUSR },
705 { .name = "cpu", .fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
706 { .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
/*
 * Create the "mce-inject" debugfs directory plus one file per dfs_fls[]
 * entry, all backed by the shared i_mce staging record.
 */
709 static void __init debugfs_init(void)
713 dfs_inj = debugfs_create_dir("mce-inject", NULL);
715 for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
716 debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj,
717 &i_mce, dfs_fls[i].fops);
/*
 * Module init: allocate the broadcast cpumask, register the NMI handler
 * and the MCE injector notifier chain, create the debugfs files, and seed
 * the i_mce staging record.
 */
720 static int __init inject_init(void)
722 if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
727 register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify")
728 mce_register_injector_chain(&inject_nb);
730 setup_inj_struct(&i_mce);
732 pr_info("Machine check injector initialized\n");
/*
 * Module teardown: unregister in reverse order of init, remove the debugfs
 * tree, scrub the file table and free the broadcast cpumask.
 */
737 static void __exit inject_exit(void)
740 mce_unregister_injector_chain(&inject_nb);
741 unregister_nmi_handler(NMI_LOCAL, "mce_notify");
743 debugfs_remove_recursive(dfs_inj);
746 memset(&dfs_fls, 0, sizeof(dfs_fls));
748 free_cpumask_var(mce_inject_cpumask);
751 module_init(inject_init);
752 module_exit(inject_exit);
753 MODULE_LICENSE("GPL");