From 01ca79f1411eae2a45352709c838b946b1af9fbd Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 May 2009 21:56:52 +0200 Subject: [PATCH] x86, mce: add machine check exception count in /proc/interrupts Useful for debugging, but it's also good general policy to have a counter for all special interrupts there. This makes it easier to diagnose where a CPU is spending its time. [ Impact: feature, debugging tool ] Signed-off-by: Andi Kleen Signed-off-by: Hidetoshi Seto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/mce.h | 3 +++ arch/x86/kernel/cpu/mcheck/mce.c | 4 ++++ arch/x86/kernel/irq.c | 10 ++++++++++ 3 files changed, 17 insertions(+) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ac6e030..1156dae 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -89,6 +89,7 @@ struct mce_log { extern int mce_disabled; #include +#include void mce_setup(struct mce *m); void mce_log(struct mce *m); @@ -123,6 +124,8 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } int mce_available(struct cpuinfo_x86 *c); +DECLARE_PER_CPU(unsigned, mce_exception_count); + void mce_log_therm_throt_event(__u64 status); extern atomic_t mce_entry; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 1d0aa9c..287268d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -57,6 +57,8 @@ int mce_disabled; atomic_t mce_entry; +DEFINE_PER_CPU(unsigned, mce_exception_count); + /* * Tolerant levels: * 0: always panic on uncorrected errors, log corrected errors @@ -359,6 +361,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) atomic_inc(&mce_entry); + __get_cpu_var(mce_exception_count)++; + if (notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL) == NOTIFY_STOP) goto out; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index a05660b..05fc635 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -12,6 +12,7 @@ #include #include #include +#include atomic_t irq_err_count; @@ -94,6 +95,12 @@ static int show_other_interrupts(struct seq_file *p, int prec) seq_printf(p, " Threshold APIC interrupts\n"); # endif #endif +#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) + seq_printf(p, "%*s: ", prec, "MCE"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); + seq_printf(p, " Machine check exceptions\n"); +#endif seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); @@ -161,6 +168,9 @@ u64 arch_irq_stat_cpu(unsigned int cpu) { u64 sum = irq_stats(cpu)->__nmi_count; +#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) + sum += per_cpu(mce_exception_count, cpu); +#endif #ifdef CONFIG_X86_LOCAL_APIC sum += irq_stats(cpu)->apic_timer_irqs; sum += irq_stats(cpu)->irq_spurious_count; -- 2.7.4