break;
}
}
+
+uint64_t get_mce_fault_addr(struct machine_check_event *evt)
+{
+ switch (evt->error_type) {
+ case MCE_ERROR_TYPE_UE:
+ if (evt->u.ue_error.effective_address_provided)
+ return evt->u.ue_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ if (evt->u.slb_error.effective_address_provided)
+ return evt->u.slb_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ if (evt->u.erat_error.effective_address_provided)
+ return evt->u.erat_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ if (evt->u.tlb_error.effective_address_provided)
+ return evt->u.tlb_error.effective_address;
+ break;
+ default:
+ case MCE_ERROR_TYPE_UNKNOWN:
+ break;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(get_mce_fault_addr);
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/slab.h>
+#include <linux/sched.h>
#include <linux/kobject.h>
#include <asm/opal.h>
#include <asm/firmware.h>
return written;
}
+static int opal_recover_mce(struct pt_regs *regs,
+ struct machine_check_event *evt)
+{
+ int recovered = 0;
+ uint64_t ea = get_mce_fault_addr(evt);
+
+ if (!(regs->msr & MSR_RI)) {
+ /* If MSR_RI isn't set, we cannot recover */
+ recovered = 0;
+ } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
+ /* Platform corrected itself */
+ recovered = 1;
+ } else if (ea && !is_kernel_addr(ea)) {
+ /*
+ * Faulting address is not in kernel text. We should be fine.
+ * We need to find which process uses this address.
+ * For now, kill the task if we have received exception when
+ * in userspace.
+ *
+ * TODO: Queue up this address for hwpoisioning later.
+ */
+ if (user_mode(regs) && !is_global_init(current)) {
+ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+ recovered = 1;
+ } else
+ recovered = 0;
+ } else if (user_mode(regs) && !is_global_init(current) &&
+ evt->severity == MCE_SEV_ERROR_SYNC) {
+ /*
+ * If we have received a synchronous error when in userspace
+ * kill the task.
+ */
+ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+ recovered = 1;
+ }
+ return recovered;
+}
+
int opal_machine_check(struct pt_regs *regs)
{
struct machine_check_event evt;
}
machine_check_print_event_info(&evt);
- return evt.severity == MCE_SEV_FATAL ? 0 : 1;
+ if (opal_recover_mce(regs, &evt))
+ return 1;
+ return 0;
}
static irqreturn_t opal_interrupt(int irq, void *data)