commit
e35fca4791fcdd43dc1fd769797df40c562ab491 upstream.
Some edac drivers register themselves as mce decoders via
notifier_chain. But in current notifier_chain implementation logic,
it doesn't accept same notifier registered twice. If so, it will be
wrong when adding/removing the element from the list. For example,
on one SandyBridge platform, remove module sb_edac and then trigger
one error, it will hit oops because it has no mce decoder registered
but related notifier_chain still points to an invalid callback
function. Here is an example:
Call Trace:
[<
ffffffff8150ef6a>] atomic_notifier_call_chain+0x1a/0x20
[<
ffffffff8102b936>] mce_log+0x46/0x180
[<
ffffffff8102eaea>] apei_mce_report_mem_error+0x4a/0x60
[<
ffffffff812e19d2>] ghes_do_proc+0x192/0x210
[<
ffffffff812e2066>] ghes_proc+0x46/0x70
[<
ffffffff812e20d8>] ghes_notify_sci+0x48/0x80
[<
ffffffff8150ef05>] notifier_call_chain+0x55/0x80
[<
ffffffff81076f1a>] __blocking_notifier_call_chain+0x5a/0x80
[<
ffffffff812aea11>] ? acpi_os_wait_events_complete+0x23/0x23
[<
ffffffff81076f56>] blocking_notifier_call_chain+0x16/0x20
[<
ffffffff812ddc4d>] acpi_hed_notify+0x19/0x1b
[<
ffffffff812b16bd>] acpi_device_notify+0x19/0x1b
[<
ffffffff812beb38>] acpi_ev_notify_dispatch+0x67/0x7f
[<
ffffffff812aea3a>] acpi_os_execute_deferred+0x29/0x36
[<
ffffffff81069dc2>] process_one_work+0x132/0x450
[<
ffffffff8106bbcb>] worker_thread+0x17b/0x3c0
[<
ffffffff8106ba50>] ? manage_workers+0x120/0x120
[<
ffffffff81070aee>] kthread+0x9e/0xb0
[<
ffffffff81514724>] kernel_thread_helper+0x4/0x10
[<
ffffffff81070a50>] ? kthread_freezable_should_stop+0x70/0x70
[<
ffffffff81514720>] ? gs_change+0x13/0x13
Code: f3 49 89 d4 45 85 ed 4d 89 c6 48 8b 0f 74 48 48 85 c9 75 17 eb 41
0f 1f 80 00 00 00 00 41 83 ed 01 4c 89 f9 74 22 4d 85 ff 74 1d <4c> 8b
79 08 4c 89 e2 48 89 de 48 89 cf ff 11 4d 85 f6 74 04 41
RIP [<
ffffffff8150eef6>] notifier_call_chain+0x46/0x80
RSP <
ffff88042868fb20>
CR2:
ffffffffa01af838
---[ end trace
0100930068e73e6f ]---
BUG: unable to handle kernel paging request at
fffffffffffffff8
IP: [<
ffffffff810705b0>] kthread_data+0x10/0x20
PGD 1a0d067 PUD 1a0e067 PMD 0
Oops: 0000 [#2] SMP
Only i7core_edac and sb_edac have such issues because they have more
than one memory controller which means they have to register mce
decoder many times.
Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
if (mce->bank != 8)
return NOTIFY_DONE;
-#ifdef CONFIG_SMP
- /* Only handle if it is the right mc controller */
- if (mce->socketid != pvt->i7core_dev->socket)
- return NOTIFY_DONE;
-#endif
-
smp_rmb();
if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
smp_wmb();
if (pvt->enable_scrub)
disable_sdram_scrub_setting(mci);
- mce_unregister_decode_chain(&i7_mce_dec);
-
/* Disable EDAC polling */
i7core_pci_ctl_release(pvt);
/* DCLK for scrub rate setting */
pvt->dclk_freq = get_dclk_freq();
- mce_register_decode_chain(&i7_mce_dec);
-
return 0;
fail0:
pci_rc = pci_register_driver(&i7core_driver);
- if (pci_rc >= 0)
+ if (pci_rc >= 0) {
+ mce_register_decode_chain(&i7_mce_dec);
return 0;
+ }
i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
pci_rc);
{
debugf2("MC: " __FILE__ ": %s()\n", __func__);
pci_unregister_driver(&i7core_driver);
+ mce_unregister_decode_chain(&i7_mce_dec);
}
module_init(i7core_init);
debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
__func__, mci, &sbridge_dev->pdev[0]->dev);
- mce_unregister_decode_chain(&sbridge_mce_dec);
-
/* Remove MC sysfs nodes */
edac_mc_del_mc(mci->dev);
goto fail0;
}
- mce_register_decode_chain(&sbridge_mce_dec);
return 0;
fail0:
pci_rc = pci_register_driver(&sbridge_driver);
- if (pci_rc >= 0)
+ if (pci_rc >= 0) {
+ mce_register_decode_chain(&sbridge_mce_dec);
return 0;
+ }
sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n",
pci_rc);
{
debugf2("MC: " __FILE__ ": %s()\n", __func__);
pci_unregister_driver(&sbridge_driver);
+ mce_unregister_decode_chain(&sbridge_mce_dec);
}
module_init(sbridge_init);