1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/module.h>
3 #include <linux/slab.h>
/* Per-family MC0/MC1/MC2 decode callbacks; the members are assigned in mce_amd_init(). */
9 static struct amd_decoder_ops fam_ops;
/*
 * Mask handed to XEC() to extract the extended error code from MCi_STATUS.
 * Defaults to 0xf; mce_amd_init() widens it (0x1f/0x3f) for some CPU models.
 */
11 static u8 xec_mask = 0xf;
/*
 * Optional DRAM ECC decode hook. The visible callers invoke it only when it
 * is non-NULL; amd_unregister_ecc_decoder() resets it to NULL.
 */
13 static void (*decode_dram_ecc)(int node_id, struct mce *m);
15 void amd_register_ecc_decoder(void (*f)(int, struct mce *))
19 EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
/*
 * Unregister the DRAM ECC decode callback.
 *
 * @f: the callback that was previously registered.
 *
 * When a callback is currently installed, WARN if @f is not that callback and
 * then clear the hook. As far as the visible code shows, the hook is cleared
 * even when @f does not match. Exported to GPL modules.
 */
21 void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
23 if (decode_dram_ecc) {
24 WARN_ON(decode_dram_ecc != f);
26 decode_dram_ecc = NULL;
29 EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
32 * string representation for the different MCA reported error types, see F3x48
36 /* transaction type */
/* NOTE(review): presumably the table behind TT_MSG() (the "tx:" field in amd_decode_err_code()) - confirm in the header. */
37 static const char * const tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" };
/*
 * cache level
 * NOTE(review): presumably the table behind LL_MSG() (the "cache level:" field
 * in amd_decode_err_code()) - confirm in the header.
 */
40 static const char * const ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" };
42 /* memory transaction type */
43 static const char * const rrrr_msgs[] = {
44 "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP"
47 /* participating processor */
/*
 * Unlike the neighbouring tables this one is not static: it is exported
 * (GPL-only) so that code outside this file can reference it.
 * NOTE(review): presumably the table behind PP_MSG() - confirm in the header.
 */
48 const char * const pp_msgs[] = { "SRC", "RES", "OBS", "GEN" };
49 EXPORT_SYMBOL_GPL(pp_msgs);
/*
 * request timeout state
 * NOTE(review): presumably the table behind TO_MSG(), which amd_decode_err_code()
 * prints in parentheses after the participating processor - confirm in the header.
 */
52 static const char * const to_msgs[] = { "no timeout", "timed out" };
/*
 * memory or I/O
 * NOTE(review): presumably the table behind II_MSG() (the "mem/io:" field in
 * amd_decode_err_code()) - confirm in the header.
 */
55 static const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
57 /* internal error type */
/* NOTE(review): presumably the table behind UU_MSG() (the "internal:" line in amd_decode_err_code()) - confirm in the header. */
58 static const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };
60 static const char * const f15h_mc1_mce_desc[] = {
61 "UC during a demand linefill from L2",
62 "Parity error during data load from IC",
63 "Parity error for IC valid bit",
64 "Main tag parity error",
65 "Parity error in prediction queue",
66 "PFB data/address parity error",
67 "Parity error in the branch status reg",
68 "PFB promotion address error",
69 "Tag error during probe/victimization",
70 "Parity error for IC probe tag valid bit",
71 "PFB non-cacheable bit parity error",
72 "PFB valid bit parity error", /* xec = 0xd */
73 "Microcode Patch Buffer", /* xec = 0x10 */
81 static const char * const f15h_mc2_mce_desc[] = {
82 "Fill ECC error on data fills", /* xec = 0x4 */
83 "Fill parity error on insn fills",
84 "Prefetcher request FIFO parity error",
85 "PRQ address parity error",
86 "PRQ data parity error",
89 "WCB Data parity error",
90 "VB Data ECC or parity error",
91 "L2 Tag ECC error", /* xec = 0x10 */
92 "Hard L2 Tag ECC error",
93 "Multiple hits on L2 tag",
95 "PRB address parity error"
98 static const char * const mc4_mce_desc[] = {
99 "DRAM ECC error detected on the NB",
100 "CRC error detected on HT link",
101 "Link-defined sync error packets detected on HT link",
104 "Invalid GART PTE entry during GART table walk",
105 "Unsupported atomic RMW received from an IO link",
106 "Watchdog timeout due to lack of progress",
107 "DRAM ECC error detected on the NB",
108 "SVM DMA Exclusion Vector error",
109 "HT data error detected on link",
110 "Protocol error (link, L3, probe filter)",
111 "NB internal arrays parity error",
112 "DRAM addr/ctl signals parity error",
113 "IO link transmission error",
114 "L3 data cache ECC error", /* xec = 0x1c */
115 "L3 cache tag error",
116 "L3 LRU parity bits error",
117 "ECC Error in the Probe Filter directory"
120 static const char * const mc5_mce_desc[] = {
121 "CPU Watchdog timer expire",
122 "Wakeup array dest tag",
126 "Retire dispatch queue",
127 "Mapper checkpoint array",
128 "Physical register file EX0 port",
129 "Physical register file EX1 port",
130 "Physical register file AG0 port",
131 "Physical register file AG1 port",
132 "Flag register file",
134 "Retire status queue"
137 static const char * const mc6_mce_desc[] = {
138 "Hardware Assertion",
140 "Physical Register File",
143 "Status Register File",
146 /* Scalable MCA error strings */
147 static const char * const smca_ls_mce_desc[] = {
148 "Load queue parity error",
149 "Store queue parity error",
150 "Miss address buffer payload parity error",
151 "Level 1 TLB parity error",
152 "DC Tag error type 5",
153 "DC Tag error type 6",
154 "DC Tag error type 1",
155 "Internal error type 1",
156 "Internal error type 2",
157 "System Read Data Error Thread 0",
158 "System Read Data Error Thread 1",
159 "DC Tag error type 2",
160 "DC Data error type 1 and poison consumption",
161 "DC Data error type 2",
162 "DC Data error type 3",
163 "DC Tag error type 4",
164 "Level 2 TLB parity error",
166 "DC Tag error type 3",
167 "DC Tag error type 5",
168 "L2 Fill Data error",
171 static const char * const smca_ls2_mce_desc[] = {
172 "An ECC error was detected on a data cache read by a probe or victimization",
173 "An ECC error or L2 poison was detected on a data cache read by a load",
174 "An ECC error was detected on a data cache read-modify-write by a store",
175 "An ECC error or poison bit mismatch was detected on a tag read by a probe or victimization",
176 "An ECC error or poison bit mismatch was detected on a tag read by a load",
177 "An ECC error or poison bit mismatch was detected on a tag read by a store",
178 "An ECC error was detected on an EMEM read by a load",
179 "An ECC error was detected on an EMEM read-modify-write by a store",
180 "A parity error was detected in an L1 TLB entry by any access",
181 "A parity error was detected in an L2 TLB entry by any access",
182 "A parity error was detected in a PWC entry by any access",
183 "A parity error was detected in an STQ entry by any access",
184 "A parity error was detected in an LDQ entry by any access",
185 "A parity error was detected in a MAB entry by any access",
186 "A parity error was detected in an SCB entry state field by any access",
187 "A parity error was detected in an SCB entry address field by any access",
188 "A parity error was detected in an SCB entry data field by any access",
189 "A parity error was detected in a WCB entry by any access",
190 "A poisoned line was detected in an SCB entry by any access",
191 "A SystemReadDataError error was reported on read data returned from L2 for a load",
192 "A SystemReadDataError error was reported on read data returned from L2 for an SCB store",
193 "A SystemReadDataError error was reported on read data returned from L2 for a WCB store",
194 "A hardware assertion error was reported",
195 "A parity error was detected in an STLF, SCB EMEM entry or SRB store data by any access",
198 static const char * const smca_if_mce_desc[] = {
199 "Op Cache Microtag Probe Port Parity Error",
200 "IC Microtag or Full Tag Multi-hit Error",
201 "IC Full Tag Parity Error",
202 "IC Data Array Parity Error",
203 "Decoupling Queue PhysAddr Parity Error",
204 "L0 ITLB Parity Error",
205 "L1 ITLB Parity Error",
206 "L2 ITLB Parity Error",
207 "BPQ Thread 0 Snoop Parity Error",
208 "BPQ Thread 1 Snoop Parity Error",
209 "L1 BTB Multi-Match Error",
210 "L2 BTB Multi-Match Error",
211 "L2 Cache Response Poison Error",
212 "System Read Data Error",
213 "Hardware Assertion Error",
220 static const char * const smca_l2_mce_desc[] = {
221 "L2M Tag Multiple-Way-Hit error",
222 "L2M Tag or State Array ECC Error",
223 "L2M Data Array ECC Error",
224 "Hardware Assert Error",
227 static const char * const smca_de_mce_desc[] = {
228 "Micro-op cache tag parity error",
229 "Micro-op cache data parity error",
230 "Instruction buffer parity error",
231 "Micro-op queue parity error",
232 "Instruction dispatch queue parity error",
233 "Fetch address FIFO parity error",
234 "Patch RAM data parity error",
235 "Patch RAM sequencer parity error",
236 "Micro-op buffer parity error",
237 "Hardware Assertion MCA Error",
240 static const char * const smca_ex_mce_desc[] = {
241 "Watchdog Timeout error",
242 "Physical register file parity error",
243 "Flag register file parity error",
244 "Immediate displacement register file parity error",
245 "Address generator payload parity error",
246 "EX payload parity error",
247 "Checkpoint queue parity error",
248 "Retire dispatch queue parity error",
249 "Retire status queue parity error",
250 "Scheduling queue parity error",
251 "Branch buffer queue parity error",
252 "Hardware Assertion error",
253 "Spec Map parity error",
254 "Retire Map parity error",
257 static const char * const smca_fp_mce_desc[] = {
258 "Physical register file (PRF) parity error",
259 "Freelist (FL) parity error",
260 "Schedule queue parity error",
262 "Retire queue (RQ) parity error",
263 "Status register file (SRF) parity error",
264 "Hardware assertion",
267 static const char * const smca_l3_mce_desc[] = {
268 "Shadow Tag Macro ECC Error",
269 "Shadow Tag Macro Multi-way-hit Error",
271 "L3M Tag Multi-way-hit Error",
272 "L3M Data ECC Error",
273 "SDP Parity Error or SystemReadDataError from XI",
274 "L3 Victim Queue Parity Error",
275 "L3 Hardware Assertion",
278 static const char * const smca_cs_mce_desc[] = {
281 "Security Violation",
283 "Unexpected Response",
284 "Request or Probe Parity Error",
285 "Read Response Parity Error",
286 "Atomic Request Parity Error",
287 "Probe Filter ECC Error",
290 static const char * const smca_cs2_mce_desc[] = {
293 "Security Violation",
295 "Unexpected Response",
296 "Request or Probe Parity Error",
297 "Read Response Parity Error",
298 "Atomic Request Parity Error",
299 "SDP read response had no match in the CS queue",
300 "Probe Filter Protocol Error",
301 "Probe Filter ECC Error",
302 "SDP read response had an unexpected RETRY error",
303 "Counter overflow error",
304 "Counter underflow error",
307 static const char * const smca_pie_mce_desc[] = {
309 "Register security violation",
311 "Poison data consumption",
312 "A deferred error was detected in the DF"
315 static const char * const smca_umc_mce_desc[] = {
319 "Advanced peripheral bus error",
320 "Address/Command parity error",
321 "Write data CRC error",
322 "DCQ SRAM ECC error",
323 "AES SRAM ECC error",
326 static const char * const smca_umc2_mce_desc[] = {
331 "Address/Command parity error",
332 "Write data parity error",
333 "DCQ SRAM ECC error",
335 "Read data parity error",
336 "Rdb SRAM ECC error",
337 "RdRsp SRAM ECC error",
341 static const char * const smca_pb_mce_desc[] = {
342 "An ECC error in the Parameter Block RAM array",
345 static const char * const smca_psp_mce_desc[] = {
346 "An ECC or parity error in a PSP RAM instance",
349 static const char * const smca_psp2_mce_desc[] = {
350 "High SRAM ECC or parity error",
351 "Low SRAM ECC or parity error",
352 "Instruction Cache Bank 0 ECC or parity error",
353 "Instruction Cache Bank 1 ECC or parity error",
354 "Instruction Tag Ram 0 parity error",
355 "Instruction Tag Ram 1 parity error",
356 "Data Cache Bank 0 ECC or parity error",
357 "Data Cache Bank 1 ECC or parity error",
358 "Data Cache Bank 2 ECC or parity error",
359 "Data Cache Bank 3 ECC or parity error",
360 "Data Tag Bank 0 parity error",
361 "Data Tag Bank 1 parity error",
362 "Data Tag Bank 2 parity error",
363 "Data Tag Bank 3 parity error",
364 "Dirty Data Ram parity error",
365 "TLB Bank 0 parity error",
366 "TLB Bank 1 parity error",
367 "System Hub Read Buffer ECC or parity error",
370 static const char * const smca_smu_mce_desc[] = {
371 "An ECC or parity error in an SMU RAM instance",
374 static const char * const smca_smu2_mce_desc[] = {
375 "High SRAM ECC or parity error",
376 "Low SRAM ECC or parity error",
377 "Data Cache Bank A ECC or parity error",
378 "Data Cache Bank B ECC or parity error",
379 "Data Tag Cache Bank A ECC or parity error",
380 "Data Tag Cache Bank B ECC or parity error",
381 "Instruction Cache Bank A ECC or parity error",
382 "Instruction Cache Bank B ECC or parity error",
383 "Instruction Tag Cache Bank A ECC or parity error",
384 "Instruction Tag Cache Bank B ECC or parity error",
385 "System Hub Read Buffer ECC or parity error",
389 static const char * const smca_mp5_mce_desc[] = {
390 "High SRAM ECC or parity error",
391 "Low SRAM ECC or parity error",
392 "Data Cache Bank A ECC or parity error",
393 "Data Cache Bank B ECC or parity error",
394 "Data Tag Cache Bank A ECC or parity error",
395 "Data Tag Cache Bank B ECC or parity error",
396 "Instruction Cache Bank A ECC or parity error",
397 "Instruction Cache Bank B ECC or parity error",
398 "Instruction Tag Cache Bank A ECC or parity error",
399 "Instruction Tag Cache Bank B ECC or parity error",
402 static const char * const smca_mpdma_mce_desc[] = {
403 "Main SRAM [31:0] bank ECC or parity error",
404 "Main SRAM [63:32] bank ECC or parity error",
405 "Main SRAM [95:64] bank ECC or parity error",
406 "Main SRAM [127:96] bank ECC or parity error",
407 "Data Cache Bank A ECC or parity error",
408 "Data Cache Bank B ECC or parity error",
409 "Data Tag Cache Bank A ECC or parity error",
410 "Data Tag Cache Bank B ECC or parity error",
411 "Instruction Cache Bank A ECC or parity error",
412 "Instruction Cache Bank B ECC or parity error",
413 "Instruction Tag Cache Bank A ECC or parity error",
414 "Instruction Tag Cache Bank B ECC or parity error",
415 "Data Cache Bank A ECC or parity error",
416 "Data Cache Bank B ECC or parity error",
417 "Data Tag Cache Bank A ECC or parity error",
418 "Data Tag Cache Bank B ECC or parity error",
419 "Instruction Cache Bank A ECC or parity error",
420 "Instruction Cache Bank B ECC or parity error",
421 "Instruction Tag Cache Bank A ECC or parity error",
422 "Instruction Tag Cache Bank B ECC or parity error",
423 "Data Cache Bank A ECC or parity error",
424 "Data Cache Bank B ECC or parity error",
425 "Data Tag Cache Bank A ECC or parity error",
426 "Data Tag Cache Bank B ECC or parity error",
427 "Instruction Cache Bank A ECC or parity error",
428 "Instruction Cache Bank B ECC or parity error",
429 "Instruction Tag Cache Bank A ECC or parity error",
430 "Instruction Tag Cache Bank B ECC or parity error",
431 "System Hub Read Buffer ECC or parity error",
432 "MPDMA TVF DVSEC Memory ECC or parity error",
433 "MPDMA TVF MMIO Mailbox0 ECC or parity error",
434 "MPDMA TVF MMIO Mailbox1 ECC or parity error",
435 "MPDMA TVF Doorbell Memory ECC or parity error",
436 "MPDMA TVF SDP Slave Memory 0 ECC or parity error",
437 "MPDMA TVF SDP Slave Memory 1 ECC or parity error",
438 "MPDMA TVF SDP Slave Memory 2 ECC or parity error",
439 "MPDMA TVF SDP Master Memory 0 ECC or parity error",
440 "MPDMA TVF SDP Master Memory 1 ECC or parity error",
441 "MPDMA TVF SDP Master Memory 2 ECC or parity error",
442 "MPDMA TVF SDP Master Memory 3 ECC or parity error",
443 "MPDMA TVF SDP Master Memory 4 ECC or parity error",
444 "MPDMA TVF SDP Master Memory 5 ECC or parity error",
445 "MPDMA TVF SDP Master Memory 6 ECC or parity error",
446 "MPDMA PTE Command FIFO ECC or parity error",
447 "MPDMA PTE Hub Data FIFO ECC or parity error",
448 "MPDMA PTE Internal Data FIFO ECC or parity error",
449 "MPDMA PTE Command Memory DMA ECC or parity error",
450 "MPDMA PTE Command Memory Internal ECC or parity error",
451 "MPDMA PTE DMA Completion FIFO ECC or parity error",
452 "MPDMA PTE Tablewalk Completion FIFO ECC or parity error",
453 "MPDMA PTE Descriptor Completion FIFO ECC or parity error",
454 "MPDMA PTE ReadOnly Completion FIFO ECC or parity error",
455 "MPDMA PTE DirectWrite Completion FIFO ECC or parity error",
456 "SDP Watchdog Timer expired",
459 static const char * const smca_nbio_mce_desc[] = {
460 "ECC or Parity error",
462 "SDP ErrEvent error",
463 "SDP Egress Poison Error",
464 "IOHC Internal Poison Error",
467 static const char * const smca_pcie_mce_desc[] = {
468 "CCIX PER Message logging",
469 "CCIX Read Response with Status: Non-Data Error",
470 "CCIX Write Response with Status: Non-Data Error",
471 "CCIX Read Response with Status: Data Error",
472 "CCIX Non-okay write response with data error",
475 static const char * const smca_pcie2_mce_desc[] = {
476 "SDP Parity Error logging",
479 static const char * const smca_xgmipcs_mce_desc[] = {
482 "Flow Control Acknowledge Error",
483 "Rx Fifo Underflow Error",
484 "Rx Fifo Overflow Error",
486 "BER Exceeded Error",
487 "Tx Vcid Data Error",
488 "Replay Buffer Parity Error",
490 "Replay Fifo Overflow Error",
491 "Replay Fifo Underflow Error",
492 "Elastic Fifo Overflow Error",
494 "Flow Control CRC Error",
495 "Data Startup Limit Error",
496 "FC Init Timeout Error",
497 "Recovery Timeout Error",
498 "Ready Serial Timeout Error",
499 "Ready Serial Attempt Error",
500 "Recovery Attempt Error",
501 "Recovery Relock Attempt Error",
502 "Replay Attempt Error",
504 "Tx Replay Timeout Error",
505 "Rx Replay Timeout Error",
506 "LinkSub Tx Timeout Error",
507 "LinkSub Rx Timeout Error",
508 "Rx CMD Packet Error",
511 static const char * const smca_xgmiphy_mce_desc[] = {
513 "ARC instruction buffer parity error",
514 "ARC data buffer parity error",
518 static const char * const smca_nbif_mce_desc[] = {
519 "Timeout error from GMI",
525 static const char * const smca_sata_mce_desc[] = {
526 "Parity error for port 0",
527 "Parity error for port 1",
528 "Parity error for port 2",
529 "Parity error for port 3",
530 "Parity error for port 4",
531 "Parity error for port 5",
532 "Parity error for port 6",
533 "Parity error for port 7",
536 static const char * const smca_usb_mce_desc[] = {
537 "Parity error or ECC error for S0 RAM0",
538 "Parity error or ECC error for S0 RAM1",
539 "Parity error or ECC error for S0 RAM2",
540 "Parity error for PHY RAM0",
541 "Parity error for PHY RAM1",
542 "AXI Slave Response error",
545 static const char * const smca_gmipcs_mce_desc[] = {
548 "Replay Parity Error",
549 "Rx Fifo Underflow Error",
550 "Rx Fifo Overflow Error",
552 "BER Exceeded Error",
553 "Tx Fifo Underflow Error",
554 "Replay Buffer Parity Error",
556 "Replay Fifo Overflow Error",
557 "Replay Fifo Underflow Error",
558 "Elastic Fifo Overflow Error",
561 "Data Startup Limit Error",
562 "FC Init Timeout Error",
563 "Recovery Timeout Error",
564 "Ready Serial Timeout Error",
565 "Ready Serial Attempt Error",
566 "Recovery Attempt Error",
567 "Recovery Relock Attempt Error",
568 "Deskew Abort Error",
570 "Rx LFDS Fifo Overflow Error",
571 "Rx LFDS Fifo Underflow Error",
572 "LinkSub Tx Timeout Error",
573 "LinkSub Rx Timeout Error",
574 "Rx CMD Packet Error",
575 "LFDS Training Timeout Error",
576 "LFDS FC Init Timeout Error",
580 struct smca_mce_desc {
581 const char * const *descs;
582 unsigned int num_descs;
585 static struct smca_mce_desc smca_mce_descs[] = {
586 [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) },
587 [SMCA_LS_V2] = { smca_ls2_mce_desc, ARRAY_SIZE(smca_ls2_mce_desc) },
588 [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) },
589 [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) },
590 [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) },
591 [SMCA_EX] = { smca_ex_mce_desc, ARRAY_SIZE(smca_ex_mce_desc) },
592 [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) },
593 [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) },
594 [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) },
595 [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) },
596 [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
597 [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
598 [SMCA_UMC_V2] = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc) },
599 [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
600 [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
601 [SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc) },
602 [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
603 [SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc) },
604 [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) },
605 [SMCA_MPDMA] = { smca_mpdma_mce_desc, ARRAY_SIZE(smca_mpdma_mce_desc) },
606 [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) },
607 [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) },
608 [SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) },
609 [SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) },
610 /* NBIF and SHUB have the same error descriptions, for now. */
611 [SMCA_NBIF] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) },
612 [SMCA_SHUB] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) },
613 [SMCA_SATA] = { smca_sata_mce_desc, ARRAY_SIZE(smca_sata_mce_desc) },
614 [SMCA_USB] = { smca_usb_mce_desc, ARRAY_SIZE(smca_usb_mce_desc) },
615 [SMCA_GMI_PCS] = { smca_gmipcs_mce_desc, ARRAY_SIZE(smca_gmipcs_mce_desc) },
616 /* All the PHY bank types have the same error descriptions, for now. */
617 [SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
618 [SMCA_WAFL_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
619 [SMCA_GMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
622 static bool f12h_mc0_mce(u16 ec, u8 xec)
631 pr_cont("during L1 linefill from L2.\n");
632 else if (ll == LL_L1)
633 pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
640 static bool f10h_mc0_mce(u16 ec, u8 xec)
642 if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
643 pr_cont("during data scrub.\n");
646 return f12h_mc0_mce(ec, xec);
649 static bool k8_mc0_mce(u16 ec, u8 xec)
652 pr_cont("during system linefill.\n");
656 return f10h_mc0_mce(ec, xec);
659 static bool cat_mc0_mce(u16 ec, u8 xec)
666 if (TT(ec) != TT_DATA || LL(ec) != LL_L1)
672 pr_cont("Data/Tag parity error due to %s.\n",
673 (r4 == R4_DRD ? "load/hw prf" : "store"));
676 pr_cont("Copyback parity error on a tag miss.\n");
679 pr_cont("Tag parity error during snoop.\n");
684 } else if (BUS_ERROR(ec)) {
686 if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
689 pr_cont("System read data error on a ");
693 pr_cont("TLB reload.\n");
711 static bool f15h_mc0_mce(u16 ec, u8 xec)
719 pr_cont("Data Array access error.\n");
723 pr_cont("UC error during a linefill from L2/NB.\n");
728 pr_cont("STQ access error.\n");
732 pr_cont("SCB access error.\n");
736 pr_cont("Tag error.\n");
740 pr_cont("LDQ access error.\n");
746 } else if (BUS_ERROR(ec)) {
749 pr_cont("System Read Data Error.\n");
751 pr_cont(" Internal error condition type %d.\n", xec);
752 } else if (INT_ERROR(ec)) {
754 pr_cont("Hardware Assert.\n");
764 static void decode_mc0_mce(struct mce *m)
766 u16 ec = EC(m->status);
767 u8 xec = XEC(m->status, xec_mask);
769 pr_emerg(HW_ERR "MC0 Error: ");
771 /* TLB error signatures are the same across families */
773 if (TT(ec) == TT_DATA) {
774 pr_cont("%s TLB %s.\n", LL_MSG(ec),
775 ((xec == 2) ? "locked miss"
776 : (xec ? "multimatch" : "parity")));
779 } else if (fam_ops.mc0_mce(ec, xec))
782 pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
785 static bool k8_mc1_mce(u16 ec, u8 xec)
794 pr_cont("during a linefill from L2.\n");
795 else if (ll == 0x1) {
798 pr_cont("Parity error during data load.\n");
802 pr_cont("Copyback Parity/Victim error.\n");
806 pr_cont("Tag Snoop error.\n");
819 static bool cat_mc1_mce(u16 ec, u8 xec)
827 if (TT(ec) != TT_INSTR)
831 pr_cont("Data/tag array parity error for a tag hit.\n");
832 else if (r4 == R4_SNOOP)
833 pr_cont("Tag error during snoop/victimization.\n");
835 pr_cont("Tag parity error from victim castout.\n");
837 pr_cont("Microcode patch RAM parity error.\n");
844 static bool f15h_mc1_mce(u16 ec, u8 xec)
853 pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
857 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
861 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
865 pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
874 static void decode_mc1_mce(struct mce *m)
876 u16 ec = EC(m->status);
877 u8 xec = XEC(m->status, xec_mask);
879 pr_emerg(HW_ERR "MC1 Error: ");
882 pr_cont("%s TLB %s.\n", LL_MSG(ec),
883 (xec ? "multimatch" : "parity error"));
884 else if (BUS_ERROR(ec)) {
885 bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
887 pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
888 } else if (INT_ERROR(ec)) {
890 pr_cont("Hardware Assert.\n");
893 } else if (fam_ops.mc1_mce(ec, xec))
901 pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
904 static bool k8_mc2_mce(u16 ec, u8 xec)
909 pr_cont(" in the write data buffers.\n");
911 pr_cont(" in the victim data buffers.\n");
912 else if (xec == 0x2 && MEM_ERROR(ec))
913 pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
914 else if (xec == 0x0) {
916 pr_cont("%s error in a Page Descriptor Cache or Guest TLB.\n",
918 else if (BUS_ERROR(ec))
919 pr_cont(": %s/ECC error in data read from NB: %s.\n",
920 R4_MSG(ec), PP_MSG(ec));
921 else if (MEM_ERROR(ec)) {
925 pr_cont(": %s error during data copyback.\n",
928 pr_cont(": %s parity/ECC error during data "
929 "access from L2.\n", R4_MSG(ec));
940 static bool f15h_mc2_mce(u16 ec, u8 xec)
946 pr_cont("Data parity TLB read error.\n");
948 pr_cont("Poison data provided for TLB fill.\n");
951 } else if (BUS_ERROR(ec)) {
955 pr_cont("Error during attempted NB data read.\n");
956 } else if (MEM_ERROR(ec)) {
959 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
963 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
969 } else if (INT_ERROR(ec)) {
971 pr_cont("Hardware Assert.\n");
979 static bool f16h_mc2_mce(u16 ec, u8 xec)
988 pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O');
993 pr_cont("ECC error in L2 tag (%s).\n",
994 ((r4 == R4_GEN) ? "BankReq" :
995 ((r4 == R4_SNOOP) ? "Prb" : "Fill")));
1000 pr_cont("ECC error in L2 data array (%s).\n",
1001 (((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" :
1002 ((r4 == R4_GEN) ? "Attr" :
1003 ((r4 == R4_EVICT) ? "Vict" : "Fill"))));
1008 pr_cont("Parity error in L2 attribute bits (%s).\n",
1009 ((r4 == R4_RD) ? "Hit" :
1010 ((r4 == R4_GEN) ? "Attr" : "Fill")));
/*
 * Decode an MC2 bank error.
 *
 * @m: the MCE record; only m->status is read here.
 *
 * Prints the "MC2 Error: " prefix at emergency level without a trailing
 * newline, then passes the error code and the extended error code (extracted
 * with xec_mask) to the family-specific fam_ops.mc2_mce() handler. If the
 * handler returns false, a "Corrupted MC2 MCE info?" message is appended.
 *
 * NOTE(review): the fallback message uses pr_cont() together with an HW_ERR
 * prefix, whereas the MC0/MC1/MC3/MC4/MC5/MC6 decoders in this file emit the
 * equivalent message with pr_emerg() - confirm whether this is intentional.
 */
1020 static void decode_mc2_mce(struct mce *m)
1022 u16 ec = EC(m->status);
1023 u8 xec = XEC(m->status, xec_mask);
1025 pr_emerg(HW_ERR "MC2 Error: ");
1027 if (!fam_ops.mc2_mce(ec, xec))
1028 pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
1031 static void decode_mc3_mce(struct mce *m)
1033 u16 ec = EC(m->status);
1034 u8 xec = XEC(m->status, xec_mask);
1036 if (boot_cpu_data.x86 >= 0x14) {
1037 pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
1038 " please report on LKML.\n");
1042 pr_emerg(HW_ERR "MC3 Error");
1047 if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
1050 pr_cont(" during %s.\n", R4_MSG(ec));
1057 pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
1060 static void decode_mc4_mce(struct mce *m)
1062 unsigned int fam = x86_family(m->cpuid);
1063 int node_id = topology_die_id(m->extcpu);
1064 u16 ec = EC(m->status);
1065 u8 xec = XEC(m->status, 0x1f);
1068 pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
1073 /* special handling for DRAM ECCs */
1074 if (xec == 0x0 || xec == 0x8) {
1075 /* no ECCs on F11h */
1079 pr_cont("%s.\n", mc4_mce_desc[xec]);
1081 if (decode_dram_ecc)
1082 decode_dram_ecc(node_id, m);
1089 pr_cont("GART Table Walk data error.\n");
1090 else if (BUS_ERROR(ec))
1091 pr_cont("DMA Exclusion Vector Table Walk error.\n");
1097 if (fam == 0x15 || fam == 0x16)
1098 pr_cont("Compute Unit Data Error.\n");
1111 pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
1115 pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
1118 static void decode_mc5_mce(struct mce *m)
1120 unsigned int fam = x86_family(m->cpuid);
1121 u16 ec = EC(m->status);
1122 u8 xec = XEC(m->status, xec_mask);
1124 if (fam == 0xf || fam == 0x11)
1127 pr_emerg(HW_ERR "MC5 Error: ");
1129 if (INT_ERROR(ec)) {
1131 pr_cont("Hardware Assert.\n");
1137 if (xec == 0x0 || xec == 0xc)
1138 pr_cont("%s.\n", mc5_mce_desc[xec]);
1139 else if (xec <= 0xd)
1140 pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
1147 pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
1150 static void decode_mc6_mce(struct mce *m)
1152 u8 xec = XEC(m->status, xec_mask);
1154 pr_emerg(HW_ERR "MC6 Error: ");
1159 pr_cont("%s parity error.\n", mc6_mce_desc[xec]);
1163 pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
1166 /* Decode errors according to Scalable MCA specification */
1167 static void decode_smca_error(struct mce *m)
1169 enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank);
1170 const char *ip_name;
1171 u8 xec = XEC(m->status, xec_mask);
1173 if (bank_type >= N_SMCA_BANK_TYPES)
1176 if (bank_type == SMCA_RESERVED) {
1177 pr_emerg(HW_ERR "Bank %d is reserved.\n", m->bank);
1181 ip_name = smca_get_long_name(bank_type);
1183 pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec);
1185 /* Only print the decode of valid error codes */
1186 if (xec < smca_mce_descs[bank_type].num_descs)
1187 pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);
1189 if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
1190 decode_dram_ecc(topology_die_id(m->extcpu), m);
1193 static inline void amd_decode_err_code(u16 ec)
1195 if (INT_ERROR(ec)) {
1196 pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
1200 pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
1203 pr_cont(", mem/io: %s", II_MSG(ec));
1205 pr_cont(", tx: %s", TT_MSG(ec));
1207 if (MEM_ERROR(ec) || BUS_ERROR(ec)) {
1208 pr_cont(", mem-tx: %s", R4_MSG(ec));
1211 pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec));
/*
 * Map the status bits of an MCE record to a human-readable severity string.
 *
 * @m: the MCE record; m->status and m->mcgstatus are read.
 *
 * Returns a pointer to a string literal (never NULL):
 *  - UC set and PCC set:         "System Fatal error."
 *  - UC set, PCC clear, RIPV set: "Uncorrected, software restartable error."
 *  - UC set, PCC and RIPV clear:  "Uncorrected, software containable error."
 *  - UC clear, DEFERRED set:     "Deferred error, no action required."
 *  - otherwise:                  "Corrected error, no action required."
 */
1217 static const char *decode_error_status(struct mce *m)
1219 if (m->status & MCI_STATUS_UC) {
1220 if (m->status & MCI_STATUS_PCC)
1221 return "System Fatal error.";
1222 if (m->mcgstatus & MCG_STATUS_RIPV)
1223 return "Uncorrected, software restartable error.";
1224 return "Uncorrected, software containable error.";
1227 if (m->status & MCI_STATUS_DEFERRED)
1228 return "Deferred error, no action required.";
1230 return "Corrected error, no action required.";
1234 amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
1236 struct mce *m = (struct mce *)data;
1237 unsigned int fam = x86_family(m->cpuid);
1240 if (m->kflags & MCE_HANDLED_CEC)
1243 pr_emerg(HW_ERR "%s\n", decode_error_status(m));
1245 pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
1247 fam, x86_model(m->cpuid), x86_stepping(m->cpuid),
1249 ((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
1250 ((m->status & MCI_STATUS_UC) ? "UE" :
1251 (m->status & MCI_STATUS_DEFERRED) ? "-" : "CE"),
1252 ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
1253 ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"),
1254 ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"));
1256 if (boot_cpu_has(X86_FEATURE_SMCA)) {
1258 u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
1260 if (!rdmsr_safe(addr, &low, &high) &&
1261 (low & MCI_CONFIG_MCAX))
1262 pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
1264 pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
1267 /* do the two ECC status bits together: MCi_STATUS[46:45], i.e. bits [14:13] of the upper dword */
1268 ecc = (m->status >> 45) & 0x3;
1270 pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
1273 pr_cont("|%s", (m->status & MCI_STATUS_DEFERRED ? "Deferred" : "-"));
1275 /* F15h, bank4, bit 43 is part of McaStatSubCache. */
1276 if (fam != 0x15 || m->bank != 4)
1277 pr_cont("|%s", (m->status & MCI_STATUS_POISON ? "Poison" : "-"));
1281 pr_cont("|%s", (m->status & MCI_STATUS_SCRUB ? "Scrub" : "-"));
1283 pr_cont("]: 0x%016llx\n", m->status);
1285 if (m->status & MCI_STATUS_ADDRV)
1286 pr_emerg(HW_ERR "Error Addr: 0x%016llx\n", m->addr);
1289 pr_emerg(HW_ERR "PPIN: 0x%016llx\n", m->ppin);
1291 if (boot_cpu_has(X86_FEATURE_SMCA)) {
1292 pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid);
1294 if (m->status & MCI_STATUS_SYNDV)
1295 pr_cont(", Syndrome: 0x%016llx", m->synd);
1299 decode_smca_error(m);
1304 pr_emerg(HW_ERR "TSC: %llu\n", m->tsc);
1306 /* Doesn't matter which member to test. */
1307 if (!fam_ops.mc0_mce)
1344 amd_decode_err_code(m->status & 0xffff);
1346 m->kflags |= MCE_HANDLED_EDAC;
1350 static struct notifier_block amd_mce_dec_nb = {
1351 .notifier_call = amd_decode_mce,
1352 .priority = MCE_PRIO_EDAC,
1355 static int __init mce_amd_init(void)
1357 struct cpuinfo_x86 *c = &boot_cpu_data;
1359 if (c->x86_vendor != X86_VENDOR_AMD &&
1360 c->x86_vendor != X86_VENDOR_HYGON)
1363 if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
1366 if (boot_cpu_has(X86_FEATURE_SMCA)) {
1373 fam_ops.mc0_mce = k8_mc0_mce;
1374 fam_ops.mc1_mce = k8_mc1_mce;
1375 fam_ops.mc2_mce = k8_mc2_mce;
1379 fam_ops.mc0_mce = f10h_mc0_mce;
1380 fam_ops.mc1_mce = k8_mc1_mce;
1381 fam_ops.mc2_mce = k8_mc2_mce;
1385 fam_ops.mc0_mce = k8_mc0_mce;
1386 fam_ops.mc1_mce = k8_mc1_mce;
1387 fam_ops.mc2_mce = k8_mc2_mce;
1391 fam_ops.mc0_mce = f12h_mc0_mce;
1392 fam_ops.mc1_mce = k8_mc1_mce;
1393 fam_ops.mc2_mce = k8_mc2_mce;
1397 fam_ops.mc0_mce = cat_mc0_mce;
1398 fam_ops.mc1_mce = cat_mc1_mce;
1399 fam_ops.mc2_mce = k8_mc2_mce;
1403 xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;
1405 fam_ops.mc0_mce = f15h_mc0_mce;
1406 fam_ops.mc1_mce = f15h_mc1_mce;
1407 fam_ops.mc2_mce = f15h_mc2_mce;
1412 fam_ops.mc0_mce = cat_mc0_mce;
1413 fam_ops.mc1_mce = cat_mc1_mce;
1414 fam_ops.mc2_mce = f16h_mc2_mce;
1419 pr_warn_once("Decoding supported only on Scalable MCA processors.\n");
1423 printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
1428 pr_info("MCE: In-kernel MCE decoding enabled.\n");
1430 mce_register_decode_chain(&amd_mce_dec_nb);
1434 early_initcall(mce_amd_init);
1437 static void __exit mce_amd_exit(void)
1439 mce_unregister_decode_chain(&amd_mce_dec_nb);
/* Module metadata; mce_amd_exit() is registered as the module's exit handler. */
1442 MODULE_DESCRIPTION("AMD MCE decoder");
1443 MODULE_ALIAS("edac-mce-amd");
1444 MODULE_LICENSE("GPL");
1445 module_exit(mce_amd_exit);