1 // SPDX-License-Identifier: GPL-2.0-only
3 * Ampere Computing SoC's SMpro Error Monitoring Driver
5 * Copyright (c) 2022, Ampere Computing LLC
10 #include <linux/mod_devicetable.h>
11 #include <linux/module.h>
12 #include <linux/platform_device.h>
13 #include <linux/regmap.h>
15 /* GPI RAS Error Registers */
16 #define GPI_RAS_ERR 0x7E
18 /* Core and L2C Error Registers */
19 #define CORE_CE_ERR_CNT 0x80
20 #define CORE_CE_ERR_LEN 0x81
21 #define CORE_CE_ERR_DATA 0x82
22 #define CORE_UE_ERR_CNT 0x83
23 #define CORE_UE_ERR_LEN 0x84
24 #define CORE_UE_ERR_DATA 0x85
26 /* Memory Error Registers */
27 #define MEM_CE_ERR_CNT 0x90
28 #define MEM_CE_ERR_LEN 0x91
29 #define MEM_CE_ERR_DATA 0x92
30 #define MEM_UE_ERR_CNT 0x93
31 #define MEM_UE_ERR_LEN 0x94
32 #define MEM_UE_ERR_DATA 0x95
34 /* RAS Error/Warning Registers */
35 #define ERR_SMPRO_TYPE 0xA0
36 #define ERR_PMPRO_TYPE 0xA1
37 #define ERR_SMPRO_INFO_LO 0xA2
38 #define ERR_SMPRO_INFO_HI 0xA3
39 #define ERR_SMPRO_DATA_LO 0xA4
40 #define ERR_SMPRO_DATA_HI 0xA5
41 #define WARN_SMPRO_INFO_LO 0xAA
42 #define WARN_SMPRO_INFO_HI 0xAB
43 #define ERR_PMPRO_INFO_LO 0xA6
44 #define ERR_PMPRO_INFO_HI 0xA7
45 #define ERR_PMPRO_DATA_LO 0xA8
46 #define ERR_PMPRO_DATA_HI 0xA9
47 #define WARN_PMPRO_INFO_LO 0xAC
48 #define WARN_PMPRO_INFO_HI 0xAD
50 /* Boot Stage and DIMM Syndrome Registers */
51 #define BOOTSTAGE 0xB0
52 #define DIMM_SYNDROME_SEL 0xB4
53 #define DIMM_SYNDROME_ERR 0xB5
54 #define DIMM_SYNDROME_STAGE 4
56 /* PCIE Error Registers */
57 #define PCIE_CE_ERR_CNT 0xC0
58 #define PCIE_CE_ERR_LEN 0xC1
59 #define PCIE_CE_ERR_DATA 0xC2
60 #define PCIE_UE_ERR_CNT 0xC3
61 #define PCIE_UE_ERR_LEN 0xC4
62 #define PCIE_UE_ERR_DATA 0xC5
64 /* Other Error Registers */
65 #define OTHER_CE_ERR_CNT 0xD0
66 #define OTHER_CE_ERR_LEN 0xD1
67 #define OTHER_CE_ERR_DATA 0xD2
68 #define OTHER_UE_ERR_CNT 0xD8
69 #define OTHER_UE_ERR_LEN 0xD9
70 #define OTHER_UE_ERR_DATA 0xDA
72 /* Event Data Registers */
73 #define VRD_WARN_FAULT_EVENT_DATA 0x78
74 #define VRD_HOT_EVENT_DATA 0x79
75 #define DIMM_HOT_EVENT_DATA 0x7A
76 #define DIMM_2X_REFRESH_EVENT_DATA 0x96
78 #define MAX_READ_BLOCK_LENGTH 48
80 #define RAS_SMPRO_ERR 0
81 #define RAS_PMPRO_ERR 1
83 enum RAS_48BYTES_ERR_TYPES {
95 struct smpro_error_hdr {
96 u8 count; /* Number of the RAS errors */
97 u8 len; /* Number of data bytes */
98 u8 data; /* Start of 48-byte data */
99 u8 max_cnt; /* Max num of errors */
103 * Addresses of the registers holding the count, the data length and the data
104 * of each type of 48-byte error
106 static struct smpro_error_hdr smpro_error_table[] = {
108 .count = CORE_CE_ERR_CNT,
109 .len = CORE_CE_ERR_LEN,
110 .data = CORE_CE_ERR_DATA,
114 .count = CORE_UE_ERR_CNT,
115 .len = CORE_UE_ERR_LEN,
116 .data = CORE_UE_ERR_DATA,
120 .count = MEM_CE_ERR_CNT,
121 .len = MEM_CE_ERR_LEN,
122 .data = MEM_CE_ERR_DATA,
126 .count = MEM_UE_ERR_CNT,
127 .len = MEM_UE_ERR_LEN,
128 .data = MEM_UE_ERR_DATA,
132 .count = PCIE_CE_ERR_CNT,
133 .len = PCIE_CE_ERR_LEN,
134 .data = PCIE_CE_ERR_DATA,
138 .count = PCIE_UE_ERR_CNT,
139 .len = PCIE_UE_ERR_LEN,
140 .data = PCIE_UE_ERR_DATA,
144 .count = OTHER_CE_ERR_CNT,
145 .len = OTHER_CE_ERR_LEN,
146 .data = OTHER_CE_ERR_DATA,
150 .count = OTHER_UE_ERR_CNT,
151 .len = OTHER_UE_ERR_LEN,
152 .data = OTHER_UE_ERR_DATA,
158 * Addresses of the SCP registers used to read
159 * one type of RAS internal error.
161 struct smpro_int_error_hdr {
171 static struct smpro_int_error_hdr list_smpro_int_error_hdr[] = {
173 .type = ERR_SMPRO_TYPE,
174 .info_l = ERR_SMPRO_INFO_LO,
175 .info_h = ERR_SMPRO_INFO_HI,
176 .data_l = ERR_SMPRO_DATA_LO,
177 .data_h = ERR_SMPRO_DATA_HI,
178 .warn_l = WARN_SMPRO_INFO_LO,
179 .warn_h = WARN_SMPRO_INFO_HI,
182 .type = ERR_PMPRO_TYPE,
183 .info_l = ERR_PMPRO_INFO_LO,
184 .info_h = ERR_PMPRO_INFO_HI,
185 .data_l = ERR_PMPRO_DATA_LO,
186 .data_h = ERR_PMPRO_DATA_HI,
187 .warn_l = WARN_PMPRO_INFO_LO,
188 .warn_h = WARN_PMPRO_INFO_HI,
192 struct smpro_errmon {
193 struct regmap *regmap;
197 VRD_WARN_FAULT_EVENT,
200 DIMM_2X_REFRESH_EVENT,
204 /* Addresses of the event data registers, indexed by event type */
205 static u8 smpro_event_table[NUM_EVENTS_TYPE] = {
206 VRD_WARN_FAULT_EVENT_DATA,
209 DIMM_2X_REFRESH_EVENT_DATA,
/*
 * Read the event data register selected by `channel` (an index into
 * smpro_event_table) and emit its value to `buf` as a zero-padded hex
 * number of at least four digits followed by a newline.
 *
 * The value that was read is written back to the same register to clear
 * the event; the return value of that write is not captured.
 *
 * NOTE(review): several lines of this function (local declarations, the
 * handling of a failed regmap_read() and any condition guarding the
 * write-back) are not visible in this view - confirm against the full file.
 */
212 static ssize_t smpro_event_data_read(struct device *dev,
213 struct device_attribute *da, char *buf,
216 struct smpro_errmon *errmon = dev_get_drvdata(dev);
220 ret = regmap_read(errmon->regmap, smpro_event_table[channel], &event_data);
223 /* Clear the event by writing the value just read back to its register */
225 regmap_write(errmon->regmap, smpro_event_table[channel], event_data);
227 return sysfs_emit(buf, "%04x\n", event_data);
/*
 * Report the overflow status of the error type selected by `channel`, an
 * index into smpro_error_table. Reads that type's count register and emits
 * "1\n" to `buf` when bit 8 of the value is set, "0\n" otherwise.
 *
 * NOTE(review): the handling of a failed regmap_read() is not visible in
 * this view.
 */
230 static ssize_t smpro_overflow_data_read(struct device *dev, struct device_attribute *da,
231 char *buf, int channel)
233 struct smpro_errmon *errmon = dev_get_drvdata(dev);
234 struct smpro_error_hdr *err_info;
/* Register addresses for this error type */
238 err_info = &smpro_error_table[channel];
240 ret = regmap_read(errmon->regmap, err_info->count, &err_count);
244 /* Bit 8 of the count register indicates the overflow status */
245 return sysfs_emit(buf, "%d\n", (err_count & BIT(8)) ? 1 : 0);
/*
 * Read one error record of the type selected by `channel` (an index into
 * smpro_error_table) and emit it to `buf` as a hex string.
 *
 * Visible sequence:
 *  1. read the count register; a read failure, a zero count or a count
 *     above the type's max_cnt is treated as a special case;
 *  2. read the length register; a read failure or a length <= 0 is treated
 *     as a special case; a length above MAX_READ_BLOCK_LENGTH is clamped;
 *  3. read `err_length` bytes from the data register into a zeroed buffer;
 *  4. write 0x100 to the count register to clear the error;
 *  5. emit all MAX_READ_BLOCK_LENGTH bytes of the buffer with "%*phN".
 *
 * NOTE(review): the bodies of the special-case branches, the masking that
 * the "low byte" comment refers to and the error checks after the data
 * read and the clearing write are not visible in this view. The count and
 * length locals are s32 while their addresses are passed to regmap_read();
 * confirm that this matches the regmap_read() prototype.
 */
248 static ssize_t smpro_error_data_read(struct device *dev, struct device_attribute *da,
249 char *buf, int channel)
251 struct smpro_errmon *errmon = dev_get_drvdata(dev);
252 unsigned char err_data[MAX_READ_BLOCK_LENGTH];
253 struct smpro_error_hdr *err_info;
254 s32 err_count, err_length;
/* Register addresses and limits for this error type */
257 err_info = &smpro_error_table[channel];
259 ret = regmap_read(errmon->regmap, err_info->count, &err_count);
260 /* Error count is the low byte */
262 if (ret || !err_count || err_count > err_info->max_cnt)
265 ret = regmap_read(errmon->regmap, err_info->len, &err_length);
266 if (ret || err_length <= 0)
/* Never read more than the local buffer can hold */
269 if (err_length > MAX_READ_BLOCK_LENGTH)
270 err_length = MAX_READ_BLOCK_LENGTH;
/* Zero the whole buffer so bytes past err_length are emitted as 00 */
272 memset(err_data, 0x00, MAX_READ_BLOCK_LENGTH);
273 ret = regmap_noinc_read(errmon->regmap, err_info->data, err_data, err_length);
277 /* Clear the error by writing 0x100 (bit 8) to the count register */
278 ret = regmap_write(errmon->regmap, err_info->count, 0x100);
282 * The output of Core/Memory/PCIe/Others UE/CE errors follows the format
283 * specified in section 5.8.1 CE/UE Error Data record in
284 * Altra SOC BMC Interface specification.
286 return sysfs_emit(buf, "%*phN\n", MAX_READ_BLOCK_LENGTH, err_data);
291 * <4-byte hex value of error info><4-byte hex value of error extensive data>
293 * + error info : The error information
294 * + error data : Extensive data (32 bits)
295 * Refer to section 5.10 RAS Internal Error Register Definition in
296 * the Altra SOC BMC Interface specification
/*
 * Read the internal RAS error of the processor selected by `channel`
 * (RAS_SMPRO_ERR or RAS_PMPRO_ERR, an index into list_smpro_int_error_hdr)
 * and emit it to `buf` as a hex string.
 *
 * Visible sequence:
 *  1. read GPI_RAS_ERR; bit 0 (SMpro) or bit 1 (PMpro) being clear for the
 *     requested channel is treated as a special case;
 *  2. read the type register and pick the error type: bit 1 if set,
 *     otherwise bit 2 if set, otherwise 0;
 *  3. read the info registers into err[0] (high) and err[1] (low);
 *  4. for the bit 2 type only, read the data registers into err[2] (high)
 *     and err[3] (low); otherwise those elements stay 0;
 *  5. write the selected type bit back to the type register to clear it;
 *  6. emit the sizeof(err) bytes of the array with "%*phN".
 *
 * NOTE(review): the bodies of the special-case branches and the error
 * checks after each regmap call are not visible in this view. The raw
 * bytes of the unsigned int array are dumped as stored in memory; confirm
 * that the resulting width and byte order match the documented format.
 */
298 static ssize_t smpro_internal_err_read(struct device *dev, struct device_attribute *da,
299 char *buf, int channel)
301 struct smpro_errmon *errmon = dev_get_drvdata(dev);
302 struct smpro_int_error_hdr *err_info;
303 unsigned int err[4] = { 0 };
304 unsigned int err_type;
308 /* Read the error status register */
309 ret = regmap_read(errmon->regmap, GPI_RAS_ERR, &val);
/* Bit 0 is checked for the SMpro channel, bit 1 for the PMpro channel */
313 if ((channel == RAS_SMPRO_ERR && !(val & BIT(0))) ||
314 (channel == RAS_PMPRO_ERR && !(val & BIT(1))))
317 err_info = &list_smpro_int_error_hdr[channel];
318 ret = regmap_read(errmon->regmap, err_info->type, &val);
/* Bit 1 takes priority over bit 2; 0 when neither is set */
322 err_type = (val & BIT(1)) ? BIT(1) :
323 (val & BIT(2)) ? BIT(2) : 0;
/* The high word is stored before the low word in the output array */
328 ret = regmap_read(errmon->regmap, err_info->info_l, err + 1);
332 ret = regmap_read(errmon->regmap, err_info->info_h, err);
336 if (err_type & BIT(2)) {
337 /* Error with data type */
338 ret = regmap_read(errmon->regmap, err_info->data_l, err + 3);
342 ret = regmap_read(errmon->regmap, err_info->data_h, err + 2);
347 /* Clear the error that was read by writing its type bit back */
348 ret = regmap_write(errmon->regmap, err_info->type, err_type);
352 return sysfs_emit(buf, "%*phN\n", (int)sizeof(err), err);
357 * <4-byte hex value of warning info>
358 * Refer to section 5.10 RAS Internal Error Register Definition in
359 * the Altra SOC BMC Interface specification
/*
 * Read the internal RAS warning of the processor selected by `channel`
 * (RAS_SMPRO_ERR or RAS_PMPRO_ERR, an index into list_smpro_int_error_hdr)
 * and emit it to `buf` as a hex string.
 *
 * Visible sequence:
 *  1. read GPI_RAS_ERR; bit 0 (SMpro) or bit 1 (PMpro) being clear for the
 *     requested channel is treated as a special case;
 *  2. read the type register;
 *  3. read the warning info registers into warn[0] (high) and warn[1] (low);
 *  4. write BIT(0) to the type register to clear the warning;
 *  5. emit the sizeof(warn) bytes of the array with "%*phN".
 *
 * NOTE(review): the bodies of the special-case branches, the test applied
 * to the type register value and the error checks after each regmap call
 * are not visible in this view.
 */
361 static ssize_t smpro_internal_warn_read(struct device *dev, struct device_attribute *da,
362 char *buf, int channel)
364 struct smpro_errmon *errmon = dev_get_drvdata(dev);
365 struct smpro_int_error_hdr *err_info;
366 unsigned int warn[2] = { 0 };
370 /* Read the error status register */
371 ret = regmap_read(errmon->regmap, GPI_RAS_ERR, &val);
/* Bit 0 is checked for the SMpro channel, bit 1 for the PMpro channel */
375 if ((channel == RAS_SMPRO_ERR && !(val & BIT(0))) ||
376 (channel == RAS_PMPRO_ERR && !(val & BIT(1))))
379 err_info = &list_smpro_int_error_hdr[channel];
380 ret = regmap_read(errmon->regmap, err_info->type, &val);
/* The high word is stored before the low word in the output array */
387 ret = regmap_read(errmon->regmap, err_info->warn_l, warn + 1);
391 ret = regmap_read(errmon->regmap, err_info->warn_h, warn);
395 /* Clear the warning by writing bit 0 to the type register */
396 ret = regmap_write(errmon->regmap, err_info->type, BIT(0));
400 return sysfs_emit(buf, "%*phN\n", (int)sizeof(warn), warn);
/*
 * Define overflow_<_error>_show(), which forwards to
 * smpro_overflow_data_read() with _index as the channel, and declare the
 * matching read-only device attribute overflow_<_error>.
 */
403 #define ERROR_OVERFLOW_RO(_error, _index) \
404 static ssize_t overflow_##_error##_show(struct device *dev, \
405 struct device_attribute *da, \
408 return smpro_overflow_data_read(dev, da, buf, _index); \
410 static DEVICE_ATTR_RO(overflow_##_error)
412 ERROR_OVERFLOW_RO(core_ce, CORE_CE_ERR);
413 ERROR_OVERFLOW_RO(core_ue, CORE_UE_ERR);
414 ERROR_OVERFLOW_RO(mem_ce, MEM_CE_ERR);
415 ERROR_OVERFLOW_RO(mem_ue, MEM_UE_ERR);
416 ERROR_OVERFLOW_RO(pcie_ce, PCIE_CE_ERR);
417 ERROR_OVERFLOW_RO(pcie_ue, PCIE_UE_ERR);
418 ERROR_OVERFLOW_RO(other_ce, OTHER_CE_ERR);
419 ERROR_OVERFLOW_RO(other_ue, OTHER_UE_ERR);
/*
 * Define error_<_error>_show(), which forwards to smpro_error_data_read()
 * with _index as the channel, and declare the matching read-only device
 * attribute error_<_error>.
 */
421 #define ERROR_RO(_error, _index) \
422 static ssize_t error_##_error##_show(struct device *dev, \
423 struct device_attribute *da, \
426 return smpro_error_data_read(dev, da, buf, _index); \
428 static DEVICE_ATTR_RO(error_##_error)
430 ERROR_RO(core_ce, CORE_CE_ERR);
431 ERROR_RO(core_ue, CORE_UE_ERR);
432 ERROR_RO(mem_ce, MEM_CE_ERR);
433 ERROR_RO(mem_ue, MEM_UE_ERR);
434 ERROR_RO(pcie_ce, PCIE_CE_ERR);
435 ERROR_RO(pcie_ue, PCIE_UE_ERR);
436 ERROR_RO(other_ce, OTHER_CE_ERR);
437 ERROR_RO(other_ue, OTHER_UE_ERR);
/*
 * Show handler of the read-only error_smpro attribute: forwards to
 * smpro_internal_err_read() for the RAS_SMPRO_ERR channel.
 */
439 static ssize_t error_smpro_show(struct device *dev, struct device_attribute *da, char *buf)
441 return smpro_internal_err_read(dev, da, buf, RAS_SMPRO_ERR);
443 static DEVICE_ATTR_RO(error_smpro);
/*
 * Show handler of the read-only error_pmpro attribute: forwards to
 * smpro_internal_err_read() for the RAS_PMPRO_ERR channel.
 */
445 static ssize_t error_pmpro_show(struct device *dev, struct device_attribute *da, char *buf)
447 return smpro_internal_err_read(dev, da, buf, RAS_PMPRO_ERR);
449 static DEVICE_ATTR_RO(error_pmpro);
/*
 * Show handler of the read-only warn_smpro attribute: forwards to
 * smpro_internal_warn_read() for the RAS_SMPRO_ERR channel.
 */
451 static ssize_t warn_smpro_show(struct device *dev, struct device_attribute *da, char *buf)
453 return smpro_internal_warn_read(dev, da, buf, RAS_SMPRO_ERR);
455 static DEVICE_ATTR_RO(warn_smpro);
/*
 * Show handler of the read-only warn_pmpro attribute: forwards to
 * smpro_internal_warn_read() for the RAS_PMPRO_ERR channel.
 */
457 static ssize_t warn_pmpro_show(struct device *dev, struct device_attribute *da, char *buf)
459 return smpro_internal_warn_read(dev, da, buf, RAS_PMPRO_ERR);
461 static DEVICE_ATTR_RO(warn_pmpro);
/*
 * Define event_<_event>_show(), which forwards to smpro_event_data_read()
 * with _index as its last argument, and declare the matching read-only
 * device attribute event_<_event>.
 */
463 #define EVENT_RO(_event, _index) \
464 static ssize_t event_##_event##_show(struct device *dev, \
465 struct device_attribute *da, \
468 return smpro_event_data_read(dev, da, buf, _index); \
470 static DEVICE_ATTR_RO(event_##_event)
472 EVENT_RO(vrd_warn_fault, VRD_WARN_FAULT_EVENT);
473 EVENT_RO(vrd_hot, VRD_HOT_EVENT);
474 EVENT_RO(dimm_hot, DIMM_HOT_EVENT);
475 EVENT_RO(dimm_2x_refresh, DIMM_2X_REFRESH_EVENT);
/*
 * Read the error syndrome of the DIMM in `slot` and emit it to `buf` as a
 * zero-padded hex number of at least four digits followed by a newline.
 *
 * Visible sequence:
 *  1. read BOOTSTAGE and extract bits 15:8; a value other than
 *     DIMM_SYNDROME_STAGE is treated as a special case;
 *  2. write the slot number to DIMM_SYNDROME_SEL;
 *  3. read the syndrome from DIMM_SYNDROME_ERR and emit it.
 *
 * NOTE(review): the body of the stage-mismatch branch and the error checks
 * after each regmap call are not visible in this view.
 */
477 static ssize_t smpro_dimm_syndrome_read(struct device *dev, struct device_attribute *da,
478 char *buf, unsigned int slot)
480 struct smpro_errmon *errmon = dev_get_drvdata(dev);
484 ret = regmap_read(errmon->regmap, BOOTSTAGE, &data);
488 /* Check for the valid stage, held in bits 15:8 of BOOTSTAGE */
489 data = (data >> 8) & 0xff;
490 if (data != DIMM_SYNDROME_STAGE)
493 /* Write the slot ID to retrieve Error Syndrome */
494 ret = regmap_write(errmon->regmap, DIMM_SYNDROME_SEL, slot);
498 /* Read the Syndrome error */
499 ret = regmap_read(errmon->regmap, DIMM_SYNDROME_ERR, &data);
503 return sysfs_emit(buf, "%04x\n", data);
/*
 * Define event_dimm<_slot>_syndrome_show(), which forwards to
 * smpro_dimm_syndrome_read() with _slot as the slot number, and declare
 * the matching read-only device attribute event_dimm<_slot>_syndrome.
 */
506 #define EVENT_DIMM_SYNDROME(_slot) \
507 static ssize_t event_dimm##_slot##_syndrome_show(struct device *dev, \
508 struct device_attribute *da, \
511 return smpro_dimm_syndrome_read(dev, da, buf, _slot); \
513 static DEVICE_ATTR_RO(event_dimm##_slot##_syndrome)
515 EVENT_DIMM_SYNDROME(0);
516 EVENT_DIMM_SYNDROME(1);
517 EVENT_DIMM_SYNDROME(2);
518 EVENT_DIMM_SYNDROME(3);
519 EVENT_DIMM_SYNDROME(4);
520 EVENT_DIMM_SYNDROME(5);
521 EVENT_DIMM_SYNDROME(6);
522 EVENT_DIMM_SYNDROME(7);
523 EVENT_DIMM_SYNDROME(8);
524 EVENT_DIMM_SYNDROME(9);
525 EVENT_DIMM_SYNDROME(10);
526 EVENT_DIMM_SYNDROME(11);
527 EVENT_DIMM_SYNDROME(12);
528 EVENT_DIMM_SYNDROME(13);
529 EVENT_DIMM_SYNDROME(14);
530 EVENT_DIMM_SYNDROME(15);
532 static struct attribute *smpro_errmon_attrs[] = {
533 &dev_attr_overflow_core_ce.attr,
534 &dev_attr_overflow_core_ue.attr,
535 &dev_attr_overflow_mem_ce.attr,
536 &dev_attr_overflow_mem_ue.attr,
537 &dev_attr_overflow_pcie_ce.attr,
538 &dev_attr_overflow_pcie_ue.attr,
539 &dev_attr_overflow_other_ce.attr,
540 &dev_attr_overflow_other_ue.attr,
541 &dev_attr_error_core_ce.attr,
542 &dev_attr_error_core_ue.attr,
543 &dev_attr_error_mem_ce.attr,
544 &dev_attr_error_mem_ue.attr,
545 &dev_attr_error_pcie_ce.attr,
546 &dev_attr_error_pcie_ue.attr,
547 &dev_attr_error_other_ce.attr,
548 &dev_attr_error_other_ue.attr,
549 &dev_attr_error_smpro.attr,
550 &dev_attr_error_pmpro.attr,
551 &dev_attr_warn_smpro.attr,
552 &dev_attr_warn_pmpro.attr,
553 &dev_attr_event_vrd_warn_fault.attr,
554 &dev_attr_event_vrd_hot.attr,
555 &dev_attr_event_dimm_hot.attr,
556 &dev_attr_event_dimm_2x_refresh.attr,
557 &dev_attr_event_dimm0_syndrome.attr,
558 &dev_attr_event_dimm1_syndrome.attr,
559 &dev_attr_event_dimm2_syndrome.attr,
560 &dev_attr_event_dimm3_syndrome.attr,
561 &dev_attr_event_dimm4_syndrome.attr,
562 &dev_attr_event_dimm5_syndrome.attr,
563 &dev_attr_event_dimm6_syndrome.attr,
564 &dev_attr_event_dimm7_syndrome.attr,
565 &dev_attr_event_dimm8_syndrome.attr,
566 &dev_attr_event_dimm9_syndrome.attr,
567 &dev_attr_event_dimm10_syndrome.attr,
568 &dev_attr_event_dimm11_syndrome.attr,
569 &dev_attr_event_dimm12_syndrome.attr,
570 &dev_attr_event_dimm13_syndrome.attr,
571 &dev_attr_event_dimm14_syndrome.attr,
572 &dev_attr_event_dimm15_syndrome.attr,
576 ATTRIBUTE_GROUPS(smpro_errmon);
/*
 * Probe callback of the platform driver: allocate the driver state with
 * devm_kzalloc(), attach it to the platform device as driver data, and
 * take the regmap from the parent device (dev_get_regmap() with a NULL
 * name).
 *
 * NOTE(review): the checks for a failed allocation or a missing regmap and
 * the return statement are not visible in this view.
 */
578 static int smpro_errmon_probe(struct platform_device *pdev)
580 struct smpro_errmon *errmon;
582 errmon = devm_kzalloc(&pdev->dev, sizeof(struct smpro_errmon), GFP_KERNEL);
586 platform_set_drvdata(pdev, errmon);
/* The register map is looked up on the parent device */
588 errmon->regmap = dev_get_regmap(pdev->dev.parent, NULL);
595 static struct platform_driver smpro_errmon_driver = {
596 .probe = smpro_errmon_probe,
598 .name = "smpro-errmon",
599 .dev_groups = smpro_errmon_groups,
603 module_platform_driver(smpro_errmon_driver);
605 MODULE_AUTHOR("Tung Nguyen <tung.nguyen@amperecomputing.com>");
606 MODULE_AUTHOR("Thinh Pham <thinh.pham@amperecomputing.com>");
607 MODULE_AUTHOR("Hoang Nguyen <hnguyen@amperecomputing.com>");
608 MODULE_AUTHOR("Thu Nguyen <thu@os.amperecomputing.com>");
609 MODULE_AUTHOR("Quan Nguyen <quan@os.amperecomputing.com>");
610 MODULE_DESCRIPTION("Ampere Altra SMpro driver");
611 MODULE_LICENSE("GPL");