1 // SPDX-License-Identifier: GPL-2.0-only
3 * Ampere Computing SoC's SMpro Error Monitoring Driver
5 * Copyright (c) 2022, Ampere Computing LLC
9 #include <linux/mod_devicetable.h>
10 #include <linux/module.h>
11 #include <linux/platform_device.h>
12 #include <linux/regmap.h>
/*
 * Register offsets. In this file they are passed as the register argument of
 * regmap_read()/regmap_write()/regmap_noinc_read() on errmon->regmap.
 */
14 /* GPI RAS Error Registers */
/*
 * Bit 0 is tested for the SMpro channel and bit 1 for the PMpro channel
 * before the internal error/warning registers are read.
 */
15 #define GPI_RAS_ERR 0x7E
17 /* Core and L2C Error Registers */
/* Each CE/UE class is a CNT/LEN/DATA triple referenced from smpro_error_table. */
18 #define CORE_CE_ERR_CNT 0x80
19 #define CORE_CE_ERR_LEN 0x81
20 #define CORE_CE_ERR_DATA 0x82
21 #define CORE_UE_ERR_CNT 0x83
22 #define CORE_UE_ERR_LEN 0x84
23 #define CORE_UE_ERR_DATA 0x85
25 /* Memory Error Registers */
26 #define MEM_CE_ERR_CNT 0x90
27 #define MEM_CE_ERR_LEN 0x91
28 #define MEM_CE_ERR_DATA 0x92
29 #define MEM_UE_ERR_CNT 0x93
30 #define MEM_UE_ERR_LEN 0x94
31 #define MEM_UE_ERR_DATA 0x95
33 /* RAS Error/Warning Registers */
/*
 * These offsets are referenced from list_smpro_int_error_hdr, which has one
 * group of initialisers for SMpro and one for PMpro. Note that the macros are
 * grouped per processor here, not sorted by offset (the SMpro warning
 * registers 0xAA/0xAB are listed before the PMpro error registers 0xA6-0xA9).
 */
34 #define ERR_SMPRO_TYPE 0xA0
35 #define ERR_PMPRO_TYPE 0xA1
36 #define ERR_SMPRO_INFO_LO 0xA2
37 #define ERR_SMPRO_INFO_HI 0xA3
38 #define ERR_SMPRO_DATA_LO 0xA4
39 #define ERR_SMPRO_DATA_HI 0xA5
40 #define WARN_SMPRO_INFO_LO 0xAA
41 #define WARN_SMPRO_INFO_HI 0xAB
42 #define ERR_PMPRO_INFO_LO 0xA6
43 #define ERR_PMPRO_INFO_HI 0xA7
44 #define ERR_PMPRO_DATA_LO 0xA8
45 #define ERR_PMPRO_DATA_HI 0xA9
46 #define WARN_PMPRO_INFO_LO 0xAC
47 #define WARN_PMPRO_INFO_HI 0xAD
49 /* Boot Stage Register */
/* Read by smpro_dimm_syndrome_read(); bits 15:8 are compared with DIMM_SYNDROME_STAGE. */
50 #define BOOTSTAGE 0xB0
/* The DIMM slot number is written here to select which syndrome to fetch ... */
51 #define DIMM_SYNDROME_SEL 0xB4
/* ... and the selected syndrome value is then read from here. */
52 #define DIMM_SYNDROME_ERR 0xB5
/* Not a register offset: the value bits 15:8 of BOOTSTAGE must equal. */
53 #define DIMM_SYNDROME_STAGE 4
55 /* PCIE Error Registers */
/* CNT/LEN/DATA triples referenced from smpro_error_table. */
56 #define PCIE_CE_ERR_CNT 0xC0
57 #define PCIE_CE_ERR_LEN 0xC1
58 #define PCIE_CE_ERR_DATA 0xC2
59 #define PCIE_UE_ERR_CNT 0xC3
60 #define PCIE_UE_ERR_LEN 0xC4
61 #define PCIE_UE_ERR_DATA 0xC5
63 /* Other Error Registers */
/*
 * Unlike the core, memory and PCIe classes, the UE triple here does not
 * directly follow the CE triple: it starts at 0xD8.
 */
64 #define OTHER_CE_ERR_CNT 0xD0
65 #define OTHER_CE_ERR_LEN 0xD1
66 #define OTHER_CE_ERR_DATA 0xD2
67 #define OTHER_UE_ERR_CNT 0xD8
68 #define OTHER_UE_ERR_LEN 0xD9
69 #define OTHER_UE_ERR_DATA 0xDA
71 /* Event Data Registers */
/*
 * Used as entries of smpro_event_table. smpro_event_data_read() reads the
 * selected register and writes the value it read back to the same register.
 * NOTE(review): only the first and last table entries are visible in this
 * listing; the two *_HOT_* registers are presumably the entries in between.
 */
72 #define VRD_WARN_FAULT_EVENT_DATA 0x78
73 #define VRD_HOT_EVENT_DATA 0x79
74 #define DIMM_HOT_EVENT_DATA 0x7A
75 #define DIMM_2X_REFRESH_EVENT_DATA 0x96
/*
 * Size in bytes of the buffer used by smpro_error_data_read(); the length
 * reported by the device is clamped to this value and exactly this many bytes
 * are printed.
 */
77 #define MAX_READ_BLOCK_LENGTH 48
/*
 * Channel values for the internal error/warning helpers: they select which
 * GPI_RAS_ERR bit is tested and index list_smpro_int_error_hdr.
 */
79 #define RAS_SMPRO_ERR 0
80 #define RAS_PMPRO_ERR 1
/*
 * Error classes of the 48-byte error records.
 * NOTE(review): the enumerator list is not visible in this listing. The
 * ERROR_RO()/ERROR_OVERFLOW_RO() users pass CORE_CE_ERR ... OTHER_UE_ERR as
 * indices into smpro_error_table; presumably those are declared here - confirm.
 */
82 enum RAS_48BYTES_ERR_TYPES {
/*
 * Describes one class of 48-byte error record. As initialised in
 * smpro_error_table and consumed by the read helpers, count, len and data
 * hold register offsets (the *_CNT, *_LEN and *_DATA macros), not the values
 * themselves. max_cnt is the bound the reported error count is compared
 * against in smpro_error_data_read().
 */
94 struct smpro_error_hdr {
95 u8 count; /* Number of the RAS errors */
96 u8 len; /* Number of data bytes */
97 u8 data; /* Start of 48-byte data */
98 u8 max_cnt; /* Max num of errors */
102 * Addresses of the registers that hold the Count, the Length of data and the
103 * Data of the 48-byte error data
/*
 * Register triples for each 48-byte error class. The table is indexed by the
 * channel argument of smpro_overflow_data_read() and smpro_error_data_read().
 * NOTE(review): the per-entry designators/braces and the max_cnt initialisers
 * are not visible in this listing.
 */
105 static struct smpro_error_hdr smpro_error_table[] = {
/* Core, correctable */
107 .count = CORE_CE_ERR_CNT,
108 .len = CORE_CE_ERR_LEN,
109 .data = CORE_CE_ERR_DATA,
/* Core, uncorrectable */
113 .count = CORE_UE_ERR_CNT,
114 .len = CORE_UE_ERR_LEN,
115 .data = CORE_UE_ERR_DATA,
/* Memory, correctable */
119 .count = MEM_CE_ERR_CNT,
120 .len = MEM_CE_ERR_LEN,
121 .data = MEM_CE_ERR_DATA,
/* Memory, uncorrectable */
125 .count = MEM_UE_ERR_CNT,
126 .len = MEM_UE_ERR_LEN,
127 .data = MEM_UE_ERR_DATA,
/* PCIe, correctable */
131 .count = PCIE_CE_ERR_CNT,
132 .len = PCIE_CE_ERR_LEN,
133 .data = PCIE_CE_ERR_DATA,
/* PCIe, uncorrectable */
137 .count = PCIE_UE_ERR_CNT,
138 .len = PCIE_UE_ERR_LEN,
139 .data = PCIE_UE_ERR_DATA,
/* Other, correctable */
143 .count = OTHER_CE_ERR_CNT,
144 .len = OTHER_CE_ERR_LEN,
145 .data = OTHER_CE_ERR_DATA,
/* Other, uncorrectable */
149 .count = OTHER_UE_ERR_CNT,
150 .len = OTHER_UE_ERR_LEN,
151 .data = OTHER_UE_ERR_DATA,
157 * List of SCP registers which are used to get
158 * one type of RAS internal error.
/*
 * Register offsets for one source of RAS internal errors.
 * NOTE(review): the member list is not visible in this listing. The
 * initialisers of list_smpro_int_error_hdr set .type, .info_l, .info_h,
 * .data_l, .data_h, .warn_l and .warn_h, each to a register-offset macro.
 */
160 struct smpro_int_error_hdr {
/*
 * Register sets for the RAS internal errors/warnings of SMpro and PMpro.
 * Indexed by the channel argument of smpro_internal_err_read() and
 * smpro_internal_warn_read() (RAS_SMPRO_ERR or RAS_PMPRO_ERR).
 * NOTE(review): the entry braces/designators are not visible in this listing;
 * presumably the SMpro group is entry RAS_SMPRO_ERR (0) and the PMpro group is
 * entry RAS_PMPRO_ERR (1) - confirm.
 */
170 static struct smpro_int_error_hdr list_smpro_int_error_hdr[] = {
/* SMpro registers */
172 .type = ERR_SMPRO_TYPE,
173 .info_l = ERR_SMPRO_INFO_LO,
174 .info_h = ERR_SMPRO_INFO_HI,
175 .data_l = ERR_SMPRO_DATA_LO,
176 .data_h = ERR_SMPRO_DATA_HI,
177 .warn_l = WARN_SMPRO_INFO_LO,
178 .warn_h = WARN_SMPRO_INFO_HI,
/* PMpro registers */
181 .type = ERR_PMPRO_TYPE,
182 .info_l = ERR_PMPRO_INFO_LO,
183 .info_h = ERR_PMPRO_INFO_HI,
184 .data_l = ERR_PMPRO_DATA_LO,
185 .data_h = ERR_PMPRO_DATA_HI,
186 .warn_l = WARN_PMPRO_INFO_LO,
187 .warn_h = WARN_PMPRO_INFO_HI,
/*
 * Per-device driver state. It is allocated in smpro_errmon_probe(), stored as
 * the platform device's driver data and fetched with dev_get_drvdata() by the
 * sysfs read helpers.
 */
191 struct smpro_errmon {
/* Register map taken from the parent device in probe; used for all register access. */
192 struct regmap *regmap;
/*
 * Event types; the EVENT_RO() users pass them as the index into
 * smpro_event_table.
 * NOTE(review): the enum header and the other enumerators (VRD_HOT_EVENT,
 * DIMM_HOT_EVENT and NUM_EVENTS_TYPE are used elsewhere in the file) are not
 * visible in this listing.
 */
196 VRD_WARN_FAULT_EVENT,
199 DIMM_2X_REFRESH_EVENT,
203 /* Addresses of the event source and data registers */
/*
 * Event data register offset for each event type; indexed by the channel
 * argument of smpro_event_data_read().
 * NOTE(review): only the first and last entries are visible in this listing.
 */
204 static u8 smpro_event_table[NUM_EVENTS_TYPE] = {
205 VRD_WARN_FAULT_EVENT_DATA,
208 DIMM_2X_REFRESH_EVENT_DATA,
/*
 * Common show helper for the event_* attributes. Reads the event data
 * register selected by the channel index into smpro_event_table and prints it
 * to buf as hex, zero-padded to at least four digits, followed by a newline.
 *
 * NOTE(review): this listing is missing lines (the local declarations and the
 * statements after the read), so only the visible steps are described.
 */
211 static ssize_t smpro_event_data_read(struct device *dev,
212 struct device_attribute *da, char *buf,
215 struct smpro_errmon *errmon = dev_get_drvdata(dev);
219 ret = regmap_read(errmon->regmap, smpro_event_table[channel], &event_data);
222 /* Clear event after read */
/*
 * The value just read is written back to the same register. Unlike the other
 * regmap_write() calls in this file, the result is not stored in ret, so a
 * failed clear is not reported - TODO confirm this is intentional.
 */
224 regmap_write(errmon->regmap, smpro_event_table[channel], event_data);
226 return sysfs_emit(buf, "%04x\n", event_data);
/*
 * Common show helper for the overflow_* attributes. Reads the count register
 * of the error class selected by channel (an index into smpro_error_table) and
 * prints "1" if bit 8 of the value is set, "0" otherwise.
 *
 * NOTE(review): the declarations of ret/err_count and the handling of a failed
 * read are not visible in this listing.
 */
229 static ssize_t smpro_overflow_data_read(struct device *dev, struct device_attribute *da,
230 char *buf, int channel)
232 struct smpro_errmon *errmon = dev_get_drvdata(dev);
233 struct smpro_error_hdr *err_info;
237 err_info = &smpro_error_table[channel];
239 ret = regmap_read(errmon->regmap, err_info->count, &err_count);
243 /* Bit 8 indicates the overflow status */
244 return sysfs_emit(buf, "%d\n", (err_count & BIT(8)) ? 1 : 0);
/*
 * Common show helper for the error_* attributes of the 48-byte error records.
 * channel indexes smpro_error_table. Visible flow: read the count register,
 * read the length register, clamp the length to MAX_READ_BLOCK_LENGTH, read
 * that many bytes from the data register into a zeroed buffer, write 0x100 to
 * the count register, then print the whole buffer as a hex string.
 *
 * NOTE(review): this listing is missing lines (e.g. the declaration of ret and
 * the statements guarded by the if-conditions), so the early-exit behaviour is
 * not documented here.
 */
247 static ssize_t smpro_error_data_read(struct device *dev, struct device_attribute *da,
248 char *buf, int channel)
250 struct smpro_errmon *errmon = dev_get_drvdata(dev);
251 unsigned char err_data[MAX_READ_BLOCK_LENGTH];
252 struct smpro_error_hdr *err_info;
/*
 * NOTE(review): the addresses of these s32 variables are passed to
 * regmap_read(), whose value parameter is an unsigned int pointer in the
 * regmap API - confirm the signed type is intended.
 */
253 s32 err_count, err_length;
256 err_info = &smpro_error_table[channel];
258 ret = regmap_read(errmon->regmap, err_info->count, &err_count);
259 /* Error count is the low byte */
/*
 * NOTE(review): in the visible lines ret is first examined in the condition
 * below, together with err_count. Make sure err_count is not consumed before
 * a failed read has been ruled out.
 */
261 if (ret || !err_count || err_count > err_info->max_cnt)
264 ret = regmap_read(errmon->regmap, err_info->len, &err_length);
265 if (ret || err_length <= 0)
/* Clamp so the block read below cannot write past the end of err_data. */
268 if (err_length > MAX_READ_BLOCK_LENGTH)
269 err_length = MAX_READ_BLOCK_LENGTH;
/* Zero the whole buffer first: bytes beyond err_length are printed as 00. */
271 memset(err_data, 0x00, MAX_READ_BLOCK_LENGTH);
272 ret = regmap_noinc_read(errmon->regmap, err_info->data, err_data, err_length);
276 /* clear the error */
/* 0x100 is bit 8, the bit smpro_overflow_data_read() reports as overflow status. */
277 ret = regmap_write(errmon->regmap, err_info->count, 0x100);
281 * The output of Core/Memory/PCIe/Others UE/CE errors follows the format
282 * specified in section 5.8.1 CE/UE Error Data record in
283 * Altra SOC BMC Interface specification.
/* Always prints MAX_READ_BLOCK_LENGTH bytes, regardless of err_length. */
285 return sysfs_emit(buf, "%*phN\n", MAX_READ_BLOCK_LENGTH, err_data);
290 * <4-byte hex value of error info><4-byte hex value of error extensive data>
292 * + error info : The error information
293 * + error data : Extensive data (32 bits)
294 * Reference to section 5.10 RAS Internal Error Register Definition in
295 * Altra SOC BMC Interface specification
/*
 * Common show helper for error_smpro/error_pmpro. channel is RAS_SMPRO_ERR or
 * RAS_PMPRO_ERR. Visible flow: read GPI_RAS_ERR and test the channel's bit,
 * read the channel's type register, derive err_type from it, read the info
 * (and, for bit 2, data) registers into err[], write err_type back to the type
 * register, then print err[] as a hex string.
 *
 * NOTE(review): this listing is missing lines (declarations of ret/val and the
 * statements guarded by the if-conditions), so the early-exit behaviour is not
 * documented here.
 */
297 static ssize_t smpro_internal_err_read(struct device *dev, struct device_attribute *da,
298 char *buf, int channel)
300 struct smpro_errmon *errmon = dev_get_drvdata(dev);
301 struct smpro_int_error_hdr *err_info;
/* Layout: err[0] = info_h, err[1] = info_l, err[2] = data_h, err[3] = data_l. */
302 unsigned int err[4] = { 0 };
303 unsigned int err_type;
307 /* read error status */
308 ret = regmap_read(errmon->regmap, GPI_RAS_ERR, &val);
/* Bit 0 gates the SMpro channel, bit 1 the PMpro channel. */
312 if ((channel == RAS_SMPRO_ERR && !(val & BIT(0))) ||
313 (channel == RAS_PMPRO_ERR && !(val & BIT(1))))
316 err_info = &list_smpro_int_error_hdr[channel];
317 ret = regmap_read(errmon->regmap, err_info->type, &val);
/* Bit 1 takes precedence over bit 2; err_type is 0 when neither is set. */
321 err_type = (val & BIT(1)) ? BIT(1) :
322 (val & BIT(2)) ? BIT(2) : 0;
327 ret = regmap_read(errmon->regmap, err_info->info_l, err + 1);
331 ret = regmap_read(errmon->regmap, err_info->info_h, err);
335 if (err_type & BIT(2)) {
336 /* Error with data type */
337 ret = regmap_read(errmon->regmap, err_info->data_l, err + 3);
341 ret = regmap_read(errmon->regmap, err_info->data_h, err + 2);
346 /* clear the read errors */
347 ret = regmap_write(errmon->regmap, err_info->type, err_type);
/*
 * NOTE(review): this dumps sizeof(err) raw bytes of an unsigned int array, so
 * the digit order follows the in-memory byte order of each element. With a
 * 4-byte unsigned int that is 16 bytes, whereas the format comment before this
 * function describes two 4-byte values - confirm against the specification.
 */
351 return sysfs_emit(buf, "%*phN\n", (int)sizeof(err), err);
356 * <4-byte hex value of warning info>
357 * Reference to section 5.10 RAS Internal Error Register Definition in
358 * Altra SOC BMC Interface specification
/*
 * Common show helper for warn_smpro/warn_pmpro. channel is RAS_SMPRO_ERR or
 * RAS_PMPRO_ERR. Visible flow: read GPI_RAS_ERR and test the channel's bit,
 * read the channel's type register, read the two warning info registers into
 * warn[], write BIT(0) to the type register, then print warn[] as a hex string.
 *
 * NOTE(review): this listing is missing lines (declarations of ret/val, the
 * statements guarded by the if-conditions and whatever test is applied to the
 * type register value), so the early-exit behaviour is not documented here.
 */
360 static ssize_t smpro_internal_warn_read(struct device *dev, struct device_attribute *da,
361 char *buf, int channel)
363 struct smpro_errmon *errmon = dev_get_drvdata(dev);
364 struct smpro_int_error_hdr *err_info;
/* Layout: warn[0] = warn_h, warn[1] = warn_l. */
365 unsigned int warn[2] = { 0 };
369 /* read error status */
370 ret = regmap_read(errmon->regmap, GPI_RAS_ERR, &val);
/* Bit 0 gates the SMpro channel, bit 1 the PMpro channel. */
374 if ((channel == RAS_SMPRO_ERR && !(val & BIT(0))) ||
375 (channel == RAS_PMPRO_ERR && !(val & BIT(1))))
378 err_info = &list_smpro_int_error_hdr[channel];
379 ret = regmap_read(errmon->regmap, err_info->type, &val);
386 ret = regmap_read(errmon->regmap, err_info->warn_l, warn + 1);
390 ret = regmap_read(errmon->regmap, err_info->warn_h, warn);
394 /* clear the warning */
395 ret = regmap_write(errmon->regmap, err_info->type, BIT(0));
/*
 * NOTE(review): this dumps sizeof(warn) raw bytes of an unsigned int array, so
 * the digit order follows the in-memory byte order of each element. With a
 * 4-byte unsigned int that is 8 bytes, whereas the format comment before this
 * function describes a single 4-byte value - confirm against the specification.
 */
399 return sysfs_emit(buf, "%*phN\n", (int)sizeof(warn), warn);
/*
 * ERROR_OVERFLOW_RO(_error, _index) defines overflow_<_error>_show(), which
 * forwards to smpro_overflow_data_read() with _index as the channel, and
 * declares the matching read-only device attribute overflow_<_error>.
 */
402 #define ERROR_OVERFLOW_RO(_error, _index) \
403 static ssize_t overflow_##_error##_show(struct device *dev, \
404 struct device_attribute *da, \
407 return smpro_overflow_data_read(dev, da, buf, _index); \
409 static DEVICE_ATTR_RO(overflow_##_error)
/* One overflow attribute per 48-byte error class. */
411 ERROR_OVERFLOW_RO(core_ce, CORE_CE_ERR);
412 ERROR_OVERFLOW_RO(core_ue, CORE_UE_ERR);
413 ERROR_OVERFLOW_RO(mem_ce, MEM_CE_ERR);
414 ERROR_OVERFLOW_RO(mem_ue, MEM_UE_ERR);
415 ERROR_OVERFLOW_RO(pcie_ce, PCIE_CE_ERR);
416 ERROR_OVERFLOW_RO(pcie_ue, PCIE_UE_ERR);
417 ERROR_OVERFLOW_RO(other_ce, OTHER_CE_ERR);
418 ERROR_OVERFLOW_RO(other_ue, OTHER_UE_ERR);
/*
 * ERROR_RO(_error, _index) defines error_<_error>_show(), which forwards to
 * smpro_error_data_read() with _index as the channel, and declares the
 * matching read-only device attribute error_<_error>.
 */
420 #define ERROR_RO(_error, _index) \
421 static ssize_t error_##_error##_show(struct device *dev, \
422 struct device_attribute *da, \
425 return smpro_error_data_read(dev, da, buf, _index); \
427 static DEVICE_ATTR_RO(error_##_error)
/* One error-data attribute per 48-byte error class. */
429 ERROR_RO(core_ce, CORE_CE_ERR);
430 ERROR_RO(core_ue, CORE_UE_ERR);
431 ERROR_RO(mem_ce, MEM_CE_ERR);
432 ERROR_RO(mem_ue, MEM_UE_ERR);
433 ERROR_RO(pcie_ce, PCIE_CE_ERR);
434 ERROR_RO(pcie_ue, PCIE_UE_ERR);
435 ERROR_RO(other_ce, OTHER_CE_ERR);
436 ERROR_RO(other_ue, OTHER_UE_ERR);
/*
 * sysfs show callbacks for the RAS internal error and warning attributes.
 * Each one forwards to the shared helper with the SMpro or PMpro channel.
 */
/* error_smpro: internal error of the SMpro channel. */
438 static ssize_t error_smpro_show(struct device *dev, struct device_attribute *da, char *buf)
440 return smpro_internal_err_read(dev, da, buf, RAS_SMPRO_ERR);
442 static DEVICE_ATTR_RO(error_smpro);
/* error_pmpro: internal error of the PMpro channel. */
444 static ssize_t error_pmpro_show(struct device *dev, struct device_attribute *da, char *buf)
446 return smpro_internal_err_read(dev, da, buf, RAS_PMPRO_ERR);
448 static DEVICE_ATTR_RO(error_pmpro);
/* warn_smpro: internal warning of the SMpro channel. */
450 static ssize_t warn_smpro_show(struct device *dev, struct device_attribute *da, char *buf)
452 return smpro_internal_warn_read(dev, da, buf, RAS_SMPRO_ERR);
454 static DEVICE_ATTR_RO(warn_smpro);
/* warn_pmpro: internal warning of the PMpro channel. */
456 static ssize_t warn_pmpro_show(struct device *dev, struct device_attribute *da, char *buf)
458 return smpro_internal_warn_read(dev, da, buf, RAS_PMPRO_ERR);
460 static DEVICE_ATTR_RO(warn_pmpro);
/*
 * EVENT_RO(_event, _index) defines event_<_event>_show(), which forwards to
 * smpro_event_data_read() with _index as the channel, and declares the
 * matching read-only device attribute event_<_event>.
 */
462 #define EVENT_RO(_event, _index) \
463 static ssize_t event_##_event##_show(struct device *dev, \
464 struct device_attribute *da, \
467 return smpro_event_data_read(dev, da, buf, _index); \
469 static DEVICE_ATTR_RO(event_##_event)
/* One attribute per event type. */
471 EVENT_RO(vrd_warn_fault, VRD_WARN_FAULT_EVENT);
472 EVENT_RO(vrd_hot, VRD_HOT_EVENT);
473 EVENT_RO(dimm_hot, DIMM_HOT_EVENT);
474 EVENT_RO(dimm_2x_refresh, DIMM_2X_REFRESH_EVENT);
/*
 * Common show helper for the event_dimm<N>_syndrome attributes. Visible flow:
 * read BOOTSTAGE and compare bits 15:8 with DIMM_SYNDROME_STAGE, write slot
 * to DIMM_SYNDROME_SEL, read DIMM_SYNDROME_ERR and print it as hex,
 * zero-padded to at least four digits.
 *
 * NOTE(review): this listing is missing lines (declarations of ret/data and
 * the statements taken on a failed access or stage mismatch), so the
 * early-exit behaviour is not documented here.
 */
476 static ssize_t smpro_dimm_syndrome_read(struct device *dev, struct device_attribute *da,
477 char *buf, unsigned int slot)
479 struct smpro_errmon *errmon = dev_get_drvdata(dev);
483 ret = regmap_read(errmon->regmap, BOOTSTAGE, &data);
487 /* check for valid stage */
/* Keep only bits 15:8 of the boot stage register value. */
488 data = (data >> 8) & 0xff;
489 if (data != DIMM_SYNDROME_STAGE)
492 /* Write the slot ID to retrieve Error Syndrome */
493 ret = regmap_write(errmon->regmap, DIMM_SYNDROME_SEL, slot);
497 /* Read the Syndrome error */
/* data is reused here: it now holds the syndrome value, not the stage. */
498 ret = regmap_read(errmon->regmap, DIMM_SYNDROME_ERR, &data);
502 return sysfs_emit(buf, "%04x\n", data);
/*
 * EVENT_DIMM_SYNDROME(_slot) defines event_dimm<_slot>_syndrome_show(), which
 * forwards to smpro_dimm_syndrome_read() with _slot as the slot number, and
 * declares the matching read-only device attribute event_dimm<_slot>_syndrome.
 */
505 #define EVENT_DIMM_SYNDROME(_slot) \
506 static ssize_t event_dimm##_slot##_syndrome_show(struct device *dev, \
507 struct device_attribute *da, \
510 return smpro_dimm_syndrome_read(dev, da, buf, _slot); \
512 static DEVICE_ATTR_RO(event_dimm##_slot##_syndrome)
/* Attributes for DIMM slots 0 through 15. */
514 EVENT_DIMM_SYNDROME(0);
515 EVENT_DIMM_SYNDROME(1);
516 EVENT_DIMM_SYNDROME(2);
517 EVENT_DIMM_SYNDROME(3);
518 EVENT_DIMM_SYNDROME(4);
519 EVENT_DIMM_SYNDROME(5);
520 EVENT_DIMM_SYNDROME(6);
521 EVENT_DIMM_SYNDROME(7);
522 EVENT_DIMM_SYNDROME(8);
523 EVENT_DIMM_SYNDROME(9);
524 EVENT_DIMM_SYNDROME(10);
525 EVENT_DIMM_SYNDROME(11);
526 EVENT_DIMM_SYNDROME(12);
527 EVENT_DIMM_SYNDROME(13);
528 EVENT_DIMM_SYNDROME(14);
529 EVENT_DIMM_SYNDROME(15);
/*
 * All device attributes exposed by this driver. The array is turned into
 * smpro_errmon_groups by ATTRIBUTE_GROUPS() below, and that is what the
 * platform driver registers through .dev_groups.
 * NOTE(review): the array terminator and closing brace are not visible in
 * this listing.
 */
531 static struct attribute *smpro_errmon_attrs[] = {
/* Overflow flags of the 48-byte error classes */
532 &dev_attr_overflow_core_ce.attr,
533 &dev_attr_overflow_core_ue.attr,
534 &dev_attr_overflow_mem_ce.attr,
535 &dev_attr_overflow_mem_ue.attr,
536 &dev_attr_overflow_pcie_ce.attr,
537 &dev_attr_overflow_pcie_ue.attr,
538 &dev_attr_overflow_other_ce.attr,
539 &dev_attr_overflow_other_ue.attr,
/* 48-byte error records */
540 &dev_attr_error_core_ce.attr,
541 &dev_attr_error_core_ue.attr,
542 &dev_attr_error_mem_ce.attr,
543 &dev_attr_error_mem_ue.attr,
544 &dev_attr_error_pcie_ce.attr,
545 &dev_attr_error_pcie_ue.attr,
546 &dev_attr_error_other_ce.attr,
547 &dev_attr_error_other_ue.attr,
/* RAS internal errors and warnings */
548 &dev_attr_error_smpro.attr,
549 &dev_attr_error_pmpro.attr,
550 &dev_attr_warn_smpro.attr,
551 &dev_attr_warn_pmpro.attr,
/* Event data */
552 &dev_attr_event_vrd_warn_fault.attr,
553 &dev_attr_event_vrd_hot.attr,
554 &dev_attr_event_dimm_hot.attr,
555 &dev_attr_event_dimm_2x_refresh.attr,
/* Per-slot DIMM syndrome */
556 &dev_attr_event_dimm0_syndrome.attr,
557 &dev_attr_event_dimm1_syndrome.attr,
558 &dev_attr_event_dimm2_syndrome.attr,
559 &dev_attr_event_dimm3_syndrome.attr,
560 &dev_attr_event_dimm4_syndrome.attr,
561 &dev_attr_event_dimm5_syndrome.attr,
562 &dev_attr_event_dimm6_syndrome.attr,
563 &dev_attr_event_dimm7_syndrome.attr,
564 &dev_attr_event_dimm8_syndrome.attr,
565 &dev_attr_event_dimm9_syndrome.attr,
566 &dev_attr_event_dimm10_syndrome.attr,
567 &dev_attr_event_dimm11_syndrome.attr,
568 &dev_attr_event_dimm12_syndrome.attr,
569 &dev_attr_event_dimm13_syndrome.attr,
570 &dev_attr_event_dimm14_syndrome.attr,
571 &dev_attr_event_dimm15_syndrome.attr,
575 ATTRIBUTE_GROUPS(smpro_errmon);
/*
 * Platform driver probe. Allocates the device-managed driver state, stores it
 * as the platform device's driver data and takes the register map from the
 * parent device.
 *
 * NOTE(review): this listing is missing lines, so the checks on the
 * allocation and on the dev_get_regmap() result, and the return value, are
 * not visible - confirm both results are validated.
 */
577 static int smpro_errmon_probe(struct platform_device *pdev)
579 struct smpro_errmon *errmon;
/* devm allocation: tied to the lifetime of &pdev->dev, no explicit free needed. */
581 errmon = devm_kzalloc(&pdev->dev, sizeof(struct smpro_errmon), GFP_KERNEL);
585 platform_set_drvdata(pdev, errmon);
/* NULL name: ask the parent device for its default (unnamed) regmap. */
587 errmon->regmap = dev_get_regmap(pdev->dev.parent, NULL);
/*
 * Platform driver definition. The sysfs attributes are registered through
 * .dev_groups rather than from probe.
 * NOTE(review): the nested .driver braces are not visible in this listing.
 */
594 static struct platform_driver smpro_errmon_driver = {
595 .probe = smpro_errmon_probe,
597 .name = "smpro-errmon",
598 .dev_groups = smpro_errmon_groups,
/* Registers the driver at module load and unregisters it at module unload. */
602 module_platform_driver(smpro_errmon_driver);
/* Module metadata */
604 MODULE_AUTHOR("Tung Nguyen <tung.nguyen@amperecomputing.com>");
605 MODULE_AUTHOR("Thinh Pham <thinh.pham@amperecomputing.com>");
606 MODULE_AUTHOR("Hoang Nguyen <hnguyen@amperecomputing.com>");
607 MODULE_AUTHOR("Thu Nguyen <thu@os.amperecomputing.com>");
608 MODULE_AUTHOR("Quan Nguyen <quan@os.amperecomputing.com>");
609 MODULE_DESCRIPTION("Ampere Altra SMpro driver");
610 MODULE_LICENSE("GPL");