[PATCH] powerpc/eeh: parse AER registers

Ganesh Goudar ganeshgr at linux.ibm.com
Thu Jul 3 13:45:04 AEST 2025


Parse the AER uncorrectable and correctable error status registers
and print the type of each reported error, plus the severity
(Fatal/Non-Fatal) for uncorrectable errors.

Sample output:
EEH:AER Uncorrectable Error
EEH:AER Error Type: Data Link Protocol [Fatal]

Signed-off-by: Ganesh Goudar <ganeshgr at linux.ibm.com>
---
 arch/powerpc/kernel/eeh.c | 84 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 83 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 83fe99861eb1..03e1e2eeb679 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -139,6 +139,49 @@ struct eeh_stats {
 
 static struct eeh_stats eeh_stats;
 
+/* AER Uncorrectable Error Status bit names, indexed by bit position. */
+static const char * const aer_uncor_errors[] = {
+	[0 ... 3]	= "Undefined",
+	[4]		= "Data Link Protocol",
+	[5]		= "Surprise Down",
+	/* Bits 6-11 have no name; "Poisoned TLP" must stay at bit 12. */
+	[6 ... 11]	= "Undefined",
+	[12]		= "Poisoned TLP",
+	[13]		= "Flow Control Protocol",
+	[14]		= "Completion Timeout",
+	[15]		= "Completer Abort",
+	[16]		= "Unexpected Completion",
+	[17]		= "Receiver Overflow",
+	[18]		= "Malformed TLP",
+	[19]		= "ECRC Error",
+	[20]		= "Unsupported Request",
+	[21]		= "ACS Violation",
+	[22]		= "Uncorrectable Internal Error",
+	[23]		= "MC Blocked TLP",
+	[24]		= "AtomicOp Egress Blocked",
+	[25]		= "TLPPrefix Blocked",
+	[26]		= "Poisoned TLP Egress Blocked"
+};
+
+/*
+ * Printable names for the AER Correctable Error Status register bits.
+ * The array index is the bit position that eeh_parse_aer_registers()
+ * tests in cor_status; positions that have no specific name in this
+ * table are listed as "Undefined".
+ */
+static const char * const aer_cor_errors[] = {
+	[0]		= "Receiver Error",
+	[1 ... 5]	= "Undefined",
+	[6]		= "Bad TLP",
+	[7]		= "Bad DLLP",
+	[8]		= "Replay Num Rollover",
+	[9 ... 11]	= "Undefined",
+	[12]		= "Replay Timer Timeout",
+	[13]		= "Advisory Non-Fatal Error",
+	[14]		= "Corrected Internal Error",
+	[15]		= "Header Log Overflow",
+};
+
 static int __init eeh_setup(char *str)
 {
 	if (!strcmp(str, "off"))
@@ -160,6 +203,43 @@ void eeh_show_enabled(void)
 		pr_info("EEH: No capable adapters found: recovery disabled.\n");
 }
 
+/*
+ * Read the AER status registers at config offset @cap of @edev and log each
+ * asserted bit by name; bits past the end of a name table log as "Unknown".
+ */
+static void eeh_parse_aer_registers(struct eeh_dev *edev, int cap)
+{
+	u32 uncor_status = 0, uncor_severity = 0, cor_status = 0;
+	const char *error_type;
+	unsigned int i;
+
+	eeh_ops->read_config(edev, cap + PCI_ERR_UNCOR_STATUS, 4, &uncor_status);
+	eeh_ops->read_config(edev, cap + PCI_ERR_UNCOR_SEVER, 4, &uncor_severity);
+	eeh_ops->read_config(edev, cap + PCI_ERR_COR_STATUS, 4, &cor_status);
+
+	if (uncor_status)
+		pr_err("EEH:AER Uncorrectable Error\n");
+	/* Scan all 32 status bits, not just the ones the table names. */
+	for (i = 0; i < 32; i++) {
+		if (!(uncor_status & (1U << i)))
+			continue;
+		error_type = (i < ARRAY_SIZE(aer_uncor_errors))
+			     ? aer_uncor_errors[i] : "Unknown";
+		pr_err("EEH:AER Error Type: %s [%s]\n", error_type,
+		       (uncor_severity & (1U << i)) ? "Fatal" : "Non-Fatal");
+	}
+
+	if (cor_status)
+		pr_err("EEH:AER Correctable Error\n");
+	for (i = 0; i < 32; i++) {
+		if (!(cor_status & (1U << i)))
+			continue;
+		error_type = (i < ARRAY_SIZE(aer_cor_errors))
+			     ? aer_cor_errors[i] : "Unknown";
+		pr_err("EEH:AER Error Type: %s\n", error_type);
+	}
+}
+
 /*
  * This routine captures assorted PCI configuration space data
  * for the indicated PCI device, and puts them into a buffer
@@ -237,9 +317,11 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
 		pr_warn("%s\n", buffer);
 	}
 
-	/* If AER capable, dump it */
+	/* If AER capable, parse and dump it */
 	cap = edev->aer_cap;
 	if (cap) {
+		eeh_parse_aer_registers(edev, cap);
+
 		n += scnprintf(buf+n, len-n, "pci-e AER:\n");
 		pr_warn("EEH: PCI-E AER capability register set follows:\n");
 
-- 
2.48.1



More information about the Linuxppc-dev mailing list