[PATCH] powerpc/eeh: parse AER registers
Mahesh J Salgaonkar
mahesh at linux.ibm.com
Mon Jul 21 14:11:40 AEST 2025
On 2025-07-03 09:15:04 Thu, Ganesh Goudar wrote:
> parse AER uncorrectable and correctable error status
> registers to print error type and severity.
>
> output looks like
> EEH:AER Uncorrectable Error
> EEH:AER Error Type: Data Link Protocol Error [Fatal]
Thanks for working on this. But how do we know which PHB this error is
reported on? Can we have the PHB details as a prefix in the error message?
Also, can we have the error message in a format something like the one below?
0000:50:00.0: PCIe Bus Error: severity=Uncorrected (Fatal), type=Transaction Layer, id=0500(Requester ID)
Thanks,
-Mahesh.
>
> Signed-off-by: Ganesh Goudar <ganeshgr at linux.ibm.com>
> ---
> arch/powerpc/kernel/eeh.c | 84 ++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 83 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> index 83fe99861eb1..03e1e2eeb679 100644
> --- a/arch/powerpc/kernel/eeh.c
> +++ b/arch/powerpc/kernel/eeh.c
> @@ -139,6 +139,49 @@ struct eeh_stats {
>
> static struct eeh_stats eeh_stats;
>
> +static const char * const aer_uncor_errors[] = {
> + "Undefined",
> + "Undefined",
> + "Undefined",
> + "Undefined",
> + "Data Link Protocol",
> + "Surprise Down",
> + "Poisoned TLP",
> + "Flow Control Protocol",
> + "Completion Timeout",
> + "Completer Abort",
> + "Unexpected Completion",
> + "Receiver Overflow",
> + "Malformed TLP",
> + "ECRC Error",
> + "Unsupported Request",
> + "ACS Violation",
> + "Uncorrectable Internal Error",
> + "MC Blocked TLP",
> + "AtomicOp Egress Blocked",
> + "TLPPrefix Blocked",
> + "Poisoned TLP Egress Blocked"
> +};
> +
> +static const char * const aer_cor_errors[] = {
> + "Receiver Error",
> + "Undefined",
> + "Undefined",
> + "Undefined",
> + "Undefined",
> + "Undefined",
> + "Bad TLP",
> + "Bad DLLP",
> + "Replay Num Rollover",
> + "Undefined",
> + "Undefined",
> + "Undefined",
> + "Replay Timer Timeout",
> + "Advisory Non-Fatal Error",
> + "Corrected Internal Error",
> + "Header Log Overflow",
> +};
> +
> static int __init eeh_setup(char *str)
> {
> if (!strcmp(str, "off"))
> @@ -160,6 +203,43 @@ void eeh_show_enabled(void)
> pr_info("EEH: No capable adapters found: recovery disabled.\n");
> }
>
> +static void eeh_parse_aer_registers(struct eeh_dev *edev, int cap)
> +{
> + int i;
> + const char *error_type;
> + u32 uncor_status, uncor_severity, cor_status;
> +
> + eeh_ops->read_config(edev, cap + PCI_ERR_UNCOR_STATUS, 4, &uncor_status);
> + eeh_ops->read_config(edev, cap + PCI_ERR_UNCOR_SEVER, 4, &uncor_severity);
> + eeh_ops->read_config(edev, cap + PCI_ERR_COR_STATUS, 4, &cor_status);
> +
> + if (!uncor_status && !cor_status)
> + return;
> +
> + if (uncor_status) {
> + pr_err("EEH:AER Uncorrectable Error\n");
> + for (i = 0; i < ARRAY_SIZE(aer_uncor_errors); i++) {
> + if (uncor_status & (1 << i)) {
> + error_type = (i < ARRAY_SIZE(aer_uncor_errors))
> + ? aer_uncor_errors[i] : "Unknown";
> + pr_err("EEH:AER Error Type: %s [%s]\n", error_type,
> + (uncor_severity & (1 << i)) ? "Fatal" : "Non-Fatal");
> + }
> + }
> + }
> +
> + if (cor_status) {
> + pr_err("EEH:AER Correctable Error\n");
> + for (i = 0; i < ARRAY_SIZE(aer_cor_errors); i++) {
> + if (cor_status & (1 << i)) {
> + error_type = (i < ARRAY_SIZE(aer_cor_errors))
> + ? aer_cor_errors[i] : "Unknown";
> + pr_err("EEH:AER Error Type: %s\n", error_type);
> + }
> + }
> + }
> +}
> +
> /*
> * This routine captures assorted PCI configuration space data
> * for the indicated PCI device, and puts them into a buffer
> @@ -237,9 +317,11 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
> pr_warn("%s\n", buffer);
> }
>
> - /* If AER capable, dump it */
> + /* If AER capable, parse and dump it */
> cap = edev->aer_cap;
> if (cap) {
> + eeh_parse_aer_registers(edev, cap);
> +
> n += scnprintf(buf+n, len-n, "pci-e AER:\n");
> pr_warn("EEH: PCI-E AER capability register set follows:\n");
>
> --
> 2.48.1
>
>
--
Mahesh J Salgaonkar
More information about the Linuxppc-dev
mailing list