[PATCH 5/5] powerpc/powernv: Make PHB diag-data output short
Benjamin Herrenschmidt
benh at kernel.crashing.org
Sat Feb 22 07:05:15 EST 2014
On Fri, 2014-02-21 at 19:53 +0800, Gavin Shan wrote:
> According to Ben's suggestion, the patch makes the PHB diag-data
> dump look a bit shorter by printing multiple values in one line
> and outputting "-" for zero fields.
>
> After the patch is applied, the PHB diag-data dump looks like:
Actually, I wouldn't do that "-" thing, I would leave zeros as
zeros but I would remove lines that have all zeros.
Additionally, we might want to consider whether we can get rid
of more fields for INF, or maybe even not dump them by default
and just count them (should we have counters in sysfs?)
One thing I'm tempted to do is turn the full logs into actual
error logs (sent to FSP) and only display an "analyzed" version
in the kernel, something that decodes the PEST for example
and indicates if it's a DMA or MMIO error, the address, etc...
Cheers,
Ben.
> PHB3 PHB#3 Diag-data (Version: 1)
>
> brdgCtl: 00000002
> UtlSts: - - -
> RootSts: 0000000f 00400000 b0830008 00100147 00002000
> RootErrSts: - - -
> RootErrLog: - - - -
> RootErrLog1: - - -
> nFir: - 0030006e00000000 -
> PhbSts: 0000001c00000000 -
> Lem: 0000000000100000 42498e327f502eae -
> PhbErr: - - - -
> OutErr: - - - -
> InAErr: 8000000000000000 8000000000000000 0402030000000000 -
> InBErr: - - - -
> PE[ 8] A/B: 8480002b00000000 8000000000000000
>
> Signed-off-by: Gavin Shan <shangw at linux.vnet.ibm.com>
> ---
> arch/powerpc/platforms/powernv/pci.c | 238 ++++++++++++++++++++--------------
> 1 file changed, 143 insertions(+), 95 deletions(-)
>
> diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
> index 67b2254..a5f236a 100644
> --- a/arch/powerpc/platforms/powernv/pci.c
> +++ b/arch/powerpc/platforms/powernv/pci.c
> @@ -124,67 +124,103 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
> }
> #endif /* CONFIG_PCI_MSI */
>
> +static char *pnv_pci_diag_field(char *buf, int fmt, u64 val64)
> +{
> + u32 val32 = (u32)val64;
> +
> + memset(buf, 0, 24);
> + switch (fmt) {
> + case 8:
> + if (val32)
> + sprintf(buf, "%08x", val32);
> + else
> + sprintf(buf, "%s", "-");
> + break;
> + case 16:
> + if (val64)
> + sprintf(buf, "%016llx", val64);
> + else
> + sprintf(buf, "%s", "-");
> + break;
> + default:
> + sprintf(buf, "%s", "-");
> + }
> +
> + return buf;
> +}
> +
> static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
> struct OpalIoPhbErrorCommon *common)
> {
> struct OpalIoP7IOCPhbErrorData *data;
> + char buf[120];
> int i;
>
> data = (struct OpalIoP7IOCPhbErrorData *)common;
> pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n\n",
> hose->global_number, common->version);
>
> - pr_info(" brdgCtl: %08x\n", data->brdgCtl);
> -
> - pr_info(" portStatusReg: %08x\n", data->portStatusReg);
> - pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus);
> - pr_info(" busAgentStatus: %08x\n", data->busAgentStatus);
> -
> - pr_info(" deviceStatus: %08x\n", data->deviceStatus);
> - pr_info(" slotStatus: %08x\n", data->slotStatus);
> - pr_info(" linkStatus: %08x\n", data->linkStatus);
> - pr_info(" devCmdStatus: %08x\n", data->devCmdStatus);
> - pr_info(" devSecStatus: %08x\n", data->devSecStatus);
> -
> - pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus);
> - pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus);
> - pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus);
> - pr_info(" tlpHdr1: %08x\n", data->tlpHdr1);
> - pr_info(" tlpHdr2: %08x\n", data->tlpHdr2);
> - pr_info(" tlpHdr3: %08x\n", data->tlpHdr3);
> - pr_info(" tlpHdr4: %08x\n", data->tlpHdr4);
> - pr_info(" sourceId: %08x\n", data->sourceId);
> - pr_info(" errorClass: %016llx\n", data->errorClass);
> - pr_info(" correlator: %016llx\n", data->correlator);
> - pr_info(" p7iocPlssr: %016llx\n", data->p7iocPlssr);
> - pr_info(" p7iocCsr: %016llx\n", data->p7iocCsr);
> - pr_info(" lemFir: %016llx\n", data->lemFir);
> - pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask);
> - pr_info(" lemWOF: %016llx\n", data->lemWOF);
> - pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus);
> - pr_info(" phbFirstErrorStatus: %016llx\n", data->phbFirstErrorStatus);
> - pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0);
> - pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1);
> - pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus);
> - pr_info(" mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
> - pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0);
> - pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1);
> - pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus);
> - pr_info(" dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
> - pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0);
> - pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1);
> - pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus);
> - pr_info(" dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
> - pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0);
> - pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1);
> + pr_info(" brdgCtl: %s\n",
> + pnv_pci_diag_field(&buf[0], 8, data->brdgCtl));
> + pr_info(" UtlSts: %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 8, data->portStatusReg),
> + pnv_pci_diag_field(&buf[1 * 24], 8, data->rootCmplxStatus),
> + pnv_pci_diag_field(&buf[2 * 24], 8, data->busAgentStatus));
> + pr_info(" RootSts: %s %s %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 8, data->deviceStatus),
> + pnv_pci_diag_field(&buf[1 * 24], 8, data->slotStatus),
> + pnv_pci_diag_field(&buf[2 * 24], 8, data->linkStatus),
> + pnv_pci_diag_field(&buf[3 * 24], 8, data->devCmdStatus),
> + pnv_pci_diag_field(&buf[4 * 24], 8, data->devSecStatus));
> + pr_info(" RootErrSts: %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 8, data->rootErrorStatus),
> + pnv_pci_diag_field(&buf[1 * 24], 8, data->uncorrErrorStatus),
> + pnv_pci_diag_field(&buf[2 * 24], 8, data->corrErrorStatus));
> + pr_info(" RootErrLog: %s %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 8, data->tlpHdr1),
> + pnv_pci_diag_field(&buf[1 * 24], 8, data->tlpHdr2),
> + pnv_pci_diag_field(&buf[2 * 24], 8, data->tlpHdr3),
> + pnv_pci_diag_field(&buf[3 * 24], 8, data->tlpHdr4));
> + pr_info(" RootErrLog1: %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 8, data->sourceId),
> + pnv_pci_diag_field(&buf[1 * 24], 16, data->errorClass),
> + pnv_pci_diag_field(&buf[2 * 24], 16, data->correlator));
> + pr_info(" PhbSts: %s %s\n",
> + pnv_pci_diag_field(&buf[0], 16, data->p7iocPlssr),
> + pnv_pci_diag_field(&buf[1 * 24], 16, data->p7iocCsr));
> + pr_info(" Lem: %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 16, data->lemFir),
> + pnv_pci_diag_field(&buf[1 * 24], 16, data->lemErrorMask),
> + pnv_pci_diag_field(&buf[2 * 24], 16, data->lemWOF));
> + pr_info(" PhbErr: %s %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 16, data->phbErrorStatus),
> + pnv_pci_diag_field(&buf[1 * 24], 16, data->phbFirstErrorStatus),
> + pnv_pci_diag_field(&buf[2 * 24], 16, data->phbErrorLog0),
> + pnv_pci_diag_field(&buf[3 * 24], 16, data->phbErrorLog1));
> + pr_info(" OutErr: %s %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 16, data->mmioErrorStatus),
> + pnv_pci_diag_field(&buf[1 * 24], 16, data->mmioFirstErrorStatus),
> + pnv_pci_diag_field(&buf[2 * 24], 16, data->mmioErrorLog0),
> + pnv_pci_diag_field(&buf[3 * 24], 16, data->mmioErrorLog1));
> + pr_info(" InAErr: %s %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 16, data->dma0ErrorStatus),
> + pnv_pci_diag_field(&buf[1 * 24], 16, data->dma0FirstErrorStatus),
> + pnv_pci_diag_field(&buf[2 * 24], 16, data->dma0ErrorLog0),
> + pnv_pci_diag_field(&buf[3 * 24], 16, data->dma0ErrorLog1));
> + pr_info(" InBErr: %s %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 16, data->dma1ErrorStatus),
> + pnv_pci_diag_field(&buf[1 * 24], 16, data->dma1FirstErrorStatus),
> + pnv_pci_diag_field(&buf[2 * 24], 16, data->dma1ErrorLog0),
> + pnv_pci_diag_field(&buf[3 * 24], 16, data->dma1ErrorLog1));
>
> for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
> if ((data->pestA[i] >> 63) == 0 &&
> (data->pestB[i] >> 63) == 0)
> continue;
>
> - pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]);
> - pr_info(" PESTB: %016llx\n", data->pestB[i]);
> + pr_info(" PE[%3d] A/B: %s %s\n",
> + i, pnv_pci_diag_field(&buf[0], 16, data->pestA[i]),
> + pnv_pci_diag_field(&buf[1 * 24], 16, data->pestB[i]));
> }
> }
>
> @@ -192,67 +228,79 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
> struct OpalIoPhbErrorCommon *common)
> {
> struct OpalIoPhb3ErrorData *data;
> - int i;
> + char buf[120];
> + int i = 0;
>
> + memset(buf, 0, 120);
> data = (struct OpalIoPhb3ErrorData*)common;
> pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n\n",
> hose->global_number, common->version);
>
> - pr_info(" brdgCtl: %08x\n", data->brdgCtl);
> -
> - pr_info(" portStatusReg: %08x\n", data->portStatusReg);
> - pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus);
> - pr_info(" busAgentStatus: %08x\n", data->busAgentStatus);
> -
> - pr_info(" deviceStatus: %08x\n", data->deviceStatus);
> - pr_info(" slotStatus: %08x\n", data->slotStatus);
> - pr_info(" linkStatus: %08x\n", data->linkStatus);
> - pr_info(" devCmdStatus: %08x\n", data->devCmdStatus);
> - pr_info(" devSecStatus: %08x\n", data->devSecStatus);
> -
> - pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus);
> - pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus);
> - pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus);
> - pr_info(" tlpHdr1: %08x\n", data->tlpHdr1);
> - pr_info(" tlpHdr2: %08x\n", data->tlpHdr2);
> - pr_info(" tlpHdr3: %08x\n", data->tlpHdr3);
> - pr_info(" tlpHdr4: %08x\n", data->tlpHdr4);
> - pr_info(" sourceId: %08x\n", data->sourceId);
> - pr_info(" errorClass: %016llx\n", data->errorClass);
> - pr_info(" correlator: %016llx\n", data->correlator);
> -
> - pr_info(" nFir: %016llx\n", data->nFir);
> - pr_info(" nFirMask: %016llx\n", data->nFirMask);
> - pr_info(" nFirWOF: %016llx\n", data->nFirWOF);
> - pr_info(" PhbPlssr: %016llx\n", data->phbPlssr);
> - pr_info(" PhbCsr: %016llx\n", data->phbCsr);
> - pr_info(" lemFir: %016llx\n", data->lemFir);
> - pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask);
> - pr_info(" lemWOF: %016llx\n", data->lemWOF);
> - pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus);
> - pr_info(" phbFirstErrorStatus: %016llx\n", data->phbFirstErrorStatus);
> - pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0);
> - pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1);
> - pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus);
> - pr_info(" mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
> - pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0);
> - pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1);
> - pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus);
> - pr_info(" dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
> - pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0);
> - pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1);
> - pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus);
> - pr_info(" dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
> - pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0);
> - pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1);
> + pr_info(" brdgCtl: %s\n",
> + pnv_pci_diag_field(&buf[0], 8, data->brdgCtl));
> + pr_info(" UtlSts: %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 8, data->portStatusReg),
> + pnv_pci_diag_field(&buf[1 * 24], 8, data->rootCmplxStatus),
> + pnv_pci_diag_field(&buf[2 * 24], 8, data->busAgentStatus));
> + pr_info(" RootSts: %s %s %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 8, data->deviceStatus),
> + pnv_pci_diag_field(&buf[1 * 24], 8, data->slotStatus),
> + pnv_pci_diag_field(&buf[2 * 24], 8, data->linkStatus),
> + pnv_pci_diag_field(&buf[3 * 24], 8, data->devCmdStatus),
> + pnv_pci_diag_field(&buf[4 * 24], 8, data->devSecStatus));
> + pr_info(" RootErrSts: %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 8, data->rootErrorStatus),
> + pnv_pci_diag_field(&buf[1 * 24], 8, data->uncorrErrorStatus),
> + pnv_pci_diag_field(&buf[2 * 24], 8, data->corrErrorStatus));
> + pr_info(" RootErrLog: %s %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 8, data->tlpHdr1),
> + pnv_pci_diag_field(&buf[1 * 24], 8, data->tlpHdr2),
> + pnv_pci_diag_field(&buf[2 * 24], 8, data->tlpHdr3),
> + pnv_pci_diag_field(&buf[3 * 24], 8, data->tlpHdr4));
> + pr_info(" RootErrLog1: %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 8, data->sourceId),
> + pnv_pci_diag_field(&buf[1 * 24], 16, data->errorClass),
> + pnv_pci_diag_field(&buf[2 * 24], 16, data->correlator));
> + pr_info(" nFir: %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 16, data->nFir),
> + pnv_pci_diag_field(&buf[1 * 24], 16, data->nFirMask),
> + pnv_pci_diag_field(&buf[2 * 24], 16, data->nFirWOF));
> + pr_info(" PhbSts: %s %s\n",
> + pnv_pci_diag_field(&buf[0], 16, data->phbPlssr),
> + pnv_pci_diag_field(&buf[1 * 24], 16, data->phbCsr));
> + pr_info(" Lem: %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 16, data->lemFir),
> + pnv_pci_diag_field(&buf[1 * 24], 16, data->lemErrorMask),
> + pnv_pci_diag_field(&buf[2 * 24], 16, data->lemWOF));
> + pr_info(" PhbErr: %s %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 16, data->phbErrorStatus),
> + pnv_pci_diag_field(&buf[1 * 24], 16, data->phbFirstErrorStatus),
> + pnv_pci_diag_field(&buf[2 * 24], 16, data->phbErrorLog0),
> + pnv_pci_diag_field(&buf[3 * 24], 16, data->phbErrorLog1));
> + pr_info(" OutErr: %s %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 16, data->mmioErrorStatus),
> + pnv_pci_diag_field(&buf[1 * 24], 16, data->mmioFirstErrorStatus),
> + pnv_pci_diag_field(&buf[2 * 24], 16, data->mmioErrorLog0),
> + pnv_pci_diag_field(&buf[3 * 24], 16, data->mmioErrorLog1));
> + pr_info(" InAErr: %s %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 16, data->dma0ErrorStatus),
> + pnv_pci_diag_field(&buf[1 * 24], 16, data->dma0FirstErrorStatus),
> + pnv_pci_diag_field(&buf[2 * 24], 16, data->dma0ErrorLog0),
> + pnv_pci_diag_field(&buf[3 * 24], 16, data->dma0ErrorLog1));
> + pr_info(" InBErr: %s %s %s %s\n",
> + pnv_pci_diag_field(&buf[0], 16, data->dma1ErrorStatus),
> + pnv_pci_diag_field(&buf[1 * 24], 16, data->dma1FirstErrorStatus),
> + pnv_pci_diag_field(&buf[2 * 24], 16, data->dma1ErrorLog0),
> + pnv_pci_diag_field(&buf[3 * 24], 16, data->dma1ErrorLog1));
>
> for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) {
> if ((data->pestA[i] >> 63) == 0 &&
> (data->pestB[i] >> 63) == 0)
> continue;
>
> - pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]);
> - pr_info(" PESTB: %016llx\n", data->pestB[i]);
> + pr_info(" PE[%3d] A/B: %s %s\n",
> + i, pnv_pci_diag_field(&buf[0], 16, data->pestA[i]),
> + pnv_pci_diag_field(&buf[1 * 24], 16, data->pestB[i]));
> }
> }
>
More information about the Linuxppc-dev
mailing list