[PATCH 5/5] powerpc/powernv: Make PHB diag-data output short
Gavin Shan
shangw at linux.vnet.ibm.com
Sun Feb 23 15:55:48 EST 2014
On Sat, Feb 22, 2014 at 07:05:15AM +1100, Benjamin Herrenschmidt wrote:
>On Fri, 2014-02-21 at 19:53 +0800, Gavin Shan wrote:
>> According to Ben's suggestion, the patch makes the PHB diag-data
>> dump look a bit shorter by printing multiple values on one line
>> and outputting "-" for zero fields.
>>
>> After the patch is applied, the PHB diag-data dump looks like:
>
>Actually, I wouldn't do that "-" thing, I would leave zeros as
>zeros but I would remove lines that have all zeros.
>
OK. I'll change it in the next revision :-)
>Additionally, we might want to consider whether we can get rid
>of more fields for INF, or maybe even not dump them by default
>and just count them (should we have counters in sysfs ?)
>
Yes, I'll remove the dumping for INF and add a sysfs entry for the
INF counter, which will be a separate patch in the next revision.
>One thing I'm tempted to do is turn the full logs into actual
>error logs (sent to FSP) and only display an "analyzed" version
>in the kernel, something that decodes the PEST for example
>and indicates if it's a DMA or MMIO error, the address, etc...
>
OK. I'll try to do it in the next revision :-)
Thanks,
Gavin
>> PHB3 PHB#3 Diag-data (Version: 1)
>>
>> brdgCtl: 00000002
>> UtlSts: - - -
>> RootSts: 0000000f 00400000 b0830008 00100147 00002000
>> RootErrSts: - - -
>> RootErrLog: - - - -
>> RootErrLog1: - - -
>> nFir: - 0030006e00000000 -
>> PhbSts: 0000001c00000000 -
>> Lem: 0000000000100000 42498e327f502eae -
>> PhbErr: - - - -
>> OutErr: - - - -
>> InAErr: 8000000000000000 8000000000000000 0402030000000000 -
>> InBErr: - - - -
>> PE[ 8] A/B: 8480002b00000000 8000000000000000
>>
>> Signed-off-by: Gavin Shan <shangw at linux.vnet.ibm.com>
>> ---
>> arch/powerpc/platforms/powernv/pci.c | 238 ++++++++++++++++++++--------------
>> 1 file changed, 143 insertions(+), 95 deletions(-)
>>
>> diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
>> index 67b2254..a5f236a 100644
>> --- a/arch/powerpc/platforms/powernv/pci.c
>> +++ b/arch/powerpc/platforms/powernv/pci.c
>> @@ -124,67 +124,103 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
>> }
>> #endif /* CONFIG_PCI_MSI */
>>
>> +static char *pnv_pci_diag_field(char *buf, int fmt, u64 val64)
>> +{
>> + u32 val32 = (u32)val64;
>> +
>> + memset(buf, 0, 24);
>> + switch (fmt) {
>> + case 8:
>> + if (val32)
>> + sprintf(buf, "%08x", val32);
>> + else
>> + sprintf(buf, "%s", "-");
>> + break;
>> + case 16:
>> + if (val64)
>> + sprintf(buf, "%016llx", val64);
>> + else
>> + sprintf(buf, "%s", "-");
>> + break;
>> + default:
>> + sprintf(buf, "%s", "-");
>> + }
>> +
>> + return buf;
>> +}
>> +
>> static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
>> struct OpalIoPhbErrorCommon *common)
>> {
>> struct OpalIoP7IOCPhbErrorData *data;
>> + char buf[120];
>> int i;
>>
>> data = (struct OpalIoP7IOCPhbErrorData *)common;
>> pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n\n",
>> hose->global_number, common->version);
>>
>> - pr_info(" brdgCtl: %08x\n", data->brdgCtl);
>> -
>> - pr_info(" portStatusReg: %08x\n", data->portStatusReg);
>> - pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus);
>> - pr_info(" busAgentStatus: %08x\n", data->busAgentStatus);
>> -
>> - pr_info(" deviceStatus: %08x\n", data->deviceStatus);
>> - pr_info(" slotStatus: %08x\n", data->slotStatus);
>> - pr_info(" linkStatus: %08x\n", data->linkStatus);
>> - pr_info(" devCmdStatus: %08x\n", data->devCmdStatus);
>> - pr_info(" devSecStatus: %08x\n", data->devSecStatus);
>> -
>> - pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus);
>> - pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus);
>> - pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus);
>> - pr_info(" tlpHdr1: %08x\n", data->tlpHdr1);
>> - pr_info(" tlpHdr2: %08x\n", data->tlpHdr2);
>> - pr_info(" tlpHdr3: %08x\n", data->tlpHdr3);
>> - pr_info(" tlpHdr4: %08x\n", data->tlpHdr4);
>> - pr_info(" sourceId: %08x\n", data->sourceId);
>> - pr_info(" errorClass: %016llx\n", data->errorClass);
>> - pr_info(" correlator: %016llx\n", data->correlator);
>> - pr_info(" p7iocPlssr: %016llx\n", data->p7iocPlssr);
>> - pr_info(" p7iocCsr: %016llx\n", data->p7iocCsr);
>> - pr_info(" lemFir: %016llx\n", data->lemFir);
>> - pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask);
>> - pr_info(" lemWOF: %016llx\n", data->lemWOF);
>> - pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus);
>> - pr_info(" phbFirstErrorStatus: %016llx\n", data->phbFirstErrorStatus);
>> - pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0);
>> - pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1);
>> - pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus);
>> - pr_info(" mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
>> - pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0);
>> - pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1);
>> - pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus);
>> - pr_info(" dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
>> - pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0);
>> - pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1);
>> - pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus);
>> - pr_info(" dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
>> - pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0);
>> - pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1);
>> + pr_info(" brdgCtl: %s\n",
>> + pnv_pci_diag_field(&buf[0], 8, data->brdgCtl));
>> + pr_info(" UtlSts: %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 8, data->portStatusReg),
>> + pnv_pci_diag_field(&buf[1 * 24], 8, data->rootCmplxStatus),
>> + pnv_pci_diag_field(&buf[2 * 24], 8, data->busAgentStatus));
>> + pr_info(" RootSts: %s %s %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 8, data->deviceStatus),
>> + pnv_pci_diag_field(&buf[1 * 24], 8, data->slotStatus),
>> + pnv_pci_diag_field(&buf[2 * 24], 8, data->linkStatus),
>> + pnv_pci_diag_field(&buf[3 * 24], 8, data->devCmdStatus),
>> + pnv_pci_diag_field(&buf[4 * 24], 8, data->devSecStatus));
>> + pr_info(" RootErrSts: %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 8, data->rootErrorStatus),
>> + pnv_pci_diag_field(&buf[1 * 24], 8, data->uncorrErrorStatus),
>> + pnv_pci_diag_field(&buf[2 * 24], 8, data->corrErrorStatus));
>> + pr_info(" RootErrLog: %s %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 8, data->tlpHdr1),
>> + pnv_pci_diag_field(&buf[1 * 24], 8, data->tlpHdr2),
>> + pnv_pci_diag_field(&buf[2 * 24], 8, data->tlpHdr3),
>> + pnv_pci_diag_field(&buf[3 * 24], 8, data->tlpHdr4));
>> + pr_info(" RootErrLog1: %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 8, data->sourceId),
>> + pnv_pci_diag_field(&buf[1 * 24], 16, data->errorClass),
>> + pnv_pci_diag_field(&buf[2 * 24], 16, data->correlator));
>> + pr_info(" PhbSts: %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 16, data->p7iocPlssr),
>> + pnv_pci_diag_field(&buf[1 * 24], 16, data->p7iocCsr));
>> + pr_info(" Lem: %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 16, data->lemFir),
>> + pnv_pci_diag_field(&buf[1 * 24], 16, data->lemErrorMask),
>> + pnv_pci_diag_field(&buf[2 * 24], 16, data->lemWOF));
>> + pr_info(" PhbErr: %s %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 16, data->phbErrorStatus),
>> + pnv_pci_diag_field(&buf[1 * 24], 16, data->phbFirstErrorStatus),
>> + pnv_pci_diag_field(&buf[2 * 24], 16, data->phbErrorLog0),
>> + pnv_pci_diag_field(&buf[3 * 24], 16, data->phbErrorLog1));
>> + pr_info(" OutErr: %s %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 16, data->mmioErrorStatus),
>> + pnv_pci_diag_field(&buf[1 * 24], 16, data->mmioFirstErrorStatus),
>> + pnv_pci_diag_field(&buf[2 * 24], 16, data->mmioErrorLog0),
>> + pnv_pci_diag_field(&buf[3 * 24], 16, data->mmioErrorLog1));
>> + pr_info(" InAErr: %s %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 16, data->dma0ErrorStatus),
>> + pnv_pci_diag_field(&buf[1 * 24], 16, data->dma0FirstErrorStatus),
>> + pnv_pci_diag_field(&buf[2 * 24], 16, data->dma0ErrorLog0),
>> + pnv_pci_diag_field(&buf[3 * 24], 16, data->dma0ErrorLog1));
>> + pr_info(" InBErr: %s %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 16, data->dma1ErrorStatus),
>> + pnv_pci_diag_field(&buf[1 * 24], 16, data->dma1FirstErrorStatus),
>> + pnv_pci_diag_field(&buf[2 * 24], 16, data->dma1ErrorLog0),
>> + pnv_pci_diag_field(&buf[3 * 24], 16, data->dma1ErrorLog1));
>>
>> for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
>> if ((data->pestA[i] >> 63) == 0 &&
>> (data->pestB[i] >> 63) == 0)
>> continue;
>>
>> - pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]);
>> - pr_info(" PESTB: %016llx\n", data->pestB[i]);
>> + pr_info(" PE[%3d] A/B: %s %s\n",
>> + i, pnv_pci_diag_field(&buf[0], 16, data->pestA[i]),
>> + pnv_pci_diag_field(&buf[1 * 24], 16, data->pestB[i]));
>> }
>> }
>>
>> @@ -192,67 +228,79 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
>> struct OpalIoPhbErrorCommon *common)
>> {
>> struct OpalIoPhb3ErrorData *data;
>> - int i;
>> + char buf[120];
>> + int i = 0;
>>
>> + memset(buf, 0, 120);
>> data = (struct OpalIoPhb3ErrorData*)common;
>> pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n\n",
>> hose->global_number, common->version);
>>
>> - pr_info(" brdgCtl: %08x\n", data->brdgCtl);
>> -
>> - pr_info(" portStatusReg: %08x\n", data->portStatusReg);
>> - pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus);
>> - pr_info(" busAgentStatus: %08x\n", data->busAgentStatus);
>> -
>> - pr_info(" deviceStatus: %08x\n", data->deviceStatus);
>> - pr_info(" slotStatus: %08x\n", data->slotStatus);
>> - pr_info(" linkStatus: %08x\n", data->linkStatus);
>> - pr_info(" devCmdStatus: %08x\n", data->devCmdStatus);
>> - pr_info(" devSecStatus: %08x\n", data->devSecStatus);
>> -
>> - pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus);
>> - pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus);
>> - pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus);
>> - pr_info(" tlpHdr1: %08x\n", data->tlpHdr1);
>> - pr_info(" tlpHdr2: %08x\n", data->tlpHdr2);
>> - pr_info(" tlpHdr3: %08x\n", data->tlpHdr3);
>> - pr_info(" tlpHdr4: %08x\n", data->tlpHdr4);
>> - pr_info(" sourceId: %08x\n", data->sourceId);
>> - pr_info(" errorClass: %016llx\n", data->errorClass);
>> - pr_info(" correlator: %016llx\n", data->correlator);
>> -
>> - pr_info(" nFir: %016llx\n", data->nFir);
>> - pr_info(" nFirMask: %016llx\n", data->nFirMask);
>> - pr_info(" nFirWOF: %016llx\n", data->nFirWOF);
>> - pr_info(" PhbPlssr: %016llx\n", data->phbPlssr);
>> - pr_info(" PhbCsr: %016llx\n", data->phbCsr);
>> - pr_info(" lemFir: %016llx\n", data->lemFir);
>> - pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask);
>> - pr_info(" lemWOF: %016llx\n", data->lemWOF);
>> - pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus);
>> - pr_info(" phbFirstErrorStatus: %016llx\n", data->phbFirstErrorStatus);
>> - pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0);
>> - pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1);
>> - pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus);
>> - pr_info(" mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
>> - pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0);
>> - pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1);
>> - pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus);
>> - pr_info(" dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
>> - pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0);
>> - pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1);
>> - pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus);
>> - pr_info(" dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
>> - pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0);
>> - pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1);
>> + pr_info(" brdgCtl: %s\n",
>> + pnv_pci_diag_field(&buf[0], 8, data->brdgCtl));
>> + pr_info(" UtlSts: %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 8, data->portStatusReg),
>> + pnv_pci_diag_field(&buf[1 * 24], 8, data->rootCmplxStatus),
>> + pnv_pci_diag_field(&buf[2 * 24], 8, data->busAgentStatus));
>> + pr_info(" RootSts: %s %s %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 8, data->deviceStatus),
>> + pnv_pci_diag_field(&buf[1 * 24], 8, data->slotStatus),
>> + pnv_pci_diag_field(&buf[2 * 24], 8, data->linkStatus),
>> + pnv_pci_diag_field(&buf[3 * 24], 8, data->devCmdStatus),
>> + pnv_pci_diag_field(&buf[4 * 24], 8, data->devSecStatus));
>> + pr_info(" RootErrSts: %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 8, data->rootErrorStatus),
>> + pnv_pci_diag_field(&buf[1 * 24], 8, data->uncorrErrorStatus),
>> + pnv_pci_diag_field(&buf[2 * 24], 8, data->corrErrorStatus));
>> + pr_info(" RootErrLog: %s %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 8, data->tlpHdr1),
>> + pnv_pci_diag_field(&buf[1 * 24], 8, data->tlpHdr2),
>> + pnv_pci_diag_field(&buf[2 * 24], 8, data->tlpHdr3),
>> + pnv_pci_diag_field(&buf[3 * 24], 8, data->tlpHdr4));
>> + pr_info(" RootErrLog1: %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 8, data->sourceId),
>> + pnv_pci_diag_field(&buf[1 * 24], 16, data->errorClass),
>> + pnv_pci_diag_field(&buf[2 * 24], 16, data->correlator));
>> + pr_info(" nFir: %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 16, data->nFir),
>> + pnv_pci_diag_field(&buf[1 * 24], 16, data->nFirMask),
>> + pnv_pci_diag_field(&buf[2 * 24], 16, data->nFirWOF));
>> + pr_info(" PhbSts: %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 16, data->phbPlssr),
>> + pnv_pci_diag_field(&buf[1 * 24], 16, data->phbCsr));
>> + pr_info(" Lem: %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 16, data->lemFir),
>> + pnv_pci_diag_field(&buf[1 * 24], 16, data->lemErrorMask),
>> + pnv_pci_diag_field(&buf[2 * 24], 16, data->lemWOF));
>> + pr_info(" PhbErr: %s %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 16, data->phbErrorStatus),
>> + pnv_pci_diag_field(&buf[1 * 24], 16, data->phbFirstErrorStatus),
>> + pnv_pci_diag_field(&buf[2 * 24], 16, data->phbErrorLog0),
>> + pnv_pci_diag_field(&buf[3 * 24], 16, data->phbErrorLog1));
>> + pr_info(" OutErr: %s %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 16, data->mmioErrorStatus),
>> + pnv_pci_diag_field(&buf[1 * 24], 16, data->mmioFirstErrorStatus),
>> + pnv_pci_diag_field(&buf[2 * 24], 16, data->mmioErrorLog0),
>> + pnv_pci_diag_field(&buf[3 * 24], 16, data->mmioErrorLog1));
>> + pr_info(" InAErr: %s %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 16, data->dma0ErrorStatus),
>> + pnv_pci_diag_field(&buf[1 * 24], 16, data->dma0FirstErrorStatus),
>> + pnv_pci_diag_field(&buf[2 * 24], 16, data->dma0ErrorLog0),
>> + pnv_pci_diag_field(&buf[3 * 24], 16, data->dma0ErrorLog1));
>> + pr_info(" InBErr: %s %s %s %s\n",
>> + pnv_pci_diag_field(&buf[0], 16, data->dma1ErrorStatus),
>> + pnv_pci_diag_field(&buf[1 * 24], 16, data->dma1FirstErrorStatus),
>> + pnv_pci_diag_field(&buf[2 * 24], 16, data->dma1ErrorLog0),
>> + pnv_pci_diag_field(&buf[3 * 24], 16, data->dma1ErrorLog1));
>>
>> for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) {
>> if ((data->pestA[i] >> 63) == 0 &&
>> (data->pestB[i] >> 63) == 0)
>> continue;
>>
>> - pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]);
>> - pr_info(" PESTB: %016llx\n", data->pestB[i]);
>> + pr_info(" PE[%3d] A/B: %s %s\n",
>> + i, pnv_pci_diag_field(&buf[0], 16, data->pestA[i]),
>> + pnv_pci_diag_field(&buf[1 * 24], 16, data->pestB[i]));
>> }
>> }
>>
>
>
More information about the Linuxppc-dev
mailing list