[Skiboot] [RFC] hw/npu2: Implement logging HMI actions
Balbir Singh
bsingharora at gmail.com
Mon Nov 13 21:13:34 AEDT 2017
Log HMI errors as a starting step. We'll need to get Linux to deduce
and interpret the HMI event. The expectation is that we'll add a
corresponding handler in HMI handling parts to deal with the various
errors that have not been unhandled.
Signed-off-by: Balbir Singh <bsingharora at gmail.com>
---
core/hmi.c | 84 ++++++++++++++++++++++++++++++++++++++++++++-
include/npu2-regs.h | 18 ++++++++++
include/opal-api.h | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 199 insertions(+), 1 deletion(-)
diff --git a/core/hmi.c b/core/hmi.c
index 07c08462..b450032c 100644
--- a/core/hmi.c
+++ b/core/hmi.c
@@ -23,6 +23,7 @@
#include <cpu.h>
#include <chip.h>
#include <npu-regs.h>
+#include <npu2-regs.h>
#include <npu.h>
#include <capp.h>
@@ -517,6 +518,87 @@ static void find_nx_checkstop_reason(int flat_chip_id,
*event_generated = true;
}
+static void find_npu2_checkstop_reason(int flat_chip_id,
+ struct OpalHMIEvent *hmi_evt,
+ bool *event_generated)
+{
+ struct phb *phb;
+ struct npu *p = NULL;
+ int i;
+
+ uint64_t npu2_fir;
+ uint64_t npu2_fir_mask;
+ uint64_t npu2_fir_action0;
+ uint64_t npu2_fir_action1;
+ uint64_t npu2_fir_addr;
+ uint64_t npu2_fir_mask_addr;
+ uint64_t npu2_fir_action0_addr;
+ uint64_t npu2_fir_action1_addr;
+ uint64_t fatal_errors;
+
+ /* Only check for NPU errors if the chip has a NPU */
+ if (PVR_TYPE(mfspr(SPR_PVR)) != PVR_TYPE_P9)
+ return;
+
+ /* Find the NPU on the chip associated with the HMI. */
+ for_each_phb(phb) {
+ /* NOTE: if a chip ever has >1 NPU this will need adjusting */
+ if (dt_node_is_compatible(phb->dt_node, "ibm,power9-npu-pciex") &&
+ (dt_get_chip_id(phb->dt_node) == flat_chip_id)) {
+ p = phb_to_npu(phb);
+ break;
+ }
+ }
+
+ /* If we didn't find a NPU on the chip, it's not our checkstop. */
+ if (p == NULL)
+ return;
+
+ npu2_fir_addr = NPU2_FIR_REGISTER_0;
+ npu2_fir_mask_addr = NPU2_FIR_REGISTER_0_MASK;
+ npu2_fir_action0_addr = NPU2_FIR_REGISTER_0_ACTION0;
+ npu2_fir_action1_addr = NPU2_FIR_REGISTER_0_ACTION1;
+
+ for (i = 0; i < NPU2_TOTAL_FIR_REGISTERS; i++) {
+ /* Read all the registers necessary to find a checkstop condition. */
+ if (xscom_read(flat_chip_id, npu2_fir_addr, &npu2_fir) ||
+ xscom_read(flat_chip_id, npu2_fir_mask_addr, &npu2_fir_mask) ||
+ xscom_read(flat_chip_id, npu2_fir_action0_addr, &npu2_fir_action0) ||
+ xscom_read(flat_chip_id, npu2_fir_action1_addr, &npu2_fir_action1)) {
+ prerror("HMI: Couldn't read NPU FIR register%d with XSCOM\n", i);
+ continue;
+ }
+
+ fatal_errors = npu2_fir & ~npu2_fir_mask & npu2_fir_action0 & npu2_fir_action1;
+
+ /* If there's no errors, we don't need to do anything. */
+ if (!fatal_errors)
+ continue;
+
+ prlog(PR_ERR, "NPU: FIR#%d FIR 0x%016llx mask 0x%016llx\n",
+ i, npu2_fir, npu2_fir_mask);
+ prlog(PR_ERR, "NPU: ACTION0 0x%016llx, ACTION1 0x%016llx\n",
+ npu2_fir_action0, npu2_fir_action1);
+
+ /* Can't do a fence yet, we are just logging fir information for now */
+ npu2_fir_addr += NPU2_FIR_OFFSET;
+ npu2_fir_mask_addr += NPU2_FIR_OFFSET;
+ npu2_fir_action0_addr += NPU2_FIR_OFFSET;
+ npu2_fir_action1_addr += NPU2_FIR_OFFSET;
+
+ }
+
+ /* Set up the HMI event */
+ hmi_evt->severity = OpalHMI_SEV_WARNING;
+ hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
+ hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_NPU;
+ hmi_evt->u.xstop_error.u.chip_id = flat_chip_id;
+
+ /* Marking the event as recoverable so that we don't crash */
+ queue_hmi_event(hmi_evt, 1);
+ *event_generated = true;
+}
+
static void find_npu_checkstop_reason(int flat_chip_id,
struct OpalHMIEvent *hmi_evt,
bool *event_generated)
@@ -532,7 +614,7 @@ static void find_npu_checkstop_reason(int flat_chip_id,
/* Only check for NPU errors if the chip has a NPU */
if (PVR_TYPE(mfspr(SPR_PVR)) != PVR_TYPE_P8NVL)
- return;
+ return find_npu2_checkstop_reason(flat_chip_id, hmi_evt, event_generated);
/* Find the NPU on the chip associated with the HMI. */
for_each_phb(phb) {
diff --git a/include/npu2-regs.h b/include/npu2-regs.h
index fd1367b7..b7522f4a 100644
--- a/include/npu2-regs.h
+++ b/include/npu2-regs.h
@@ -466,4 +466,22 @@ void npu2_write_mask(struct npu2 *p, uint64_t reg, uint64_t val, uint64_t mask);
#define NPU2_DD1_MISC_SCOM_IND_SCOM_DATA 0x38f
#define NPU2_MISC_SCOM_IND_SCOM_DATA 0x68f
+#define NPU2_FIR_REGISTER_0 0x0000000005013C00
+#define NPU2_FIR_REGISTER_0_MASK 0x0000000005013C03
+#define NPU2_FIR_REGISTER_0_ACTION0 0x0000000005013C06
+#define NPU2_FIR_REGISTER_0_ACTION1 0x0000000005013C07
+
+#define NPU2_FIR_REGISTER_1 0x0000000005013C40
+#define NPU2_FIR_REGISTER_1_MASK 0x0000000005013C43
+#define NPU2_FIR_REGISTER_1_ACTION0 0x0000000005013C46
+#define NPU2_FIR_REGISTER_1_ACTION1 0x0000000005013C47
+
+#define NPU2_FIR_REGISTER_2 0x0000000005013C80
+#define NPU2_FIR_REGISTER_2_MASK 0x0000000005013C83
+#define NPU2_FIR_REGISTER_2_ACTION0 0x0000000005013C86
+#define NPU2_FIR_REGISTER_2_ACTION1 0x0000000005013C87
+
+#define NPU2_TOTAL_FIR_REGISTERS 3
+#define NPU2_FIR_OFFSET 0x40
+
#endif /* __NPU2_REGS_H */
diff --git a/include/opal-api.h b/include/opal-api.h
index 0bc036ed..cef88f68 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -725,6 +725,104 @@ enum OpalHMI_NestAccelXstopReason {
NX_CHECKSTOP_PBI_ISN_UE = 0x00002000,
};
+/*
+ * Can't use enums for 64 bit values, use #defines
+ */
+#define NPU2_CHECKSTOP_REG0_NTL_ARRAY_CE PPC_BIT(0)
+#define NPU2_CHECKSTOP_REG0_NTL_ARRAY_HDR_CE PPC_BIT(1)
+#define NPU2_CHECKSTOP_REG0_NTL_ARRAY_DATA_UE PPC_BIT(2)
+#define NPU2_CHECKSTOP_REG0_NTL_NVL_FLIT_PERR PPC_BIT(3)
+#define NPU2_CHECKSTOP_REG0_NTL_NVL_DATA_PERR PPC_BIT(4)
+#define NPU2_CHECKSTOP_REG0_NTL_NVL_PKT_MALFOR PPC_BIT(5)
+#define NPU2_CHECKSTOP_REG0_NTL_NVL_PKT_UNSUPPORTED PPC_BIT(6)
+#define NPU2_CHECKSTOP_REG0_NTL_NVL_CONFIG_ERR PPC_BIT(7)
+#define NPU2_CHECKSTOP_REG0_NTL_NVL_CRC_ERR PPC_BIT(8)
+#define NPU2_CHECKSTOP_REG0_NTL_PRI_ERR PPC_BIT(9)
+#define NPU2_CHECKSTOP_REG0_NTL_LOGIC_ERR PPC_BIT(10)
+#define NPU2_CHECKSTOP_REG0_NTL_LMD_POISON PPC_BIT(11)
+#define NPU2_CHECKSTOP_REG0_NTL_ARRAY_DATA_SUE PPC_BIT(12)
+#define NPU2_CHECKSTOP_REG0_CTL_ARRAY_CE PPC_BIT(13)
+#define NPU2_CHECKSTOP_REG0_CTL_PBUS_RECOV_ERR PPC_BIT(14)
+#define NPU2_CHECKSTOP_REG0_CTL_REG_RING_ERR PPC_BIT(15)
+#define NPU2_CHECKSTOP_REG0_CTL_MMIO_ST_DATA_UE PPC_BIT(16)
+#define NPU2_CHECKSTOP_REG0_CTL_PEF PPC_BIT(17)
+#define NPU2_CHECKSTOP_REG0_CTL_NVL_CFG_ERR PPC_BIT(18)
+#define NPU2_CHECKSTOP_REG0_CTL_NVL_FATAL_ERR PPC_BIT(19)
+#define NPU2_CHECKSTOP_REG0_RESERVED_1 PPC_BIT(20)
+#define NPU2_CHECKSTOP_REG0_CTL_ARRAY_UE PPC_BIT(21)
+#define NPU2_CHECKSTOP_REG0_CTL_PBUS_PERR PPC_BIT(22)
+#define NPU2_CHECKSTOP_REG0_CTL_PBUS_FATAL_ERR PPC_BIT(23)
+#define NPU2_CHECKSTOP_REG0_CTL_PBUS_CONFIG_ERR PPC_BIT(24)
+#define NPU2_CHECKSTOP_REG0_CTL_FWD_PROGRESS_ERR PPC_BIT(25)
+#define NPU2_CHECKSTOP_REG0_CTL_LOGIC_ERR PPC_BIT(26)
+#define NPU2_CHECKSTOP_REG0_DAT_DATA_BE_UE PPC_BIT(29)
+#define NPU2_CHECKSTOP_REG0_DAT_DATA_BE_CE PPC_BIT(30)
+#define NPU2_CHECKSTOP_REG0_DAT_DATA_BE_PERR PPC_BIT(31)
+#define NPU2_CHECKSTOP_REG0_DAT_CREG_PERR PPC_BIT(32)
+#define NPU2_CHECKSTOP_REG0_DAT_RTAG_PERR PPC_BIT(33)
+#define NPU2_CHECKSTOP_REG0_DAT_STATE_PERR PPC_BIT(34)
+#define NPU2_CHECKSTOP_REG0_DAT_LOGIC_ERR PPC_BIT(35)
+#define NPU2_CHECKSTOP_REG0_DAT_DATA_BE_SUE PPC_BIT(36)
+#define NPU2_CHECKSTOP_REG0_DAT_PBRX_SUE PPC_BIT(37)
+#define NPU2_CHECKSTOP_REG0_XTS_INT PPC_BIT(40)
+#define NPU2_CHECKSTOP_REG0_XTS_SRAM_CE PPC_BIT(41)
+#define NPU2_CHECKSTOP_REG0_XTS_SRAM_UE PPC_BIT(42)
+#define NPU2_CHECKSTOP_REG0_XTS_PROTOCOL_CE PPC_BIT(43)
+#define NPU2_CHECKSTOP_REG0_XTS_PROTOCOL_UE PPC_BIT(44)
+#define NPU2_CHECKSTOP_REG0_XTS_PBUS_PROTOCOL PPC_BIT(45)
+
+#define NPU2_CHECKSTOP_REG1_NDL_BRK0_STALL PPC_BIT(0)
+#define NPU2_CHECKSTOP_REG1_NDL_BRK0_NOSTALL PPC_BIT(1)
+#define NPU2_CHECKSTOP_REG1_NDL_BRK1_STALL PPC_BIT(2)
+#define NPU2_CHECKSTOP_REG1_NDL_BRK1_NOSTALL PPC_BIT(3)
+#define NPU2_CHECKSTOP_REG1_NDL_BRK2_STALL PPC_BIT(4)
+#define NPU2_CHECKSTOP_REG1_NDL_BRK2_NOSTALL PPC_BIT(5)
+#define NPU2_CHECKSTOP_REG1_NDL_BRK3_STALL PPC_BIT(6)
+#define NPU2_CHECKSTOP_REG1_NDL_BRK3_NOSTALL PPC_BIT(7)
+#define NPU2_CHECKSTOP_REG1_NDL_BRK4_STALL PPC_BIT(8)
+#define NPU2_CHECKSTOP_REG1_NDL_BRK4_NOSTALL PPC_BIT(9)
+#define NPU2_CHECKSTOP_REG1_NDL_BRK5_STALL PPC_BIT(10)
+#define NPU2_CHECKSTOP_REG1_NDL_BRK5_NOSTALL PPC_BIT(11)
+#define NPU2_CHECKSTOP_REG1_MISC_REG_RING_ERR PPC_BIT(12)
+#define NPU2_CHECKSTOP_REG1_MISC_INT_RA_PERR PPC_BIT(13)
+#define NPU2_CHECKSTOP_REG1_MISC_DA_ADDR_PERR PPC_BIT(14)
+#define NPU2_CHECKSTOP_REG1_MISC_CTRL_PERR PPC_BIT(15)
+#define NPU2_CHECKSTOP_REG1_MISC_NMMU_ERR PPC_BIT(16)
+#define NPU2_CHECKSTOP_REG1_ATS_TVT_ENTRY_INVALID PPC_BIT(17)
+#define NPU2_CHECKSTOP_REG1_ATS_TVT_ADDR_RANGE_ERR PPC_BIT(18)
+#define NPU2_CHECKSTOP_REG1_ATS_TCE_PAGE_ACCESS_CA_ERR PPC_BIT(19)
+#define NPU2_CHECKSTOP_REG1_ATS_TCE_CACHE_MULT_HIT_ERR PPC_BIT(20)
+#define NPU2_CHECKSTOP_REG1_ATS_TCE_PAGE_ACCESS_TW_ERR PPC_BIT(21)
+#define NPU2_CHECKSTOP_REG1_ATS_TCE_REQ_TO_ERR PPC_BIT(22)
+#define NPU2_CHECKSTOP_REG1_ATS_TCD_PERR PPC_BIT(23)
+#define NPU2_CHECKSTOP_REG1_ATS_TDR_PERR PPC_BIT(24)
+#define NPU2_CHECKSTOP_REG1_ATS_AT_EA_UE PPC_BIT(25)
+#define NPU2_CHECKSTOP_REG1_ATS_AT_EA_CE PPC_BIT(26)
+#define NPU2_CHECKSTOP_REG1_ATS_AT_TDRMEM_UE PPC_BIT(27)
+#define NPU2_CHECKSTOP_REG1_ATS_AT_TDRMEM_CE PPC_BIT(28)
+#define NPU2_CHECKSTOP_REG1_ATS_AT_RSPOUT_UE PPC_BIT(29)
+#define NPU2_CHECKSTOP_REG1_ATS_AT_RSPOUT_CE PPC_BIT(30)
+#define NPU2_CHECKSTOP_REG1_ATS_TVT_PERR PPC_BIT(31)
+#define NPU2_CHECKSTOP_REG1_ATS_IODA_ADDR_PERR PPC_BIT(32)
+#define NPU2_CHECKSTOP_REG1_ATS_NPU_CTRL_PERR PPC_BIT(33)
+#define NPU2_CHECKSTOP_REG1_ATS_NPU_TOR_PERR PPC_BIT(34)
+#define NPU2_CHECKSTOP_REG1_ATS_INVAL_IODA_TBL_SEL PPC_BIT(35)
+
+#define NPU2_CHECKSTOP_REG2_XSL_MMIO_INVALIDATE_REQ_WHILE_1_INPROG PPC_BIT(36)
+#define NPU2_CHECKSTOP_REG2_XSL_UNEXPECTED_ITAG_PORT_0 PPC_BIT(37)
+#define NPU2_CHECKSTOP_REG2_XSL_UNEXPECTED_ITAG_PORT_1 PPC_BIT(38)
+#define NPU2_CHECKSTOP_REG2_XSL_UNEXPECTED_RD_PEE_COMPLETION PPC_BIT(39)
+#define NPU2_CHECKSTOP_REG2_XSL_UNEXPECTED_CO_RESP PPC_BIT(40)
+#define NPU2_CHECKSTOP_REG2_XSL_XLAT_REQ_WHILE_SPAP_INVALID PPC_BIT(41)
+#define NPU2_CHECKSTOP_REG2_XSL_INVALID_PEE PPC_BIT(42)
+#define NPU2_CHECKSTOP_REG2_XSL_BLOOM_FILTER_PROTECT_ERR PPC_BIT(43)
+#define NPU2_CHECKSTOP_REG2_XSL_CE PPC_BIT(46)
+#define NPU2_CHECKSTOP_REG2_XSL_UE PPC_BIT(47)
+#define NPU2_CHECKSTOP_REG2_XSL_SLBI_TLBI_BUFF_OVERFLOW PPC_BIT(48)
+#define NPU2_CHECKSTOP_REG2_XSL_SBE_CORR_ERR_PB_CHKOUT_RSP_DATA PPC_BIT(49)
+#define NPU2_CHECKSTOP_REG2_XSL_UE_PB_CHKOUT_RSP_DATA PPC_BIT(50)
+#define NPU2_CHECKSTOP_REG2_XSL_SUE_PB_CHKOUT_RSP_DATA PPC_BIT(51)
+
struct OpalHMIEvent {
uint8_t version; /* 0x00 */
uint8_t severity; /* 0x01 */
--
2.13.6
More information about the Skiboot
mailing list