[Skiboot] [PATCH V2 10/15] pau: hmi scom dump

Frederic Barrat fbarrat at linux.ibm.com
Sat Oct 2 00:50:35 AEST 2021



On 23/09/2021 11:03, Christophe Lombard wrote:
> This patch adds a new function to dump PAU registers when an HMI has been
> raised and an OpenCAPI link has been hit by an error.
> 
> For each register, the scom address and the register value are printed.
> 
> hmi.c has been redesigned in order to support the new PHB/PCIEX
> type (PAU OpenCAPI). The *npu* functions now support the NPU and PAU units
> of P8, P9 and P10 chips.
> 
> Signed-off-by: Christophe Lombard <clombard at linux.vnet.ibm.com>
> ---


Reviewed-by: Frederic Barrat <fbarrat at linux.ibm.com>


>   core/hmi.c               | 276 +++++++++++++++++++--------------------
>   hw/npu2-common.c         |  27 +---
>   hw/pau.c                 |  44 +++++++
>   include/npu2-regs.h      |   5 +
>   include/npu2.h           |   2 +-
>   include/pau-regs.h       |  24 ++++
>   include/pau.h            |   2 +
>   include/xscom-p10-regs.h |   3 +
>   8 files changed, 214 insertions(+), 169 deletions(-)
> 
> diff --git a/core/hmi.c b/core/hmi.c
> index 9363cc5f..ce5abd7d 100644
> --- a/core/hmi.c
> +++ b/core/hmi.c
> @@ -19,8 +19,10 @@
>   #include <pci.h>
>   #include <cpu.h>
>   #include <chip.h>
> +#include <pau-regs.h>
>   #include <npu-regs.h>
>   #include <npu2-regs.h>
> +#include <pau.h>
>   #include <npu2.h>
>   #include <npu.h>
>   #include <capp.h>
> @@ -717,13 +719,7 @@ static void find_nx_checkstop_reason(int flat_chip_id,
>   	queue_hmi_event(hmi_evt, 0, out_flags);
>   }
>   
> -static bool phb_is_npu2(struct dt_node *dn)
> -{
> -	return (dt_node_is_compatible(dn, "ibm,power9-npu-pciex") ||
> -		dt_node_is_compatible(dn, "ibm,power9-npu-opencapi-pciex"));
> -}
> -
> -static void add_npu2_xstop_reason(uint32_t *xstop_reason, uint8_t reason)
> +static void add_npu_xstop_reason(uint32_t *xstop_reason, uint8_t reason)
>   {
>   	int i, reason_count;
>   	uint8_t *ptr;
> @@ -739,8 +735,8 @@ static void add_npu2_xstop_reason(uint32_t *xstop_reason, uint8_t reason)
>   	}
>   }
>   
> -static void encode_npu2_xstop_reason(uint32_t *xstop_reason,
> -				uint64_t fir, int fir_number)
> +static void encode_npu_xstop_reason(uint32_t *xstop_reason,
> +				    uint64_t fir, int fir_number)
>   {
>   	int bit;
>   	uint8_t reason;
> @@ -758,114 +754,125 @@ static void encode_npu2_xstop_reason(uint32_t *xstop_reason,
>   		bit = ilog2(fir);
>   		reason = fir_number << 6;
>   		reason |= (63 - bit); // IBM numbering
> -		add_npu2_xstop_reason(xstop_reason, reason);
> +		add_npu_xstop_reason(xstop_reason, reason);
>   		fir ^= 1ULL << bit;
>   	}
>   }
>   
> -static void find_npu2_checkstop_reason(int flat_chip_id,
> -				      struct OpalHMIEvent *hmi_evt,
> -				      uint64_t *out_flags)
> +static bool npu_fir_errors(struct phb *phb, int flat_chip_id,
> +			   uint32_t *xstop_reason)
>   {
> -	struct phb *phb;
> -	int i;
> -	bool npu2_hmi_verbose = false, found = false;
> -	uint64_t npu2_fir;
> -	uint64_t npu2_fir_mask;
> -	uint64_t npu2_fir_action0;
> -	uint64_t npu2_fir_action1;
> -	uint64_t npu2_fir_addr;
> -	uint64_t npu2_fir_mask_addr;
> -	uint64_t npu2_fir_action0_addr;
> -	uint64_t npu2_fir_action1_addr;
> +	uint64_t fir, fir_mask;
> +	uint64_t fir_action0, fir_action1;
> +	uint64_t fir_reg, fir_mask_reg;
> +	uint64_t fir_action0_reg, fir_action1_reg;
>   	uint64_t fatal_errors;
> -	uint32_t xstop_reason = 0;
> -	int total_errors = 0;
> +	uint64_t xscom_base;
> +	bool fir_errors = false;
> +	int fir_regs;
>   	const char *loc;
> -
> -	/* NPU2 only */
> -	if (PVR_TYPE(mfspr(SPR_PVR)) != PVR_TYPE_P9)
> -		return;
> -
> -	/* Find the NPU on the chip associated with the HMI. */
> -	for_each_phb(phb) {
> -		/* NOTE: if a chip ever has >1 NPU this will need adjusting */
> -		if (phb_is_npu2(phb->dt_node) &&
> -		    (dt_get_chip_id(phb->dt_node) == flat_chip_id)) {
> -			found = true;
> -			break;
> +	struct npu *npu;
> +	struct npu2 *npu2 = NULL;
> +	struct npu2_dev *dev;
> +	struct pau *pau;
> +
> +	fir_regs = (phb->phb_type == phb_type_pcie_v3) ? 1 : 3;
> +
> +	for (uint32_t i = 0; i < fir_regs; i++) {
> +		switch (phb->phb_type) {
> +		case phb_type_pcie_v3:
> +			fir_reg = NX_FIR;
> +			fir_mask_reg = NX_FIR_MASK;
> +			fir_action0_reg = NX_FIR_ACTION0;
> +			fir_action1_reg = NX_FIR_ACTION1;
> +
> +			npu = phb_to_npu(phb);
> +			if (npu != NULL)
> +				xscom_base = npu->at_xscom;
> +			else
> +				continue;
> +		break;
> +		case phb_type_npu_v2:
> +			fir_reg = NPU2_FIR(i);
> +			fir_mask_reg = NPU2_FIR_MASK(i);
> +			fir_action0_reg = NPU2_FIR_ACTION0(i);
> +			fir_action1_reg = NPU2_FIR_ACTION1(i);
> +			npu2 = phb_to_npu2_nvlink(phb);
> +			xscom_base = npu2->xscom_base;
> +		break;
> +		case phb_type_npu_v2_opencapi:
> +			fir_reg = NPU2_FIR(i);
> +			fir_mask_reg = NPU2_FIR_MASK(i);
> +			fir_action0_reg = NPU2_FIR_ACTION0(i);
> +			fir_action1_reg = NPU2_FIR_ACTION1(i);
> +			dev = phb_to_npu2_dev_ocapi(phb);
> +			npu2 = dev->npu;
> +			xscom_base = npu2->xscom_base;
> +		break;
> +		case phb_type_pau_opencapi:
> +			fir_reg = PAU_FIR(i);
> +			fir_mask_reg = PAU_FIR_MASK(i);
> +			fir_action0_reg = PAU_FIR_ACTION0(i);
> +			fir_action1_reg = PAU_FIR_ACTION1(i);
> +			pau = ((struct pau_dev *)(pau_phb_to_opencapi_dev(phb)))->pau;
> +			xscom_base = pau->xscom_base;
> +		break;
> +		default:
> +			continue;
>   		}
> -	}
> -
> -	/* If we didn't find a NPU on the chip, it's not our checkstop. */
> -	if (!found)
> -		return;
>   
> -	npu2_fir_addr = NPU2_FIR_REGISTER_0;
> -	npu2_fir_mask_addr = NPU2_FIR_REGISTER_0 + NPU2_FIR_MASK_OFFSET;
> -	npu2_fir_action0_addr = NPU2_FIR_REGISTER_0 + NPU2_FIR_ACTION0_OFFSET;
> -	npu2_fir_action1_addr = NPU2_FIR_REGISTER_0 + NPU2_FIR_ACTION1_OFFSET;
> -
> -	for (i = 0; i < NPU2_TOTAL_FIR_REGISTERS; i++) {
> -		/* Read all the registers necessary to find a checkstop condition. */
> -		if (xscom_read(flat_chip_id, npu2_fir_addr, &npu2_fir) ||
> -			xscom_read(flat_chip_id, npu2_fir_mask_addr, &npu2_fir_mask) ||
> -			xscom_read(flat_chip_id, npu2_fir_action0_addr, &npu2_fir_action0) ||
> -			xscom_read(flat_chip_id, npu2_fir_action1_addr, &npu2_fir_action1)) {
> -			prerror("HMI: Couldn't read NPU FIR register%d with XSCOM\n", i);
> +		if (xscom_read(flat_chip_id, xscom_base + fir_reg, &fir) ||
> +		    xscom_read(flat_chip_id, xscom_base + fir_mask_reg, &fir_mask) ||
> +		    xscom_read(flat_chip_id, xscom_base + fir_action0_reg, &fir_action0) ||
> +		    xscom_read(flat_chip_id, xscom_base + fir_action1_reg, &fir_action1)) {
> +			prerror("HMI: Couldn't read NPU/PAU FIR register%d with XSCOM\n", i);
>   			continue;
>   		}
>   
> -		fatal_errors = npu2_fir & ~npu2_fir_mask & npu2_fir_action0 & npu2_fir_action1;
> +		fatal_errors = fir & ~fir_mask & fir_action0 & fir_action1;
>   
>   		if (fatal_errors) {
>   			loc = chip_loc_code(flat_chip_id);
>   			if (!loc)
>   				loc = "Not Available";
> -			prlog(PR_ERR, "NPU: [Loc: %s] P:%d FIR#%d FIR 0x%016llx mask 0x%016llx\n",
> -					loc, flat_chip_id, i, npu2_fir, npu2_fir_mask);
> -			prlog(PR_ERR, "NPU: [Loc: %s] P:%d ACTION0 0x%016llx, ACTION1 0x%016llx\n",
> -					loc, flat_chip_id, npu2_fir_action0, npu2_fir_action1);
> -			total_errors++;
> -
> -			encode_npu2_xstop_reason(&xstop_reason, fatal_errors, i);
> +			prlog(PR_ERR, "NPU/PAU: [Loc: %s] P:%d FIR#%d "
> +				      "FIR 0x%016llx mask 0x%016llx\n",
> +				      loc, flat_chip_id, i, fir, fir_mask);
> +			prlog(PR_ERR, "NPU/PAU: [Loc: %s] P:%d ACTION0 "
> +				      "0x%016llx, ACTION1 0x%016llx\n",
> +				      loc, flat_chip_id, fir_action0, fir_action1);
> +			if (phb->phb_type != phb_type_pcie_v3)
> +				encode_npu_xstop_reason(xstop_reason,
> +							fatal_errors,
> +							i);
> +			fir_errors = true;
>   		}
> -
> -		/* Can't do a fence yet, we are just logging fir information for now */
> -		npu2_fir_addr += NPU2_FIR_OFFSET;
> -		npu2_fir_mask_addr += NPU2_FIR_OFFSET;
> -		npu2_fir_action0_addr += NPU2_FIR_OFFSET;
> -		npu2_fir_action1_addr += NPU2_FIR_OFFSET;
> -
>   	}
>   
> -	if (!total_errors)
> -		return;
> -
> -	npu2_hmi_verbose = nvram_query_eq_safe("npu2-hmi-verbose", "true");
> -	/* Force this for now until we sort out something better */
> -	npu2_hmi_verbose = true;
> +	/* dump registers */
> +	if (fir_errors) {
> +		switch (phb->phb_type) {
> +		case phb_type_npu_v2:
> +		case phb_type_npu_v2_opencapi:
> +			npu2_dump_scoms(npu2, flat_chip_id);
> +		break;
> +		case phb_type_pau_opencapi:
> +			pau_opencapi_dump_scoms(pau);
> +		break;
> +		default:
> +		break;
> +		}
>   
> -	if (npu2_hmi_verbose) {
> -		npu2_dump_scoms(flat_chip_id);
>   		prlog(PR_ERR, " _________________________ \n");
> -		prlog(PR_ERR, "<    It's Debug time!     >\n");
> +		prlog(PR_ERR, "< It's Debug time!        >\n");
>   		prlog(PR_ERR, " ------------------------- \n");
> -		prlog(PR_ERR, "       \\   ,__,            \n");
> -		prlog(PR_ERR, "        \\  (oo)____        \n");
> -		prlog(PR_ERR, "           (__)    )\\      \n");
> +		prlog(PR_ERR, "       \\   ,__,           \n");
> +		prlog(PR_ERR, "        \\  (oo)____       \n");
> +		prlog(PR_ERR, "           (__)    )\\     \n");
>   		prlog(PR_ERR, "              ||--|| *     \n");
>   	}
>   
> -	/* Set up the HMI event */
> -	hmi_evt->severity = OpalHMI_SEV_WARNING;
> -	hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
> -	hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_NPU;
> -	hmi_evt->u.xstop_error.xstop_reason = cpu_to_be32(xstop_reason);
> -	hmi_evt->u.xstop_error.u.chip_id = cpu_to_be32(flat_chip_id);
> -
> -	/* Marking the event as recoverable so that we don't crash */
> -	queue_hmi_event(hmi_evt, 1, out_flags);
> +	return fir_errors;
>   }
>   
>   static void find_npu_checkstop_reason(int flat_chip_id,
> @@ -873,67 +880,47 @@ static void find_npu_checkstop_reason(int flat_chip_id,
>   				      uint64_t *out_flags)
>   {
>   	struct phb *phb;
> -	struct npu *p = NULL;
> -
> -	uint64_t npu_fir;
> -	uint64_t npu_fir_mask;
> -	uint64_t npu_fir_action0;
> -	uint64_t npu_fir_action1;
> -	uint64_t fatal_errors;
> -
> -	/* Only check for NPU errors if the chip has a NPU */
> -	if (PVR_TYPE(mfspr(SPR_PVR)) != PVR_TYPE_P8NVL)
> -		return find_npu2_checkstop_reason(flat_chip_id, hmi_evt, out_flags);
> -
> -	/* Find the NPU on the chip associated with the HMI. */
> -	for_each_phb(phb) {
> -		/* NOTE: if a chip ever has >1 NPU this will need adjusting */
> -		if (dt_node_is_compatible(phb->dt_node, "ibm,power8-npu-pciex") &&
> -		    (dt_get_chip_id(phb->dt_node) == flat_chip_id)) {
> -			p = phb_to_npu(phb);
> -			break;
> -		}
> -	}
> +	struct dt_node *dn;
> +	uint32_t xstop_reason = 0;
>   
> -	/* If we didn't find a NPU on the chip, it's not our checkstop. */
> -	if (p == NULL)
> +	/* Only check for NPU errors if the chip has a NPU/PAU */
> +	if ((PVR_TYPE(mfspr(SPR_PVR)) != PVR_TYPE_P8NVL) &&
> +	    (PVR_TYPE(mfspr(SPR_PVR)) != PVR_TYPE_P9) &&
> +	    (PVR_TYPE(mfspr(SPR_PVR)) != PVR_TYPE_P10))
>   		return;
>   
> -	/* Read all the registers necessary to find a checkstop condition. */
> -	if (xscom_read(flat_chip_id,
> -		       p->at_xscom + NX_FIR, &npu_fir) ||
> -	    xscom_read(flat_chip_id,
> -		       p->at_xscom + NX_FIR_MASK, &npu_fir_mask) ||
> -	    xscom_read(flat_chip_id,
> -		       p->at_xscom + NX_FIR_ACTION0, &npu_fir_action0) ||
> -	    xscom_read(flat_chip_id,
> -		       p->at_xscom + NX_FIR_ACTION1, &npu_fir_action1)) {
> -		prerror("Couldn't read NPU registers with XSCOM\n");
> -		return;
> -	}
> +	/* Find the NPU/PAU on the chip associated with the HMI. */
> +	for_each_phb(phb) {
> +		dn = phb->dt_node;
>   
> -	fatal_errors = npu_fir & ~npu_fir_mask & npu_fir_action0 & npu_fir_action1;
> +		if (!(dt_node_is_compatible(dn, "ibm,power8-npu-pciex") ||
> +		      dt_node_is_compatible(dn, "ibm,power9-npu-pciex") ||
> +		      dt_node_is_compatible(dn, "ibm,power9-npu-opencapi-pciex") ||
> +		      dt_node_is_compatible(dn, "ibm,power10-pau-opencapi-pciex")))
> +			continue;
>   
> -	/* If there's no errors, we don't need to do anything. */
> -	if (!fatal_errors)
> -		return;
> +		if (dt_get_chip_id(dn) != flat_chip_id)
> +			continue;
>   
> -	prlog(PR_DEBUG, "NPU: FIR 0x%016llx mask 0x%016llx\n",
> -	      npu_fir, npu_fir_mask);
> -	prlog(PR_DEBUG, "NPU: ACTION0 0x%016llx, ACTION1 0x%016llx\n",
> -	      npu_fir_action0, npu_fir_action1);
> +		/* Read all the registers necessary to find a checkstop condition. */
> +		if (!npu_fir_errors(phb, flat_chip_id, &xstop_reason))
> +			continue;
>   
> -	/* Set the NPU to fenced since it can't recover. */
> -	npu_set_fence_state(p, true);
> +		if (phb->phb_type == phb_type_pcie_v3) {
> +			/* Set the NPU to fenced since it can't recover. */
> +			npu_set_fence_state(phb_to_npu(phb), true);
> +		}
>   
> -	/* Set up the HMI event */
> -	hmi_evt->severity = OpalHMI_SEV_WARNING;
> -	hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
> -	hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_NPU;
> -	hmi_evt->u.xstop_error.u.chip_id = cpu_to_be32(flat_chip_id);
> +		/* Set up the HMI event */
> +		hmi_evt->severity = OpalHMI_SEV_WARNING;
> +		hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
> +		hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_NPU;
> +		hmi_evt->u.xstop_error.xstop_reason = xstop_reason;
> +		hmi_evt->u.xstop_error.u.chip_id = cpu_to_be32(flat_chip_id);
>   
> -	/* The HMI is "recoverable" because it shouldn't crash the system */
> -	queue_hmi_event(hmi_evt, 1, out_flags);
> +		/* Marking the event as recoverable so that we don't crash */
> +		queue_hmi_event(hmi_evt, 1, out_flags);
> +	}
>   }
>   
>   static void decode_malfunction(struct OpalHMIEvent *hmi_evt, uint64_t *out_flags)
> @@ -962,7 +949,8 @@ static void decode_malfunction(struct OpalHMIEvent *hmi_evt, uint64_t *out_flags
>   			xscom_write(this_cpu()->chip_id, malf_alert_scom,
>   								~PPC_BIT(i));
>   			find_capp_checkstop_reason(i, hmi_evt, &flags);
> -			find_nx_checkstop_reason(i, hmi_evt, &flags);
> +			if (proc_gen != proc_gen_p10)
> +				find_nx_checkstop_reason(i, hmi_evt, &flags);
>   			find_npu_checkstop_reason(i, hmi_evt, &flags);
>   		}
>   	}
> diff --git a/hw/npu2-common.c b/hw/npu2-common.c
> index 3bc9bcee..b3f500f4 100644
> --- a/hw/npu2-common.c
> +++ b/hw/npu2-common.c
> @@ -296,31 +296,10 @@ static void show_all_regs(struct npu2 *npu, int brick_index)
>   	}
>   }
>   
> -void npu2_dump_scoms(int chip_id)
> +void npu2_dump_scoms(struct npu2 *npu, int chip_id)
>   {
> -	struct npu2 *npu;
> -	struct phb *phb;
> -	struct npu2_dev *dev;
> -
> -	/*
> -	 * Look for the npu2 structure for that chip ID. We can access it
> -	 * through the array of phbs, looking for a nvlink or opencapi
> -	 * phb. We can have several entries, but they all point
> -	 * to the same npu2 structure
> -	 */
> -	for_each_phb(phb) {
> -		npu = NULL;
> -		if (phb->phb_type == phb_type_npu_v2) {
> -			npu = phb_to_npu2_nvlink(phb);
> -		} else if (phb->phb_type == phb_type_npu_v2_opencapi) {
> -			dev = phb_to_npu2_dev_ocapi(phb);
> -			npu = dev->npu;
> -		}
> -		if (npu && npu->chip_id == chip_id) {
> -			show_all_regs(npu, -1 /* all bricks */);
> -			break;
> -		}
> -	}
> +	if (npu && npu->chip_id == chip_id)
> +		show_all_regs(npu, -1 /* all bricks */);
>   }
>   
>   static uint64_t npu2_ipi_attributes(struct irq_source *is __unused, uint32_t isn __unused)
> diff --git a/hw/pau.c b/hw/pau.c
> index 61a5dda4..d4222361 100644
> --- a/hw/pau.c
> +++ b/hw/pau.c
> @@ -33,6 +33,50 @@ struct pau_dev *pau_next_dev(struct pau *pau, struct pau_dev *dev,
>   	return NULL;
>   }
>   
> +static void pau_opencapi_dump_scom_reg(struct pau *pau, uint64_t reg)
> +{
> +	PAUDBG(pau, "0x%llx = 0x%016llx\n", reg, pau_read(pau, reg));
> +}
> +
> +void pau_opencapi_dump_scoms(struct pau *pau)
> +{
> +	struct pau_dev *dev;
> +	uint64_t cq_sm;
> +
> +	for (uint32_t i = 1; i < 4; i++) {
> +		cq_sm = PAU_BLOCK_CQ_SM(i);
> +
> +		pau_opencapi_dump_scom_reg(pau, cq_sm + PAU_REG_OFFSET(PAU_MCP_MISC_CERR_MESSAGE0));
> +		pau_opencapi_dump_scom_reg(pau, cq_sm + PAU_REG_OFFSET(PAU_MCP_MISC_CERR_MESSAGE1));
> +		pau_opencapi_dump_scom_reg(pau, cq_sm + PAU_REG_OFFSET(PAU_MCP_MISC_CERR_MESSAGE2));
> +		pau_opencapi_dump_scom_reg(pau, cq_sm + PAU_REG_OFFSET(PAU_MCP_MISC_CERR_MESSAGE3));
> +		pau_opencapi_dump_scom_reg(pau, cq_sm + PAU_REG_OFFSET(PAU_MCP_MISC_CERR_MESSAGE4));
> +		pau_opencapi_dump_scom_reg(pau, cq_sm + PAU_REG_OFFSET(PAU_MCP_MISC_CERR_MESSAGE5));
> +		pau_opencapi_dump_scom_reg(pau, cq_sm + PAU_REG_OFFSET(PAU_MCP_MISC_CERR_MESSAGE6));
> +		pau_opencapi_dump_scom_reg(pau, cq_sm + PAU_REG_OFFSET(PAU_MCP_MISC_CERR_MESSAGE7));
> +		pau_opencapi_dump_scom_reg(pau, cq_sm + PAU_REG_OFFSET(PAU_MCP_MISC_CERR_FIRST0));
> +		pau_opencapi_dump_scom_reg(pau, cq_sm + PAU_REG_OFFSET(PAU_MCP_MISC_CERR_FIRST1));
> +		pau_opencapi_dump_scom_reg(pau, cq_sm + PAU_REG_OFFSET(PAU_MCP_MISC_CERR_FIRST2));
> +	}
> +
> +	pau_opencapi_dump_scom_reg(pau, PAU_CTL_MISC_CERR_MESSAGE0);
> +	pau_opencapi_dump_scom_reg(pau, PAU_CTL_MISC_CERR_MESSAGE1);
> +	pau_opencapi_dump_scom_reg(pau, PAU_CTL_MISC_CERR_MESSAGE2);
> +	pau_opencapi_dump_scom_reg(pau, PAU_CTL_MISC_CERR_FIRST0);
> +	pau_opencapi_dump_scom_reg(pau, PAU_CTL_MISC_CERR_FIRST1);
> +	pau_opencapi_dump_scom_reg(pau, PAU_DAT_MISC_CERR_ECC_HOLD);
> +	pau_opencapi_dump_scom_reg(pau, PAU_DAT_MISC_CERR_ECC_MASK);
> +	pau_opencapi_dump_scom_reg(pau, PAU_DAT_MISC_CERR_ECC_FIRST);
> +
> +	pau_for_each_opencapi_dev(dev, pau) {
> +		pau_opencapi_dump_scom_reg(pau, PAU_OTL_MISC_ERR_RPT_HOLD0(dev->index));
> +		pau_opencapi_dump_scom_reg(pau, PAU_OTL_MISC_OTL_REM0(dev->index));
> +		pau_opencapi_dump_scom_reg(pau, PAU_OTL_MISC_ERROR_SIG_RXI(dev->index));
> +		pau_opencapi_dump_scom_reg(pau, PAU_OTL_MISC_ERROR_SIG_RXO(dev->index));
> +		pau_opencapi_dump_scom_reg(pau, PAU_OTL_MISC_ERR_RPT_HOLD1(dev->index));
> +	}
> +}
> +
>   static void pau_dt_create_link(struct dt_node *pau, uint32_t pau_index,
>   			       uint32_t dev_index)
>   {
> diff --git a/include/npu2-regs.h b/include/npu2-regs.h
> index 22f58a6a..cb1d3956 100644
> --- a/include/npu2-regs.h
> +++ b/include/npu2-regs.h
> @@ -610,6 +610,11 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   
>   #define NPU2_TOTAL_FIR_REGISTERS		3
>   
> +#define NPU2_FIR(n)				(0x2c00 + (n) * 0x40)
> +#define NPU2_FIR_MASK(n)			(0x2c03 + (n) * 0x40)
> +#define NPU2_FIR_ACTION0(n)			(0x2c06 + (n) * 0x40)
> +#define NPU2_FIR_ACTION1(n)			(0x2c07 + (n) * 0x40)
> +
>   /*
>    * Can't use enums for 64 bit values, use #defines
>    */
> diff --git a/include/npu2.h b/include/npu2.h
> index 23b06b4b..a12bf98a 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -241,7 +241,7 @@ int64_t npu2_freeze_status(struct phb *phb __unused,
>   			   uint8_t *freeze_state,
>   			   uint16_t *pci_error_type __unused,
>   			   uint16_t *severity __unused);
> -void npu2_dump_scoms(int chip_id);
> +void npu2_dump_scoms(struct npu2 *npu, int chip_id);
>   
>   int64_t npu2_init_context(struct phb *phb, uint64_t msr, uint64_t bdf);
>   int64_t npu2_destroy_context(struct phb *phb, uint64_t bdf);
> diff --git a/include/pau-regs.h b/include/pau-regs.h
> index 19b0b7cd..b852a5b5 100644
> --- a/include/pau-regs.h
> +++ b/include/pau-regs.h
> @@ -48,6 +48,17 @@
>   #define   PAU_MCP_MISC_CFG0_MA_MCRESP_OPT_WRP	PPC_BIT(9)
>   #define   PAU_MCP_MISC_CFG0_ENABLE_PBUS		PPC_BIT(26)
>   #define   PAU_MCP_MISC_CFG0_OCAPI_MODE		PPC_BITMASK(44, 48)
> +#define PAU_MCP_MISC_CERR_MESSAGE0		(PAU_BLOCK_CQ_SM(0) + 0x030)
> +#define PAU_MCP_MISC_CERR_MESSAGE1		(PAU_BLOCK_CQ_SM(0) + 0x038)
> +#define PAU_MCP_MISC_CERR_MESSAGE2		(PAU_BLOCK_CQ_SM(0) + 0x040)
> +#define PAU_MCP_MISC_CERR_MESSAGE3		(PAU_BLOCK_CQ_SM(0) + 0x048)
> +#define PAU_MCP_MISC_CERR_MESSAGE4		(PAU_BLOCK_CQ_SM(0) + 0x050)
> +#define PAU_MCP_MISC_CERR_MESSAGE5		(PAU_BLOCK_CQ_SM(0) + 0x058)
> +#define PAU_MCP_MISC_CERR_MESSAGE6		(PAU_BLOCK_CQ_SM(0) + 0x060)
> +#define PAU_MCP_MISC_CERR_MESSAGE7		(PAU_BLOCK_CQ_SM(0) + 0x068)
> +#define PAU_MCP_MISC_CERR_FIRST0		(PAU_BLOCK_CQ_SM(0) + 0x078)
> +#define PAU_MCP_MISC_CERR_FIRST1		(PAU_BLOCK_CQ_SM(0) + 0x080)
> +#define PAU_MCP_MISC_CERR_FIRST2		(PAU_BLOCK_CQ_SM(0) + 0x088)
>   #define PAU_SNP_MISC_CFG0			(PAU_BLOCK_CQ_SM(0) + 0x180)
>   #define   PAU_SNP_MISC_CFG0_ENABLE_PBUS		PPC_BIT(2)
>   #define   PAU_SNP_MISC_CFG0_OCAPI_MODE		PPC_BITMASK(32, 36)
> @@ -79,6 +90,11 @@
>   #define PAU_CTL_MISC_MMIOPA_CONFIG(brk)		(PAU_BLOCK_CQ_CTL + 0x098 + (brk) * 8)
>   #define   PAU_CTL_MISC_MMIOPA_CONFIG_BAR_ADDR	PPC_BITMASK(1, 35)
>   #define   PAU_CTL_MISC_MMIOPA_CONFIG_BAR_SIZE	PPC_BITMASK(39, 43)
> +#define PAU_CTL_MISC_CERR_MESSAGE0		(PAU_BLOCK_CQ_CTL + 0x0C0)
> +#define PAU_CTL_MISC_CERR_MESSAGE1		(PAU_BLOCK_CQ_CTL + 0x0C8)
> +#define PAU_CTL_MISC_CERR_MESSAGE2		(PAU_BLOCK_CQ_CTL + 0x0D0)
> +#define PAU_CTL_MISC_CERR_FIRST0		(PAU_BLOCK_CQ_CTL + 0x0D8)
> +#define PAU_CTL_MISC_CERR_FIRST1		(PAU_BLOCK_CQ_CTL + 0x0E0)
>   #define PAU_CTL_MISC_FENCE_CTRL(brk)		(PAU_BLOCK_CQ_CTL + 0x108 + (brk) * 8)
>   #define   PAU_CTL_MISC_FENCE_REQUEST		PPC_BITMASK(0, 1)
>   #define PAU_CTL_MISC_CFG_ADDR(brk)		(PAU_BLOCK_CQ_CTL + 0x250 + (brk) * 8)
> @@ -93,6 +109,9 @@
>   /* CQ_DAT block registers */
>   #define PAU_DAT_MISC_CFG1			(PAU_BLOCK_CQ_DAT + 0x008)
>   #define   PAU_DAT_MISC_CFG1_OCAPI_MODE		PPC_BITMASK(40, 44)
> +#define PAU_DAT_MISC_CERR_ECC_HOLD		(PAU_BLOCK_CQ_DAT + 0x020)
> +#define PAU_DAT_MISC_CERR_ECC_MASK		(PAU_BLOCK_CQ_DAT + 0x028)
> +#define PAU_DAT_MISC_CERR_ECC_FIRST		(PAU_BLOCK_CQ_DAT + 0x030)
>   
>   /* OTL block registers */
>   #define PAU_OTL_MISC_CFG0(brk)			(PAU_BLOCK_OTL(brk) + 0x000)
> @@ -102,6 +121,7 @@
>   #define   PAU_OTL_MISC_CFG0_ENABLE_4_0		PPC_BIT(51)
>   #define   PAU_OTL_MISC_CFG0_XLATE_RELEASE	PPC_BIT(62)
>   #define   PAU_OTL_MISC_CFG0_ENABLE_5_0		PPC_BIT(63)
> +#define PAU_OTL_MISC_ERR_RPT_HOLD0(brk)		(PAU_BLOCK_OTL(brk) + 0x030)
>   #define PAU_OTL_MISC_CFG_TLX_CREDITS(brk)	(PAU_BLOCK_OTL(brk) + 0x050)
>   #define   PAU_OTL_MISC_CFG_TLX_CREDITS_VC0	PPC_BITMASK(0, 7)
>   #define   PAU_OTL_MISC_CFG_TLX_CREDITS_VC1	PPC_BITMASK(8, 15)
> @@ -118,6 +138,10 @@
>   #define   PAU_OTL_MISC_CFG_TX_TEMP2_RATE	PPC_BITMASK(16, 19)
>   #define   PAU_OTL_MISC_CFG_TX_TEMP3_RATE	PPC_BITMASK(20, 23)
>   #define   PAU_OTL_MISC_CFG_TX_CRET_FREQ		PPC_BITMASK(32, 34)
> +#define PAU_OTL_MISC_OTL_REM0(brk)		(PAU_BLOCK_OTL(brk) + 0x068)
> +#define PAU_OTL_MISC_ERROR_SIG_RXI(brk)		(PAU_BLOCK_OTL(brk) + 0x070)
> +#define PAU_OTL_MISC_ERROR_SIG_RXO(brk)		(PAU_BLOCK_OTL(brk) + 0x078)
> +#define PAU_OTL_MISC_ERR_RPT_HOLD1(brk)		(PAU_BLOCK_OTL(brk) + 0x0B0)
>   #define PAU_OTL_MISC_PSL_DSISR_AN(brk)		(PAU_BLOCK_OTL_PSL(brk) + 0x000)
>   #define PAU_OTL_MISC_PSL_DAR_AN(brk)		(PAU_BLOCK_OTL_PSL(brk) + 0x008)
>   #define PAU_OTL_MISC_PSL_TFC_AN(brk)		(PAU_BLOCK_OTL_PSL(brk) + 0x010)
> diff --git a/include/pau.h b/include/pau.h
> index b6fabe7f..547c1934 100644
> --- a/include/pau.h
> +++ b/include/pau.h
> @@ -189,4 +189,6 @@ static inline uint64_t pau_read(struct pau *pau, uint64_t reg)
>   	return pau_scom_read(pau, reg, PAU_MISC_DA_LEN_8B);
>   }
>   
> +void pau_opencapi_dump_scoms(struct pau *pau);
> +
>   #endif /* __PAU_H */
> diff --git a/include/xscom-p10-regs.h b/include/xscom-p10-regs.h
> index 21ac21f8..5ca4703f 100644
> --- a/include/xscom-p10-regs.h
> +++ b/include/xscom-p10-regs.h
> @@ -15,6 +15,9 @@
>   #define P10_NX_DMA_ENGINE_FIR	0x02011100 /* DMA & Engine FIR Data Register */
>   #define P10_NX_PBI_FIR		0x02011080 /* PowerBus Interface FIR Register */
>   
> +/* pMisc Receive Malfunction Alert Register */
> +#define P10_MALFUNC_ALERT	0x00090022
> +
>   #define P10_EC_CORE_THREAD_STATE	0x412 /* XXX P10 is this right? */
>   #define P10_THREAD_STOPPED(t)		PPC_BIT(56 + (t))
>   
> 


More information about the Skiboot mailing list