[Skiboot] [PATCH V3 5/6] hmi: Recover both CAPP units on Naples after malfunction alert

Philippe Bergheaud felix at linux.vnet.ibm.com
Sat Mar 12 02:55:13 AEDT 2016


Naples has two capp units. Probe both units to identify the card in
error. Use the xscom register offset to operate on the right unit.

Signed-off-by: Philippe Bergheaud <felix at linux.vnet.ibm.com>
---
V2: after Mikey's review
  - Identify and recover the broken card (not both)
  - Add reg_offset to register addresses in do_capp_recovery_scoms
V3:
  - Parentheses around macro arguments
  - New macro CHIP_IS_NAPLES(chip)
  - Added a comment to clarify handle_capp_recoverable
  - Use int capp_num instead of boolean dual_capp
  - Use new macro PHB3_CAPP_REG_OFFSET(p)
  - s/reg_offset/offset/
  - s/capp/capp_index/

 core/hmi.c | 47 +++++++++++++++++++++++++++++++++++------------
 hw/phb3.c  | 16 +++++++++++-----
 2 files changed, 46 insertions(+), 17 deletions(-)

diff --git a/core/hmi.c b/core/hmi.c
index d2cca90..e5d7e17 100644
--- a/core/hmi.c
+++ b/core/hmi.c
@@ -242,14 +242,21 @@ static int queue_hmi_event(struct OpalHMIEvent *hmi_evt, int recover)
 				hmi_data[3]);
 }
 
-static int is_capp_recoverable(int chip_id)
+static int is_capp_recoverable(int chip_id, int capp_index)
 {
 	uint64_t reg;
-	xscom_read(chip_id, CAPP_ERR_STATUS_CTRL, &reg);
+	uint32_t reg_offset = capp_index ? CAPP1_REG_OFFSET : 0x0;
+
+	xscom_read(chip_id, CAPP_ERR_STATUS_CTRL + reg_offset, &reg);
 	return (reg & PPC_BIT(0)) != 0;
 }
 
-static int handle_capp_recoverable(int chip_id)
+#define CAPP_PHB3_ATTACHED(chip, phb_index) \
+	((chip)->capp_phb3_attached_mask & (1 << (phb_index)))
+
+#define CHIP_IS_NAPLES(chip) ((chip)->type == PROC_CHIP_P8_NAPLES)
+
+static int handle_capp_recoverable(int chip_id, int capp_index)
 {
 	struct dt_node *np;
 	u64 phb_id;
@@ -257,14 +264,26 @@ static int handle_capp_recoverable(int chip_id)
 	struct phb *phb;
 	u32 phb_index;
 	struct proc_chip *chip = get_chip(chip_id);
-	u8 mask = chip->capp_phb3_attached_mask;
 
 	dt_for_each_compatible(dt_root, np, "ibm,power8-pciex") {
 		dt_chip_id = dt_prop_get_u32(np, "ibm,chip-id");
 		phb_index = dt_prop_get_u32(np, "ibm,phb-index");
 		phb_id = dt_prop_get_u64(np, "ibm,opal-phbid");
 
-		if ((mask & (1 << phb_index)) && (chip_id == dt_chip_id)) {
+		/*
+		 * Murano/Venice have a single capp (capp0) per chip,
+		 * that can be attached to phb0, phb1 or phb2.
+		 * The capp is identified as being attached to the chip,
+		 * regardless of the phb index.
+		 *
+		 * Naples has two capps per chip: capp0 attached to phb0,
+		 * and capp1 attached to phb1.
+		 * Once we know that the capp is attached to the chip,
+		 * we must also check that capp/phb indices are equal.
+		 */
+		if ((chip_id == dt_chip_id) &&
+		    CAPP_PHB3_ATTACHED(chip, phb_index) &&
+		    (!CHIP_IS_NAPLES(chip) || phb_index == capp_index)) {
 			phb = pci_get_phb(phb_id);
 			phb->ops->lock(phb);
 			phb->ops->set_capp_recovery(phb);
@@ -277,17 +296,21 @@ static int handle_capp_recoverable(int chip_id)
 
 static int decode_one_malfunction(int flat_chip_id, struct OpalHMIEvent *hmi_evt)
 {
+	int capp_index;
+	struct proc_chip *chip = get_chip(flat_chip_id);
+	int capp_num = CHIP_IS_NAPLES(chip) ? 2 : 1;
+
 	hmi_evt->severity = OpalHMI_SEV_FATAL;
 	hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
 
-	if (is_capp_recoverable(flat_chip_id)) {
-		if (handle_capp_recoverable(flat_chip_id) == 0)
-			return 0;
+	for (capp_index = 0; capp_index < capp_num; capp_index++)
+		if (is_capp_recoverable(flat_chip_id, capp_index))
+			if (handle_capp_recoverable(flat_chip_id, capp_index)) {
+				hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
+				hmi_evt->type = OpalHMI_ERROR_CAPP_RECOVERY;
+				return 1;
+			}
 
-		hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
-		hmi_evt->type = OpalHMI_ERROR_CAPP_RECOVERY;
-		return 1;
-	}
 	/* TODO check other FIRs */
 	return 0;
 }
diff --git a/hw/phb3.c b/hw/phb3.c
index cd02b75..ef9a679 100644
--- a/hw/phb3.c
+++ b/hw/phb3.c
@@ -2442,16 +2442,22 @@ static int64_t capp_load_ucode(struct phb3 *p)
 static void do_capp_recovery_scoms(struct phb3 *p)
 {
 	uint64_t reg;
+	uint32_t offset;
+
 	PHBDBG(p, "Doing CAPP recovery scoms\n");
 
-	xscom_write(p->chip_id, SNOOP_CAPI_CONFIG, 0); /* disable snoops */
+	offset = PHB3_CAPP_REG_OFFSET(p);
+	/* disable snoops */
+	xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset, 0);
 	capp_load_ucode(p);
-	xscom_write(p->chip_id, CAPP_ERR_RPT_CLR, 0); /* clear err rpt reg*/
-	xscom_write(p->chip_id, CAPP_FIR, 0); /* clear capp fir */
+	/* clear err rpt reg*/
+	xscom_write(p->chip_id, CAPP_ERR_RPT_CLR + offset, 0);
+	/* clear capp fir */
+	xscom_write(p->chip_id, CAPP_FIR + offset, 0);
 
-	xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL, &reg);
+	xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &reg);
 	reg &= ~(PPC_BIT(0) | PPC_BIT(1));
-	xscom_write(p->chip_id, CAPP_ERR_STATUS_CTRL, reg);
+	xscom_write(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, reg);
 }
 
 /*
-- 
2.1.0



More information about the Skiboot mailing list