[Skiboot] [PATCH 5/6] hw/phb3: Adjust ECRC on root port dynamically

Gavin Shan gwshan at linux.vnet.ibm.com
Thu Mar 30 10:05:30 AEDT 2017


The issue was reported by Mark: the Samsung NVMe adapter is lost
when it's connected to PMC 8546 PCIe switch, until ECRC is disabled
on the root port. Actually, we found similar issue prevously when
Broadcom adapter is connected to same part of PCIe switch and it
was fixed by commit 60ce59ccd0e9 ("hw/phb3: Disable ECRC on Broadcom
adapter behind PMC switch"). Unfortunately, the commit doesn't fix
the Samsung NVMe adapter lost issue.

This fixes the issues by disable ECRC generation/check on root port
when PMC 8546 PCIe switch ports are found. This can be extended for
other PCIe switches or endpoints in future: Each PHB maintains the
count of PCI devices (PMC 8546 PCIe switch ports currently) which
require to disable ECRC on root port. The ECRC functionality is
enabled when first PMC 8546 switch port is probed and disabled when
last PMC 8546 switch port is destroyed (in PCI hot remove scenario).
Except PHB's reinitialization after complete reset, the ECRC on
root port is untouched.

Reported-by: Mark E Schreiter <markes at us.ibm.com>
Signed-off-by: Gavin Shan <gwshan at linux.vnet.ibm.com>
Tested-by: Mark E Schreiter <markes at us.ibm.com>
---
 hw/phb3.c      | 74 +++++++++++++++++++++++++++++++++++++++++++++++++---------
 include/phb3.h |  1 +
 2 files changed, 64 insertions(+), 11 deletions(-)

diff --git a/hw/phb3.c b/hw/phb3.c
index b105fb2..3463ca1 100644
--- a/hw/phb3.c
+++ b/hw/phb3.c
@@ -367,6 +367,26 @@ static int64_t phb3_get_reserved_pe_number(struct phb *phb __unused)
 	return PHB3_RESERVED_PE_NUM;
 }
 
+static inline void phb3_enable_ecrc(struct phb *phb, bool enable)
+{
+	struct phb3 *p = phb_to_phb3(phb);
+	uint32_t ctl;
+
+	if (p->aercap <= 0)
+		return;
+
+	pci_cfg_read32(phb, 0, p->aercap + PCIECAP_AER_CAPCTL, &ctl);
+	if (enable) {
+		ctl |= (PCIECAP_AER_CAPCTL_ECRCG_EN |
+			PCIECAP_AER_CAPCTL_ECRCC_EN);
+	} else {
+		ctl &= ~(PCIECAP_AER_CAPCTL_ECRCG_EN |
+			 PCIECAP_AER_CAPCTL_ECRCC_EN);
+	}
+
+	pci_cfg_write32(phb, 0, p->aercap + PCIECAP_AER_CAPCTL, ctl);
+}
+
 static void phb3_root_port_init(struct phb *phb, struct pci_device *dev,
 				int ecap, int aercap)
 {
@@ -436,10 +456,7 @@ static void phb3_root_port_init(struct phb *phb, struct pci_device *dev,
 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CE_MASK, val32);
 
 	/* Enable ECRC check */
-	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, &val32);
-	val32 |= (PCIECAP_AER_CAPCTL_ECRCG_EN |
-		  PCIECAP_AER_CAPCTL_ECRCC_EN);
-	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, val32);
+	phb3_enable_ecrc(phb, true);
 
 	/* Enable all error reporting */
 	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_RERR_CMD, &val32);
@@ -646,10 +663,24 @@ static void phb3_check_device_quirks(struct phb *phb, struct pci_device *dev)
 	}
 }
 
+static inline int phb3_should_disable_ecrc(struct pci_device *pd)
+{
+	/*
+	 * When we have PMC PCIe switch, we need disable ECRC on root port.
+	 * Otherwise, the adapters behind the switch downstream ports might
+	 * not probed successfully.
+	 */
+	if (pd->vdid == 0x854611f8)
+		return true;
+
+	return false;
+}
+
 static int phb3_device_init(struct phb *phb,
 			    struct pci_device *dev,
-			    void *data __unused)
+			    void *data)
 {
+	struct phb3 *p = phb_to_phb3(phb);
 	int ecap, aercap;
 
 	/* Some special adapter tweaks for devices directly under the PHB */
@@ -668,9 +699,30 @@ static int phb3_device_init(struct phb *phb,
 	else
 		phb3_endpoint_init(phb, dev, ecap, aercap);
 
+	/*
+	 * Check if we need disable ECRC functionality on root port. It
+	 * only happens when PCI topology changes, meaning it's skipped
+	 * when reinitializing PCI device after EEH reset.
+	 */
+	if (!data && phb3_should_disable_ecrc(dev)) {
+		if (p->no_ecrc_devs++ == 0)
+			phb3_enable_ecrc(phb, false);
+	}
+
 	return 0;
 }
 
+static void phb3_device_remove(struct phb *phb, struct pci_device *pd)
+{
+	struct phb3 *p = phb_to_phb3(phb);
+
+	if (!phb3_should_disable_ecrc(pd) || p->no_ecrc_devs == 0)
+		return;
+
+	if (--p->no_ecrc_devs == 0)
+		phb3_enable_ecrc(phb, true);
+}
+
 static int64_t phb3_pci_reinit(struct phb *phb, uint64_t scope, uint64_t data)
 {
 	struct pci_device *pd;
@@ -684,7 +736,7 @@ static int64_t phb3_pci_reinit(struct phb *phb, uint64_t scope, uint64_t data)
 	if (!pd)
 		return OPAL_PARAMETER;
 
-	ret = phb3_device_init(phb, pd, NULL);
+	ret = phb3_device_init(phb, pd, pd);
 	if (ret)
 		return OPAL_HARDWARE;
 
@@ -3810,7 +3862,7 @@ static const struct phb_ops phb3_ops = {
 	.choose_bus		= phb3_choose_bus,
 	.get_reserved_pe_number	= phb3_get_reserved_pe_number,
 	.device_init		= phb3_device_init,
-	.device_remove		= NULL,
+	.device_remove		= phb3_device_remove,
 	.ioda_reset		= phb3_ioda_reset,
 	.papr_errinjct_reset	= phb3_papr_errinjct_reset,
 	.pci_reinit		= phb3_pci_reinit,
@@ -4093,10 +4145,10 @@ static bool phb3_init_rc_cfg(struct phb3 *p)
 	phb3_pcicfg_write32(&p->phb, 0, aercap + PCIECAP_AER_CE_MASK,
 			    PCIECAP_AER_CE_ADV_NONFATAL |
 			    (p->has_link ? 0 : PCIECAP_AER_CE_RECVR_ERR));
-	/* Enable ECRC generation & checking */
-	phb3_pcicfg_write32(&p->phb, 0, aercap + PCIECAP_AER_CAPCTL,
-			     PCIECAP_AER_CAPCTL_ECRCG_EN	|
-			     PCIECAP_AER_CAPCTL_ECRCC_EN);
+
+	/* Enable or disable ECRC generation & checking */
+	phb3_enable_ecrc(&p->phb, !p->no_ecrc_devs);
+
 	/* Enable reporting in root error control */
 	phb3_pcicfg_write32(&p->phb, 0, aercap + PCIECAP_AER_RERR_CMD,
 			     PCIECAP_AER_RERR_CMD_FE		|
diff --git a/include/phb3.h b/include/phb3.h
index cb36515..3f949fc 100644
--- a/include/phb3.h
+++ b/include/phb3.h
@@ -299,6 +299,7 @@ struct phb3 {
 	int64_t			aercap;	    /* cached AER ecap offset */
 	const __be64		*lane_eq;
 	unsigned int		max_link_speed;
+	uint32_t		no_ecrc_devs;
 
 	uint16_t		rte_cache[RTT_TABLE_ENTRIES];
 	uint8_t			peltv_cache[PELTV_TABLE_SIZE];
-- 
2.7.4



More information about the Skiboot mailing list