[Skiboot] [PATCH 3/3] hack: pci-quirk: Scan for config ranges that need the workaround

Oliver O'Halloran oohall at gmail.com
Fri Jul 19 19:38:21 AEST 2019


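Instead of hard-coding the config space ranges to block for each device
type, read every config space offset (0x000-0xfff) and block whichever
ranges trigger a freeze. More robust, but *really* slow: as the log below
shows, the scan adds roughly a second of boot time per device.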

$ grep Applied /sys/firmware/opal/msglog -A1
[  451.128374154,5] PCI-QUIRK: PHB#0001:01:00.0 Applied UR workaround to [9c...100)
[  451.352419458,5] PCI-QUIRK: PHB#0001:01:00.0 Applied UR workaround to [284...7f8)
[  452.098609721,5] PHB#0001:01:00.0 [SWUP] 11f8 4052 R:00 C:060400 B:02..06 SLOT=005  x8
---
[  452.131262166,5] PCI-QUIRK: PHB#0001:02:00.0 Applied UR workaround to [9c...100)
[  452.355301176,5] PCI-QUIRK: PHB#0001:02:00.0 Applied UR workaround to [288...7f8)
[  453.101497705,5] PHB#0001:02:00.0 [SWDN] 11f8 4052 R:00 C:060400 B:03..03 SLOT=005  x8
---
[  453.136158940,5] PCI-QUIRK: PHB#0001:02:01.0 Applied UR workaround to [9c...100)
[  453.360198154,5] PCI-QUIRK: PHB#0001:02:01.0 Applied UR workaround to [288...7f8)
[  454.106394518,5] PHB#0001:02:01.0 [SWDN] 11f8 4052 R:00 C:060400 B:04..04 SLOT=C11  x8
---
[  454.141055796,5] PCI-QUIRK: PHB#0001:02:02.0 Applied UR workaround to [9c...100)
[  454.365094880,5] PCI-QUIRK: PHB#0001:02:02.0 Applied UR workaround to [288...7f8)
[  455.111291381,5] PHB#0001:02:02.0 [SWDN] 11f8 4052 R:00 C:060400 B:05..05 SLOT=C12 x16
---
[  455.144948035,5] PCI-QUIRK: PHB#0001:02:03.0 Applied UR workaround to [9c...100)
[  455.368987098,5] PCI-QUIRK: PHB#0001:02:03.0 Applied UR workaround to [288...7f8)
[  456.115183503,5] PHB#0001:02:03.0 [SWDN] 11f8 4052 R:00 C:060400 B:06..06 SLOT=C49  x8
---
[  456.115686350,5] PHB#0001:06:00.0 [EP  ] 1014 034a R:02 C:010400 (          raid) LOC_CODE=C49
[  456.148337007,5] PCI-QUIRK: PHB#0001:01:00.1 Applied UR workaround to [a0...100)
[  457.118453865,5] PCI-QUIRK: PHB#0001:01:00.1 Applied UR workaround to [17c...fff)
[  457.118572672,5] PHB#0001:01:00.1 [EP  ] 11f8 4052 R:00 C:058000 (memory-controller) LOC_CODE=003

Signed-off-by: Oliver O'Halloran <oohall at gmail.com>
---
 core/pci-quirk.c | 40 ++++++++++++++++++----------------------
 core/pci.c       | 15 ++++++++++-----
 include/pci.h    |  2 ++
 3 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/core/pci-quirk.c b/core/pci-quirk.c
index 371ff62b4b72..2452409a1c2b 100644
--- a/core/pci-quirk.c
+++ b/core/pci-quirk.c
@@ -54,29 +54,25 @@ static int64_t cfg_block_filter(void *dev __unused,
 
 static void quirk_microsemi_gen4_sw(struct phb *phb, struct pci_device *pd)
 {
-	/*
-	 * The gen4 pcie switch used on some ZZ systems has a bug where it'll
-	 * throw URs in response to a cfg read to a range that's "reserved"
-	 * work around it by blackholing.
-	 */
-	if (pd->dev_type == PCIE_TYPE_ENDPOINT && pd->class == 0x058000) {
-		/*
-		 * we match on the class code too since the switch might
-		 * support an NTB endpoint.
-		 */
-		BLOCK_CFG_RANGE(pd, 0xa0, 0xff);
-		BLOCK_CFG_RANGE(pd, 0x17c, 0xfff);
-	} else if (pd->dev_type == PCIE_TYPE_SWITCH_UPPORT) {
-		BLOCK_CFG_RANGE(pd, 0x09c, 0xff);
-		BLOCK_CFG_RANGE(pd, 0x284, 0x7f7);
-	} else if (pd->dev_type == PCIE_TYPE_SWITCH_DNPORT) {
-		BLOCK_CFG_RANGE(pd, 0x09c, 0xff);
-		BLOCK_CFG_RANGE(pd, 0x288, 0x7f7);
-	} else {
-		return;
-	}
+	uint8_t data;
+	bool frozen;
+	int offset;
+	int start;
+
+	pci_check_clear_freeze(phb);
 
-	PCINOTICE(phb, pd->bdfn, "Applied Microsemi Gen4 UR workaround\n");
+	for (start = -1, offset = 0; offset < 4096; offset++) {
+		pci_cfg_read8(phb, pd->bdfn, offset, &data);
+		frozen = pci_check_clear_freeze(phb);
+
+		if (start >= 0 && (!frozen || offset == 4095)) { /* end of range */
+			BLOCK_CFG_RANGE(pd, start, offset - 1);
+			PCINOTICE(phb, pd->bdfn, "Applied UR workaround to [%03x..%03x)\n", start, offset - 1);
+			start = -1;
+		} else if (frozen && start < 0) { /* new UR range */
+			start = offset;
+		}
+	}
 }
 
 static void quirk_astbmc_vga(struct phb *phb __unused,
diff --git a/core/pci.c b/core/pci.c
index e870d09b5c55..2a36290d6598 100644
--- a/core/pci.c
+++ b/core/pci.c
@@ -316,10 +316,12 @@ static struct pci_device *pci_scan_one(struct phb *phb, struct pci_device *paren
  *                          everything (default state of our backend) so
  *                          we just check and clear the state of PE#0
  *
+ *                          returns true if a freeze was detected
+ *
  * NOTE: We currently only handle simple PE freeze, not PHB fencing
  *       (or rather our backend does)
  */
-static void pci_check_clear_freeze(struct phb *phb)
+bool pci_check_clear_freeze(struct phb *phb)
 {
 	uint8_t freeze_state;
 	uint16_t pci_error_type, sev;
@@ -330,23 +332,26 @@ static void pci_check_clear_freeze(struct phb *phb)
 	if (phb->ops->get_reserved_pe_number)
 		pe_number = phb->ops->get_reserved_pe_number(phb);
 	if (pe_number < 0)
-		return;
+		return false;
 
 	/* Retrieve the frozen state */
 	rc = phb->ops->eeh_freeze_status(phb, pe_number, &freeze_state,
 					 &pci_error_type, &sev);
 	if (rc)
-		return;
+		return true; /* phb fence? */
+
 	if (freeze_state == OPAL_EEH_STOPPED_NOT_FROZEN)
-		return;
+		return false;
 	/* We can't handle anything worse than an ER here */
 	if (sev > OPAL_EEH_SEV_NO_ERROR &&
 	    sev < OPAL_EEH_SEV_PE_ER) {
 		PCIERR(phb, 0, "Fatal probe in %s error !\n", __func__);
-		return;
+		return true;
 	}
+
 	phb->ops->eeh_freeze_clear(phb, pe_number,
 				   OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+	return true;
 }
 
 /*
diff --git a/include/pci.h b/include/pci.h
index c10d79418e70..2d1328ea31d4 100644
--- a/include/pci.h
+++ b/include/pci.h
@@ -404,6 +404,8 @@ static inline void phb_unlock(struct phb *phb)
 	unlock(&phb->lock);
 }
 
+bool pci_check_clear_freeze(struct phb *phb);
+
 /* Config space ops wrappers */
 static inline int64_t pci_cfg_read8(struct phb *phb, uint32_t bdfn,
 				    uint32_t offset, uint8_t *data)
-- 
2.21.0



More information about the Skiboot mailing list