[PATCH 5/6] powerpc/eeh: Improve recovery of passed-through devices

Sam Bobroff sbobroff at linux.ibm.com
Thu Nov 29 14:16:41 AEDT 2018


Currently, the EEH recovery process considers passed-through devices
as if they were not EEH-aware, which can cause them to be removed as
part of recovery.  Because device removal requires cooperation from
the guest, this may lead to the process stalling or deadlocking.
Also, if devices are removed on the host side, they will be removed
from their IOMMU group, making recovery in the guest impossible.

Therefore, alter the recovery process so that passed-through devices
are not removed but are instead left frozen (and marked isolated)
until the guest performs it's own recovery.  If firmware thaws a
passed-through PE because it's parent PE has been thawed (because it
was not passed through), re-freeze it.

Signed-off-by: Sam Bobroff <sbobroff at linux.ibm.com>
---
 arch/powerpc/include/asm/eeh.h     |  2 +-
 arch/powerpc/include/asm/ppc-pci.h |  2 +-
 arch/powerpc/kernel/eeh.c          | 47 +++++++++++++++++++++++-------
 arch/powerpc/kernel/eeh_driver.c   | 32 +++++++++-----------
 drivers/vfio/vfio_spapr_eeh.c      |  6 ++--
 5 files changed, 55 insertions(+), 34 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 2ff123f745cc..0b655810f32d 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -300,7 +300,7 @@ void eeh_dev_release(struct pci_dev *pdev);
 struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group);
 int eeh_pe_set_option(struct eeh_pe *pe, int option);
 int eeh_pe_get_state(struct eeh_pe *pe);
-int eeh_pe_reset(struct eeh_pe *pe, int option);
+int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed);
 int eeh_pe_configure(struct eeh_pe *pe);
 int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
 		      unsigned long addr, unsigned long mask);
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 08e094eaeccf..f191ef0d2a0a 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -53,7 +53,7 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev);
 struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr);
 void eeh_slot_error_detail(struct eeh_pe *pe, int severity);
 int eeh_pci_enable(struct eeh_pe *pe, int function);
-int eeh_pe_reset_full(struct eeh_pe *pe);
+int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed);
 void eeh_save_bars(struct eeh_dev *edev);
 int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
 int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 052512e58b05..df02f55fdfa1 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -877,6 +877,24 @@ static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
 	return NULL;
 }
 
+static void eeh_pe_refreeze_passed(struct eeh_pe *root)
+{
+	struct eeh_pe *pe;
+	int state;
+
+	eeh_for_each_pe(root, pe) {
+		if (eeh_pe_passed(pe)) {
+			state = eeh_ops->get_state(pe, NULL);
+			if (state &
+			   (EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED)) {
+				pr_info("EEH: Passed-through PE PHB#%x-PE#%x was thawed by reset, re-freezing for safety.\n",
+					pe->phb->global_number, pe->addr);
+				eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE);
+			}
+		}
+	}
+}
+
 /**
  * eeh_pe_reset_full - Complete a full reset process on the indicated PE
  * @pe: EEH PE
@@ -889,7 +907,7 @@ static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
  *
  * This function will attempt to reset a PE three times before failing.
  */
-int eeh_pe_reset_full(struct eeh_pe *pe)
+int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed)
 {
 	int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
 	int type = EEH_RESET_HOT;
@@ -911,11 +929,11 @@ int eeh_pe_reset_full(struct eeh_pe *pe)
 
 	/* Make three attempts at resetting the bus */
 	for (i = 0; i < 3; i++) {
-		ret = eeh_pe_reset(pe, type);
+		ret = eeh_pe_reset(pe, type, include_passed);
 		if (ret)
 			break;
 
-		ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
+		ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, include_passed);
 		if (ret)
 			break;
 
@@ -936,6 +954,12 @@ int eeh_pe_reset_full(struct eeh_pe *pe)
 			__func__, state, pe->phb->global_number, pe->addr, (i + 1));
 	}
 
+	/* Resetting the PE may have unfrozen child PEs. If those PEs have been
+	 * (potentially) passed through to a guest, re-freeze them:
+	 */
+	if (!include_passed)
+		eeh_pe_refreeze_passed(pe);
+
 	eeh_pe_state_clear(pe, reset_state, true);
 	return ret;
 }
@@ -1611,13 +1635,12 @@ int eeh_pe_get_state(struct eeh_pe *pe)
 }
 EXPORT_SYMBOL_GPL(eeh_pe_get_state);
 
-static int eeh_pe_reenable_devices(struct eeh_pe *pe)
+static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed)
 {
 	struct eeh_dev *edev, *tmp;
 	struct pci_dev *pdev;
 	int ret = 0;
 
-	/* Restore config space */
 	eeh_pe_restore_bars(pe);
 
 	/*
@@ -1638,9 +1661,13 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe)
 	}
 
 	/* The PE is still in frozen state */
-	ret = eeh_unfreeze_pe(pe);
+	if (include_passed || !eeh_pe_passed(pe)) {
+		ret = eeh_unfreeze_pe(pe);
+	} else
+		pr_info("EEH: Note: Leaving passthrough PHB#%x-PE#%x frozen.\n",
+			pe->phb->global_number, pe->addr);
 	if (!ret)
-		eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
+		eeh_pe_state_clear(pe, EEH_PE_ISOLATED, include_passed);
 	return ret;
 }
 
@@ -1654,7 +1681,7 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe)
  * indicated type, either fundamental reset or hot reset.
  * PE reset is the most important part for error recovery.
  */
-int eeh_pe_reset(struct eeh_pe *pe, int option)
+int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed)
 {
 	int ret = 0;
 
@@ -1668,11 +1695,11 @@ int eeh_pe_reset(struct eeh_pe *pe, int option)
 	switch (option) {
 	case EEH_RESET_DEACTIVATE:
 		ret = eeh_ops->reset(pe, option);
-		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
+		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, include_passed);
 		if (ret)
 			break;
 
-		ret = eeh_pe_reenable_devices(pe);
+		ret = eeh_pe_reenable_devices(pe, include_passed);
 		break;
 	case EEH_RESET_HOT:
 	case EEH_RESET_FUNDAMENTAL:
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 61c177ebb230..ad7be478750f 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -510,22 +510,11 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
 	 * support EEH. So we just care about PCI devices for
 	 * simplicity here.
 	 */
-	if (!dev || (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
-		return NULL;
-
-	/*
-	 * We rely on count-based pcibios_release_device() to
-	 * detach permanently offlined PEs. Unfortunately, that's
-	 * not reliable enough. We might have the permanently
-	 * offlined PEs attached, but we needn't take care of
-	 * them and their child devices.
-	 */
-	if (eeh_dev_removed(edev))
+	if (!eeh_edev_actionable(edev) ||
+	    (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
 		return NULL;
 
 	if (rmv_data) {
-		if (eeh_pe_passed(edev->pe))
-			return NULL;
 		driver = eeh_pcid_get(dev);
 		if (driver) {
 			if (driver->err_handler &&
@@ -539,8 +528,8 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
 	}
 
 	/* Remove it from PCI subsystem */
-	pr_debug("EEH: Removing %s without EEH sensitive driver\n",
-		 pci_name(dev));
+	pr_info("EEH: Removing %s without EEH sensitive driver\n",
+		pci_name(dev));
 	edev->mode |= EEH_DEV_DISCONNECTED;
 	if (rmv_data)
 		rmv_data->removed_dev_count++;
@@ -624,7 +613,7 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
 	eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
 
 	/* Issue reset */
-	ret = eeh_pe_reset_full(pe);
+	ret = eeh_pe_reset_full(pe, true);
 	if (ret) {
 		eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
 		return ret;
@@ -664,6 +653,11 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
 	time64_t tstamp;
 	int cnt, rc;
 	struct eeh_dev *edev;
+	struct eeh_pe *tmp_pe;
+	bool any_passed = false;
+
+	eeh_for_each_pe(pe, tmp_pe)
+		any_passed |= eeh_pe_passed(tmp_pe);
 
 	/* pcibios will clear the counter; save the value */
 	cnt = pe->freeze_count;
@@ -676,7 +670,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
 	 * into pci_hp_add_devices().
 	 */
 	eeh_pe_state_mark(pe, EEH_PE_KEEP);
-	if (driver_eeh_aware || (pe->type & EEH_PE_VF)) {
+	if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) {
 		eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
 	} else {
 		pci_lock_rescan_remove();
@@ -693,7 +687,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
 	 * config accesses. So we prefer to block them. However, controlled
 	 * PCI config accesses initiated from EEH itself are allowed.
 	 */
-	rc = eeh_pe_reset_full(pe);
+	rc = eeh_pe_reset_full(pe, false);
 	if (rc)
 		return rc;
 
@@ -704,7 +698,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
 	eeh_pe_restore_bars(pe);
 
 	/* Clear frozen state */
-	rc = eeh_clear_pe_frozen_state(pe, true);
+	rc = eeh_clear_pe_frozen_state(pe, false);
 	if (rc) {
 		pci_unlock_rescan_remove();
 		return rc;
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 38edeb4729a9..1a742fe8f6db 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -74,13 +74,13 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 			ret = eeh_pe_get_state(pe);
 			break;
 		case VFIO_EEH_PE_RESET_DEACTIVATE:
-			ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
+			ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, true);
 			break;
 		case VFIO_EEH_PE_RESET_HOT:
-			ret = eeh_pe_reset(pe, EEH_RESET_HOT);
+			ret = eeh_pe_reset(pe, EEH_RESET_HOT, true);
 			break;
 		case VFIO_EEH_PE_RESET_FUNDAMENTAL:
-			ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL);
+			ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL, true);
 			break;
 		case VFIO_EEH_PE_CONFIGURE:
 			ret = eeh_pe_configure(pe);
-- 
2.19.0.2.gcad72f5712



More information about the Linuxppc-dev mailing list