[PATCH 1/1 V4] : PCIE PHB reset
wenxiong at linux.vnet.ibm.com
wenxiong at linux.vnet.ibm.com
Tue Jul 14 00:39:33 AEST 2020
From: Wen Xiong <wenxiong at linux.vnet.ibm.com>
Several device drivers hit EEH(Extended Error handling) when triggering
kdump on Pseries PowerVM. This patch implemented a reset of the PHBs
in pci general code when triggering kdump. PHB reset stop all PCI
transactions from normal kernel. We have tested the patch in several
enviroments:
- direct slot adapters
- adapters under the switch
- a VF adapter in PowerVM
- a VF adapter/adapter in KVM guest.
Change History:
V4:
- Merge the code from pseries/pci.c to pseries/eeh_pseries.c
- Add 3 helper functions which are shared by eeh code and this path.
Reviewed by Michael Ellerman.
V3:
- Change the comments(Reviewed by Gustavo Romero)
V2:
- change to machine_postcore_initall(Reviewed by Oliver Halloran)
- change the error pathes(Reviewed by Sam Bobroff)
V1:
- initial version
Signed-off-by: Wen Xiong<wenxiong at linux.vnet.ibm.com>
---
arch/powerpc/platforms/pseries/eeh_pseries.c | 234 ++++++++++++++-----
1 file changed, 170 insertions(+), 64 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index ace117f99d94..a3ae8d206a86 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -24,6 +24,7 @@
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
+#include <linux/crash_dump.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
@@ -80,6 +81,152 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
eeh_probe_device(pdev);
}
+
+/**
+ * pseries_eeh_get_config_addr - Retrieve config address
+ *
+ * Retrieve the assocated config address. Actually, there're 2 RTAS
+ * function calls dedicated for the purpose. We need implement
+ * it through the new function and then the old one. Besides,
+ * you should make sure the config address is figured out from
+ * FDT node before calling the function.
+ *
+ * It's notable that zero'ed return value means invalid PE config
+ * address.
+ */
+static int pseries_eeh_get_config_addr(struct pci_controller *phb, int config_addr)
+{
+ int ret = 0;
+ int rets[3];
+
+ if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
+ /*
+ * First of all, we need to make sure there has one PE
+ * associated with the device. Otherwise, PE address is
+ * meaningless.
+ */
+ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+ config_addr, BUID_HI(phb->buid),
+ BUID_LO(phb->buid), 1);
+ if (ret || (rets[0] == 0))
+ return 0;
+
+ /* Retrieve the associated PE config address */
+ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+ config_addr, BUID_HI(phb->buid),
+ BUID_LO(phb->buid), 0);
+ if (ret) {
+ pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
+ __func__, phb->global_number, config_addr);
+ return 0;
+ }
+
+ return rets[0];
+ }
+
+ if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
+ ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
+ config_addr, BUID_HI(phb->buid),
+ BUID_LO(phb->buid), 0);
+ if (ret) {
+ pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
+ __func__, phb->global_number, config_addr);
+ return 0;
+ }
+
+ return rets[0];
+ }
+
+ return ret;
+}
+
+/**
+ * pseries_eeh_phb_reset - Reset the specified PHB
+ * @phb: PCI controller
+ * @config_adddr: the associated config address
+ * @option: reset option
+ *
+ * Reset the specified PHB/PE
+ */
+static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, int option)
+{
+ int ret;
+
+ /* Reset PE through RTAS call */
+ ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+ config_addr, BUID_HI(phb->buid),
+ BUID_LO(phb->buid), option);
+
+ /* If fundamental-reset not supported, try hot-reset */
+ if (option == EEH_RESET_FUNDAMENTAL &&
+ ret == -8) {
+ option = EEH_RESET_HOT;
+ ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+ config_addr, BUID_HI(phb->buid),
+ BUID_LO(phb->buid), option);
+ }
+
+ /* We need reset hold or settlement delay */
+ if (option == EEH_RESET_FUNDAMENTAL ||
+ option == EEH_RESET_HOT)
+ msleep(EEH_PE_RST_HOLD_TIME);
+ else
+ msleep(EEH_PE_RST_SETTLE_TIME);
+
+ return ret;
+}
+
+/**
+ * pseries_eeh_phb_configure_bridge - Configure PCI bridges in the indicated PE
+ * @phb: PCI controller
+ * @config_adddr: the associated config address
+ *
+ * The function will be called to reconfigure the bridges included
+ * in the specified PE so that the mulfunctional PE would be recovered
+ * again.
+ */
+static int pseries_eeh_phb_configure_bridge(struct pci_controller *phb, int config_addr)
+{
+ int ret;
+ /* Waiting 0.2s maximum before skipping configuration */
+ int max_wait = 200;
+
+ while (max_wait > 0) {
+ ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
+ config_addr, BUID_HI(phb->buid),
+ BUID_LO(phb->buid));
+
+ if (!ret)
+ return ret;
+ if (ret < 0)
+ break;
+
+ /*
+ * If RTAS returns a delay value that's above 100ms, cut it
+ * down to 100ms in case firmware made a mistake. For more
+ * on how these delay values work see rtas_busy_delay_time
+ */
+ if (ret > RTAS_EXTENDED_DELAY_MIN+2 &&
+ ret <= RTAS_EXTENDED_DELAY_MAX)
+ ret = RTAS_EXTENDED_DELAY_MIN+2;
+
+ max_wait -= rtas_busy_delay_time(ret);
+
+ if (max_wait < 0)
+ break;
+
+ rtas_busy_delay(ret);
+ }
+
+ pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n",
+ __func__, phb->global_number, config_addr, ret);
+ /* PAPR defines -3 as "Parameter Error" for this function: */
+ if (ret == -3)
+ return -EINVAL;
+ else
+ return -EIO;
+}
+
/*
* Buffer for reporting slot-error-detail rtas calls. Its here
* in BSS, and not dynamically alloced, so that it ends up in
@@ -96,6 +243,10 @@ static int eeh_error_buf_size;
*/
static int pseries_eeh_init(void)
{
+ struct pci_controller *phb;
+ struct pci_dn *pdn;
+ int addr, config_addr;
+
/* figure out EEH RTAS function call tokens */
ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
@@ -148,6 +299,22 @@ static int pseries_eeh_init(void)
/* Set EEH machine dependent code */
ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device;
+ if (is_kdump_kernel() || reset_devices) {
+ pr_info("Issue PHB reset ...\n");
+ list_for_each_entry(phb, &hose_list, list_node) {
+ pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list);
+ addr = (pdn->busno << 16) | (pdn->devfn << 8);
+ config_addr = pseries_eeh_get_config_addr(phb, addr);
+ /* invalid PE config addr */
+ if (config_addr == 0)
+ continue;
+
+ pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL);
+ pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE);
+ pseries_eeh_phb_configure_bridge(phb, config_addr);
+ }
+ }
+
return 0;
}
@@ -569,35 +736,13 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay)
static int pseries_eeh_reset(struct eeh_pe *pe, int option)
{
int config_addr;
- int ret;
/* Figure out PE address */
config_addr = pe->config_addr;
if (pe->addr)
config_addr = pe->addr;
-
- /* Reset PE through RTAS call */
- ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
- config_addr, BUID_HI(pe->phb->buid),
- BUID_LO(pe->phb->buid), option);
-
- /* If fundamental-reset not supported, try hot-reset */
- if (option == EEH_RESET_FUNDAMENTAL &&
- ret == -8) {
- option = EEH_RESET_HOT;
- ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
- config_addr, BUID_HI(pe->phb->buid),
- BUID_LO(pe->phb->buid), option);
- }
-
- /* We need reset hold or settlement delay */
- if (option == EEH_RESET_FUNDAMENTAL ||
- option == EEH_RESET_HOT)
- msleep(EEH_PE_RST_HOLD_TIME);
- else
- msleep(EEH_PE_RST_SETTLE_TIME);
-
- return ret;
+
+ return pseries_eeh_phb_reset(pe->phb, config_addr, option);
}
/**
@@ -641,56 +786,17 @@ static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, u
* pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE
* @pe: EEH PE
*
- * The function will be called to reconfigure the bridges included
- * in the specified PE so that the mulfunctional PE would be recovered
- * again.
*/
static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
{
int config_addr;
- int ret;
- /* Waiting 0.2s maximum before skipping configuration */
- int max_wait = 200;
/* Figure out the PE address */
config_addr = pe->config_addr;
if (pe->addr)
config_addr = pe->addr;
- while (max_wait > 0) {
- ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
- config_addr, BUID_HI(pe->phb->buid),
- BUID_LO(pe->phb->buid));
-
- if (!ret)
- return ret;
- if (ret < 0)
- break;
-
- /*
- * If RTAS returns a delay value that's above 100ms, cut it
- * down to 100ms in case firmware made a mistake. For more
- * on how these delay values work see rtas_busy_delay_time
- */
- if (ret > RTAS_EXTENDED_DELAY_MIN+2 &&
- ret <= RTAS_EXTENDED_DELAY_MAX)
- ret = RTAS_EXTENDED_DELAY_MIN+2;
-
- max_wait -= rtas_busy_delay_time(ret);
-
- if (max_wait < 0)
- break;
-
- rtas_busy_delay(ret);
- }
-
- pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n",
- __func__, pe->phb->global_number, pe->addr, ret);
- /* PAPR defines -3 as "Parameter Error" for this function: */
- if (ret == -3)
- return -EINVAL;
- else
- return -EIO;
+ return pseries_eeh_phb_configure_bridge(pe->phb, config_addr);
}
/**
--
2.18.1
More information about the Linuxppc-dev
mailing list