powerpc/pci: [PATCH 1/1]: PCIE PHB reset
wenxiong
wenxiong at linux.vnet.ibm.com
Fri May 15 02:46:52 AEST 2020
On 2020-05-12 00:28, Sam Bobroff wrote:
> On Thu, May 07, 2020 at 08:10:37AM -0500, wenxiong at linux.vnet.ibm.com
> wrote:
>> From: Wen Xiong <wenxiong at linux.vnet.ibm.com>
>>
>> Several device drivers hit EEH(Extended Error handling) when
>> triggering
>> kdump on Pseries PowerVM. This patch implemented a reset of the PHBs
>> in pci general code. PHB reset stop all PCI transactions from previous
>> kernel. We have tested the patch in several enviroments:
>> - direct slot adapters
>> - adapters under the switch
>> - a VF adapter in PowerVM
>> - a VF adapter/adapter in KVM guest.
>>
>> Signed-off-by: Wen Xiong <wenxiong at linux.vnet.ibm.com>
>
> Hi Wen Xiong,
>
> I saw Oliver's review and I think he's covered the main issues I was
> going to raise:
> - This will run and produce some spurious errors on powernv. (I think
> distros do compile in both pseries and powernv.)
> - There's a bit of code duplication but it's probably OK for this
> patch.
>
Hi Oliver and Sam,
Yes. Thanks for reviewing the code. I will keep some code duplication
for now until Oliver re-work the EEH code. I am going to fix several
minor things in error code patch.
Thanks,
Wendy
> I have a few other minor comments, below:
>
>> ---
>> arch/powerpc/platforms/pseries/pci.c | 153
>> +++++++++++++++++++++++++++
>> 1 file changed, 153 insertions(+)
>>
>> diff --git a/arch/powerpc/platforms/pseries/pci.c
>> b/arch/powerpc/platforms/pseries/pci.c
>> index 911534b89c85..aac7f00696d2 100644
>> --- a/arch/powerpc/platforms/pseries/pci.c
>> +++ b/arch/powerpc/platforms/pseries/pci.c
>> @@ -11,6 +11,8 @@
>> #include <linux/kernel.h>
>> #include <linux/pci.h>
>> #include <linux/string.h>
>> +#include <linux/crash_dump.h>
>> +#include <linux/delay.h>
>>
>> #include <asm/eeh.h>
>> #include <asm/pci-bridge.h>
>> @@ -354,3 +356,154 @@ int pseries_root_bridge_prepare(struct
>> pci_host_bridge *bridge)
>>
>> return 0;
>> }
>> +
>> +/**
>> + * pseries_get_pdn_addr - Retrieve PHB address
>> + * @pe: EEH PE
>> + *
>> + * Retrieve the assocated PHB address. Actually, there're 2 RTAS
>> + * function calls dedicated for the purpose. We need implement
>> + * it through the new function and then the old one. Besides,
>> + * you should make sure the config address is figured out from
>> + * FDT node before calling the function.
>> + *
>> + */
>> +static int pseries_get_pdn_addr(struct pci_controller *phb)
>> +{
>> + int ret = -1;
>> + int rets[3];
>> + int ibm_get_config_addr_info;
>> + int ibm_get_config_addr_info2;
>> + int config_addr = 0;
>> + struct pci_dn *root_pdn, *pdn;
>> +
>> + ibm_get_config_addr_info2 =
>> rtas_token("ibm,get-config-addr-info2");
>> + ibm_get_config_addr_info =
>> rtas_token("ibm,get-config-addr-info");
>> +
>> + root_pdn = PCI_DN(phb->dn);
>> + pdn = list_first_entry(&root_pdn->child_list, struct pci_dn, list);
>> + config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
>> +
>> + if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
>> + /*
>> + * First of all, we need to make sure there has one PE
>> + * associated with the device. Otherwise, PE address is
>> + * meaningless.
>> + */
>
> This comment might be better if it explained how using option=0
> with ibm_get_config_addr tests the PE.
>
>> + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
>> + config_addr, BUID_HI(pdn->phb->buid),
>> + BUID_LO(pdn->phb->buid), 1);
>> + if (ret || (rets[0] == 0)) {
>> + pr_warn("%s: Failed to get address for PHB#%x-PE# "
>> + "option=%d config_addr=%x\n",
>> + __func__, pdn->phb->global_number, 1, rets[0]);
>> + return -1;
>> + }
>> +
>> + /* Retrieve the associated PE config address */
>> + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
>> + config_addr, BUID_HI(pdn->phb->buid),
>> + BUID_LO(pdn->phb->buid), 0);
>> + if (ret) {
>> + pr_warn("%s: Failed to get address for PHB#%x-PE# "
>> + "option=%d config_addr=%x\n",
>> + __func__, pdn->phb->global_number, 0, rets[0]);
>> + return -1;
>> + }
>> + return rets[0];
>> + }
>> +
>> + if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
>> + ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
>> + config_addr, BUID_HI(pdn->phb->buid),
>> + BUID_LO(pdn->phb->buid), 0);
>> + if (ret || rets[0]) {
>> + pr_warn("%s: Failed to get address for PHB#%x-PE# "
>> + "config_addr=%x\n",
>> + __func__, pdn->phb->global_number, rets[0]);
>> + return -1;
>> + }
>> + return rets[0];
>> + }
>> +
>> + return ret;
> Can this ever return anything other than 0?
>
>> +}
>> +
>> +static int __init pseries_phb_reset(void)
>> +{
>> + struct pci_controller *phb;
>> + int config_addr;
>> + int ibm_set_slot_reset;
>> + int ibm_configure_pe;
>> + int ret;
>> +
>> + if (is_kdump_kernel() || reset_devices) {
>> + pr_info("Issue PHB reset ...\n");
>> + ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
>> + ibm_configure_pe = rtas_token("ibm,configure-pe");
>> +
>> + if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
>> + ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
>> + pr_info("%s: EEH functionality not supported\n",
>> + __func__);
>> + }
>> +
>> + list_for_each_entry(phb, &hose_list, list_node) {
>> + config_addr = pseries_get_pdn_addr(phb);
>> + if (config_addr == -1)
>> + continue;
>> +
>> + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
>> + config_addr, BUID_HI(phb->buid),
>> + BUID_LO(phb->buid), EEH_RESET_FUNDAMENTAL);
>> +
>> + /* If fundamental-reset not supported, try hot-reset */
>> + if (ret == -8)
>> + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
>> + config_addr, BUID_HI(phb->buid),
>> + BUID_LO(phb->buid), EEH_RESET_HOT);
>> +
>> + if (ret) {
>> + pr_err("%s: fail with rtas_call fundamental reset=%d\n",
>> + __func__, ret);
>
> This error might be a bit confusing, since it's not clear if the result
> came from the fundamental or hot-reset.
>
>> + continue;
>> + }
>> + }
>> + msleep(EEH_PE_RST_SETTLE_TIME);
>> +
>> + list_for_each_entry(phb, &hose_list, list_node) {
>> + config_addr = pseries_get_pdn_addr(phb);
>> + if (config_addr == -1)
>> + continue;
>> +
>> + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
>> + config_addr, BUID_HI(phb->buid),
>> + BUID_LO(phb->buid), EEH_RESET_DEACTIVATE);
>> + if (ret) {
>> + pr_err("%s: fail with rtas_call deactive=%d\n",
>> + __func__, ret);
>> + continue;
>> + }
>> + }
>> + msleep(EEH_PE_RST_SETTLE_TIME);
>> +
>> + list_for_each_entry(phb, &hose_list, list_node) {
>> + config_addr = pseries_get_pdn_addr(phb);
>> + if (config_addr == -1)
>> + continue;
>> +
>> + ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
>> + config_addr, BUID_HI(phb->buid),
>> + BUID_LO(phb->buid));
>> + if (ret) {
>> + pr_err("%s: fail with rtas_call configure_pe =%d\n",
>> + __func__, ret);
>
> These errors might be more useful if they indicated which PHB caused
> the
> error.
>
>> + continue;
>> + }
>> + }
>> + }
>> +
>> + return 0;
>> +}
>> +postcore_initcall(pseries_phb_reset);
>> +
>> --
>> 2.18.1
>>
More information about the Linuxppc-dev
mailing list