[PATCH RFC 11/15] powerpc/eeh: Sync eeh_dev_check_failure()
Sam Bobroff
sbobroff at linux.ibm.com
Wed Oct 2 16:02:49 AEST 2019
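Synchronize access to eeh_pe in eeh_dev_check_failure(): hold a reference on the PE while it is in use, and take the PE lock when reading pe->parent.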
Signed-off-by: Sam Bobroff <sbobroff at linux.ibm.com>
---
arch/powerpc/kernel/eeh.c | 26 ++++++++++++++++++++------
1 file changed, 20 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index eb37cb384ff4..171be70b34d8 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -447,7 +447,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
int eeh_dev_check_failure(struct eeh_dev *edev)
{
int ret;
- unsigned long flags;
+ unsigned long flags, pe_flags;
struct device_node *dn;
struct pci_dev *dev;
struct eeh_pe *pe, *parent_pe;
@@ -464,7 +464,9 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
return 0;
}
dev = eeh_dev_to_pci_dev(edev);
+ /* TODO: Unsafe until eeh_dev can be synchronized with eeh_pe. */
pe = eeh_dev_to_pe(edev);
+ eeh_get_pe(pe);
/* Access to IO BARs might get this far and still not want checking. */
if (!pe) {
@@ -475,6 +477,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
if (!pe->addr && !pe->config_addr) {
eeh_stats.no_cfg_addr++;
+ eeh_put_pe(pe); /* Release ref */
return 0;
}
@@ -482,17 +485,21 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
* On PowerNV platform, we might already have fenced PHB
* there and we need take care of that firstly.
*/
- ret = eeh_phb_check_failure(pe);
- if (ret > 0)
+ ret = eeh_phb_check_failure(pe); /* Acquire ref */
+ if (ret > 0) {
+ eeh_put_pe(pe); /* Release ref */
return ret;
+ }
/*
* If the PE isn't owned by us, we shouldn't check the
* state. Instead, let the owner handle it if the PE has
* been frozen.
*/
- if (eeh_pe_passed(pe))
+ if (eeh_pe_passed(pe)) {
+ eeh_put_pe(pe); /* Release ref */
return 0;
+ }
/* If we already have a pending isolation event for this
* slot, we know it's bad already, we don't need to check.
@@ -548,7 +555,10 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
* put into frozen state as well. We should take care
* that at first.
*/
+ eeh_lock_pes(&pe_flags);
parent_pe = pe->parent;
+ eeh_get_pe(parent_pe); /* Acquire ref */
+ eeh_unlock_pes(pe_flags);
while (parent_pe) {
/* Hit the ceiling ? */
if (parent_pe->type & EEH_PE_PHB)
@@ -557,15 +567,18 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
/* Frozen parent PE ? */
ret = eeh_ops->get_state(parent_pe, NULL);
if (ret > 0 && !eeh_state_active(ret)) {
+ eeh_put_pe(pe); /* Release ref */
pe = parent_pe;
+ eeh_get_pe(pe); /* Acquire ref */
pr_err("EEH: Failure of PHB#%x-PE#%x will be handled at parent PHB#%x-PE#%x.\n",
pe->phb->global_number, pe->addr,
pe->phb->global_number, parent_pe->addr);
}
/* Next parent level */
- parent_pe = parent_pe->parent;
+ eeh_pe_move_to_parent(&parent_pe);
}
+ eeh_put_pe(parent_pe); /* Release ref */
eeh_stats.slot_resets++;
@@ -582,11 +595,12 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
*/
pr_debug("EEH: %s: Frozen PHB#%x-PE#%x detected\n",
__func__, pe->phb->global_number, pe->addr);
- eeh_send_failure_event(pe);
+ eeh_send_failure_event(pe); /* Give ref */
return 1;
dn_unlock:
+ eeh_put_pe(pe); /* Release ref */
eeh_serialize_unlock(flags);
return rc;
}
--
2.22.0.216.g00a2a96fc9
More information about the Linuxppc-dev
mailing list