[PATCH RFC 04/15] powerpc/eeh: Sync eeh_pe_next(), eeh_pe_find() and early-out traversals

Sam Bobroff sbobroff at linux.ibm.com
Wed Oct 2 16:02:42 AEST 2019


Reference counting for the in-line loop macro "eeh_for_each_pe()" can
be done by having eeh_pe_next() both get and put references; "rolling"
a reference along the list. This allows most loops to work without
change, although ones that use an "early-out" must manually release
the final reference.

While reference counting will keep the current iteration's PE from
being freed while it is in use, it's also necessary to check that it
hasn't been removed from the list that's being traversed.  This is
done by checking the parent pointer, which is set to NULL on removal
(see eeh_rmv_from_parent_pe()) (PHB type PEs never have their parent
set, but aren't a problem: they can't be removed). If this does occur,
the traversal is terminated. This may leave the traversal incomplete,
but that is preferable to crashing.

Signed-off-by: Sam Bobroff <sbobroff at linux.ibm.com>
---
 arch/powerpc/include/asm/eeh.h   |  7 ++++-
 arch/powerpc/kernel/eeh_driver.c |  4 ++-
 arch/powerpc/kernel/eeh_pe.c     | 50 +++++++++++++++++++++++++-------
 3 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index df843d04268d..3ab03d407eb1 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -111,8 +111,13 @@ struct eeh_pe {
 #define eeh_pe_for_each_dev(pe, edev, tmp) \
 		list_for_each_entry_safe(edev, tmp, &pe->edevs, entry)
 
+/*
+ * Note that eeh_pe_next() maintins a reference on 'pe' for each
+ * iteration and that it must be manually released if the loop is
+ * exited early (i.e. before eeh_pe_next() returns NULL).
+ */
 #define eeh_for_each_pe(root, pe) \
-	for (pe = root; pe; pe = eeh_pe_next(pe, root))
+	for (pe = root, eeh_get_pe(pe); pe; pe = eeh_pe_next(pe, root))
 
 static inline bool eeh_pe_passed(struct eeh_pe *pe)
 {
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 28e54fe3ac6c..b3245d0cfb22 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -590,8 +590,10 @@ static int eeh_clear_pe_frozen_state(struct eeh_pe *root, bool include_passed)
 			for (i = 0; i < 3; i++)
 				if (!eeh_unfreeze_pe(pe))
 					break;
-			if (i >= 3)
+			if (i >= 3) {
+				eeh_put_pe(pe); /* Release loop ref */
 				return -EIO;
+			}
 		}
 	}
 	eeh_pe_state_clear(root, EEH_PE_ISOLATED, include_passed);
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index aa279474a928..b89ed46f14e6 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -294,23 +294,44 @@ struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
  *
  * The function is used to retrieve the next PE in the
  * hierarchy PE tree.
+ *
+ * Consumes the ref on 'pe', returns a referenced PE (if not null).
  */
-struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root)
+struct eeh_pe *eeh_pe_next(struct eeh_pe *prev_pe, struct eeh_pe *root)
 {
-	struct list_head *next = pe->child_list.next;
+	struct list_head *next;
+	struct eeh_pe *next_pe = NULL, *pe = prev_pe;
+	unsigned long flags;
 
+	eeh_lock_pes(&flags);
+	if (!(pe->type & EEH_PE_PHB) && !pe->parent) {
+		/* Current PE has been removed since the last iteration.
+		 * There's no way to recover so bail out. The traversal
+		 * may be incomplete.
+		 */
+		eeh_unlock_pes(flags);
+		pr_warn("EEH: Warning: PHB#%x-PE%x: Traversal possibly incomplete.\n",
+			pe->phb->global_number, pe->addr);
+		eeh_put_pe(pe); /* Release ref from last iter */
+		return NULL;
+	}
+	next = pe->child_list.next;
 	if (next == &pe->child_list) {
 		while (1) {
 			if (pe == root)
-				return NULL;
+				goto out;
 			next = pe->child.next;
 			if (next != &pe->parent->child_list)
 				break;
 			pe = pe->parent;
 		}
 	}
-
-	return list_entry(next, struct eeh_pe, child);
+	next_pe = list_entry(next, struct eeh_pe, child);
+	eeh_get_pe(next_pe); /* Acquire ref for next iter */
+out:
+	eeh_unlock_pes(flags);
+	eeh_put_pe(prev_pe); /* Release ref from last iter */
+	return next_pe;
 }
 
 /**
@@ -332,7 +353,10 @@ void *eeh_pe_traverse(struct eeh_pe *root,
 
 	eeh_for_each_pe(root, pe) {
 		ret = fn(pe, flag);
-		if (ret) return ret;
+		if (ret) {
+			eeh_put_pe(pe); /* Early-out: release last ref */
+			return ret;
+		}
 	}
 
 	return NULL;
@@ -388,24 +412,26 @@ static void *__eeh_pe_find(struct eeh_pe *pe, void *flag)
 	if (pe->type & EEH_PE_PHB)
 		return NULL;
 
+	eeh_get_pe(pe); /* Acquire ref */
 	/*
 	 * We prefer PE address. For most cases, we should
 	 * have non-zero PE address
 	 */
 	if (eeh_has_flag(EEH_VALID_PE_ZERO)) {
 		if (tmp->pe_no == pe->addr)
-			return pe;
+			return pe; /* Give ref */
 	} else {
 		if (tmp->pe_no &&
 		    (tmp->pe_no == pe->addr))
-			return pe;
+			return pe; /* Give ref */
 	}
 
 	/* Try BDF address */
 	if (tmp->config_addr &&
 	   (tmp->config_addr == pe->config_addr))
-		return pe;
+		return pe; /* Give ref */
 
+	eeh_put_pe(pe); /* Release ref */
 	return NULL;
 }
 
@@ -421,15 +447,17 @@ static void *__eeh_pe_find(struct eeh_pe *pe, void *flag)
  * notable that the PE address has 2 format: traditional PE address
  * which is composed of PCI bus/device/function number, or unified
  * PE address.
+ * Returns a ref'd PE or NULL.
  */
 struct eeh_pe *eeh_pe_find(struct pci_controller *phb,
-			   int pe_no, int config_addr)
+		int pe_no, int config_addr)
 {
-	struct eeh_pe *root = eeh_phb_pe_get(phb);
+	struct eeh_pe *root = eeh_phb_pe_get(phb); /* Acquire ref */
 	struct eeh_pe_get_flag tmp = { pe_no, config_addr };
 	struct eeh_pe *pe;
 
 	pe = eeh_pe_traverse(root, __eeh_pe_find, &tmp);
+	eeh_put_pe(root); /* Release ref */
 
 	return pe;
 }
-- 
2.22.0.216.g00a2a96fc9



More information about the Linuxppc-dev mailing list