[Skiboot] [PATCH 07/12] lpc: Fix lpc_probe_test

Andrew Jeffery andrew at aj.id.au
Fri Oct 18 12:11:28 AEDT 2019


Hi Deb,

On Fri, 18 Oct 2019, at 08:23, Deb McLemore wrote:
> lpc_probe_test is expecting the irqstat to be seeded with a NORESP_ERR,
> so handle appropriately.

We don't always expect NORESP_ERR.

I think you need to add more context here about what drove this change.

Some context my own:

The probing exists to synchronously test for the presence of devices in the
LPC IO space. We need to do this as their presence depends on the
configuration of the BMC and unfortunately there's no way of to determine
whether the devices are there besides just trying to access their addresses
and testing the LPCHC error state.

On P9 the accesses are memory mapped and errors are typically handled by
asynchronous delivery of an IRQ, so in general we want to prevent that. The
code tries to handle this but only gets the happy case right. I've pasted what
should be the fix at the bottom as something we should try out.

Concretely, the device in question is the BMC's SuperIO device. Whether this
is absent or present determines how we will access the PNOR. If it is absent
we must use the IPMI HIOMAP transport to configure the LPC FW space. If it
is present then we either use the MBOX HIOMAP transport or drive the BMC's
SPI flash controller directly through the iLPC2AHB backdoor (which we do
roughly equates to whether we're a P9 or P8).

So we need the probe to determine what style of flash access we need to
use.

> 
> Signed-off-by: Deb McLemore <debmc at linux.ibm.com>
> ---
>  hw/lpc.c | 27 ++++++++++++++++++++++-----
>  1 file changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/lpc.c b/hw/lpc.c
> index 354d2b4..657570a 100644
> --- a/hw/lpc.c
> +++ b/hw/lpc.c
> @@ -485,9 +485,15 @@ static int64_t lpc_probe_test(struct lpcm *lpc)
>  	const uint32_t irqmask_addr = lpc_reg_opb_base + LPC_HC_IRQMASK;
>  	const uint32_t irqstat_addr = lpc_reg_opb_base + LPC_HC_IRQSTAT;
>  	uint32_t irqmask, irqstat;
> +	static int lpc_bus_probe_counter;
>  	int64_t idx;
>  	int rc;
>  
> +	++lpc_bus_probe_counter;
> +	/* compensate by one for the probe that happens before interrupts 
> enabled */
> +	prlog(PR_NOTICE, "%s COMPARE lpc_bus_probe_counter=%i to 
> lpc_bus_err_count in log\n",
> +		__func__,
> +		lpc_bus_probe_counter - 1);

What were you doing to need this information?

>  	rc = opb_read(lpc, irqstat_addr, &irqstat, 4);
>  	if (rc)
>  		return rc;
> @@ -505,19 +511,27 @@ static int64_t lpc_probe_test(struct lpcm *lpc)
>  	if (rc)
>  		return rc;
>  
> -	if (!(irqstat & LPC_HC_IRQ_BASE_IRQS))
> -		return OPAL_SUCCESS;
> +	/* we are expecting LPC_HC_IRQ_SYNC_NORESP_ERR */

As above, it depends.

> +	if ((irqstat & LPC_HC_IRQ_BASE_IRQS)) {
> +		prlog(PR_NOTICE, "%s irqstat=0x%x OPAL_SUCCESS!\n",
> +			__func__, irqstat);

This case roughly means something went wrong with the access.

> +	} else {
> +		/* maybe bad hardware or someting else ?? */

This case means the access succeeded. So in the context of how probing
is currently used, it means that the BMC's SuperIO device is available.

> +		prlog(PR_NOTICE, "%s irqstat=0x%x INVESTIGATE!\n",
> +			__func__, irqstat);
> +	}
>  
>  	/* Ensure we can perform a valid lookup in the error table */
>  	idx = LPC_ERROR_IDX(irqstat);
>  	if (idx < 0 || idx >= ARRAY_SIZE(lpc_error_table)) {

The early exit that you removed helped limit this to dealing with unexpected
cases. We expect the irqstatus to be 0 if the address we're probing has a
device enabled, in which case the __builtin_ffs() in LPC_ERROR_IDX() will
return 0. We then do a bunch of subtractions which make idx < 0 and hit
the log message below.

The intent was we shouldn't be logging them in expected cases, which is
why it was a prerror().

> -		prerror("LPC bus error translation failed with status 0x%x\n",
> -			irqstat);
> +		prlog(PR_ERR, "%s bus error translation failed with idx=%llu irqstat=0x%x\n",
> +			__func__, idx, irqstat);
>  		return OPAL_PARAMETER;
>  	}
>  
>  	rc = lpc_error_table[idx].rc;
> -	return rc;
> +	prlog(PR_TRACE, "%s VALID LOOKUP CONFIRMED idx=%llu rc=0x%x\n", 
> __func__, idx, rc);
> +	return OPAL_SUCCESS;

The rc indicated to the caller whether the device was present or absent. By
returning OPAL_SUCCESS here we might as well have not done the probe at
all, because the caller has no way to tell what the result was.

The result will likely be some avoidable errors down the track as we try to
use the SuperIO device and fail in unexpected ways.

>  }
>  
>  static int64_t __lpc_write(struct lpcm *lpc, enum OpalLPCAddressType 
> addr_type,
> @@ -1037,6 +1051,9 @@ static void lpc_dispatch_err_irqs(struct lpcm 
> *lpc, uint32_t irqs)
>  	/* Find and report the error */
>  	err = &lpc_error_table[idx];
>  	lpc_bus_err_count++;
> +	prlog(PR_NOTICE, "%s COMPARE lpc_bus_err_count=%i to 
> lpc_bus_probe_counter in log\n",
> +		__func__,
> +		lpc_bus_err_count);
>  	if (manufacturing_mode && (lpc_bus_err_count > 
> LPC_BUS_DEGRADED_PERF_THRESHOLD))
>  		info = &e_info(OPAL_RC_LPC_SYNC_PERF);
>  	else
> -- 
> 2.7.4
> 

Here's what I think should fix the IRQ masking issue. It at least compiles, I haven't
tested it. Currently its somewhat moot as we don't have IRQs enabled when we
issue the probe via ast_sio_is_enabled() in ast_sio_init() (called from
astbmc_early_init() in the platform's probe()) and pnor_init() from astbmc_init()
(platform init code).

diff --git a/hw/lpc.c b/hw/lpc.c
index 354d2b4f0bd2..fedceb4189de 100644
--- a/hw/lpc.c
+++ b/hw/lpc.c
@@ -461,30 +461,28 @@ static const struct lpc_error_entry lpc_error_table[] = {
 	LPC_ERROR(LPC_HC_IRQ_SYNC_ABNORM_ERR, OPAL_WRONG_STATE, "Got SYNC abnormal error."),
 };
 
-static int64_t lpc_probe_prepare(struct lpcm *lpc)
+static int64_t lpc_probe_prepare(struct lpcm *lpc, uint32_t *irqmask)
 {
 	const uint32_t irqmask_addr = lpc_reg_opb_base + LPC_HC_IRQMASK;
 	const uint32_t irqstat_addr = lpc_reg_opb_base + LPC_HC_IRQSTAT;
-	uint32_t irqmask;
 	int rc;
 
-	rc = opb_read(lpc, irqmask_addr, &irqmask, 4);
+	rc = opb_read(lpc, irqmask_addr, irqmask, 4);
 	if (rc)
 		return rc;
 
-	irqmask &= ~LPC_HC_IRQ_SYNC_NORESP_ERR;
-	rc = opb_write(lpc, irqmask_addr, irqmask, 4);
+	rc = opb_write(lpc, irqmask_addr, 0, 4);
 	if (rc)
 		return rc;
 
 	return opb_write(lpc, irqstat_addr, LPC_HC_IRQ_SYNC_NORESP_ERR, 4);
 }
 
-static int64_t lpc_probe_test(struct lpcm *lpc)
+static int64_t lpc_probe_test(struct lpcm *lpc, uint32_t irqmask)
 {
 	const uint32_t irqmask_addr = lpc_reg_opb_base + LPC_HC_IRQMASK;
 	const uint32_t irqstat_addr = lpc_reg_opb_base + LPC_HC_IRQSTAT;
-	uint32_t irqmask, irqstat;
+	uint32_t irqstat;
 	int64_t idx;
 	int rc;
 
@@ -496,11 +494,6 @@ static int64_t lpc_probe_test(struct lpcm *lpc)
 	if (rc)
 		return rc;
 
-	rc = opb_read(lpc, irqmask_addr, &irqmask, 4);
-	if (rc)
-		return rc;
-
-	irqmask |= LPC_HC_IRQ_SYNC_NORESP_ERR;
 	rc = opb_write(lpc, irqmask_addr, irqmask, 4);
 	if (rc)
 		return rc;
@@ -525,11 +518,12 @@ static int64_t __lpc_write(struct lpcm *lpc, enum OpalLPCAddressType addr_type,
 			   bool probe)
 {
 	uint32_t opb_base;
+	uint32_t irqmask;
 	int64_t rc;
 
 	lock(&lpc->lock);
 	if (probe) {
-		rc = lpc_probe_prepare(lpc);
+		rc = lpc_probe_prepare(lpc, &irqmask);
 		if (rc)
 			goto bail;
 	}
@@ -548,7 +542,7 @@ static int64_t __lpc_write(struct lpcm *lpc, enum OpalLPCAddressType addr_type,
 		goto bail;
 
 	if (probe)
-		rc = lpc_probe_test(lpc);
+		rc = lpc_probe_test(lpc, irqmask);
  bail:
 	unlock(&lpc->lock);
 	return rc;
@@ -612,11 +606,12 @@ static int64_t __lpc_read(struct lpcm *lpc, enum OpalLPCAddressType addr_type,
 			  bool probe)
 {
 	uint32_t opb_base;
+	uint32_t irqmask;
 	int64_t rc;
 
 	lock(&lpc->lock);
 	if (probe) {
-		rc = lpc_probe_prepare(lpc);
+		rc = lpc_probe_prepare(lpc, &irqmask);
 		if (rc)
 			goto bail;
 	}
@@ -635,7 +630,7 @@ static int64_t __lpc_read(struct lpcm *lpc, enum OpalLPCAddressType addr_type,
 		goto bail;
 
 	if (probe)
-		rc = lpc_probe_test(lpc);
+		rc = lpc_probe_test(lpc, irqmask);
  bail:
 	unlock(&lpc->lock);
 	return rc;


More information about the Skiboot mailing list