[Skiboot] [PATCH v2 3/3] core/pci-quirk: Microsemi switch UR workaround

Alistair Popple alistair at popple.id.au
Fri Aug 2 12:28:02 AEST 2019


This looks like a pretty good compromise to me, getting the caching right 
would have scared me anyway.

The only comment I have is we could in theory end up blocking the entire config 
space. Obviously that means something is pretty broken so not sure if you want 
to define some minimum viable config space and let the EEH errors through for 
those.  In other words start scanning from say offset 0x3c instead of 0x00 
which might also speed things up slightly.

However the code looks good, and the above is mostly a nit-pick that I don't 
feel particularly strongly about so:

Reveiwed-By: Alistair Popple <alistair at popple.id.au>

On Thursday, 1 August 2019 4:44:24 PM AEST Oliver O'Halloran wrote:
> Some Microsemi switches have a bug where accessing an unimplemented
> config space register causes an Unsupported Request error. This is a
> violation of the PCI spec which requires devices to ignore writes and
> return 0x00 when an unimplemented config space register is accessed.
> 
> Linux allows userspace to access all of config space and tools
> (e.g. lspci) will read the entire 4KB space. This results in flood of
> spurious EEH events since POWER chips treat URs as an indication of a
> malfunctioning device.
> 
> This patch adds a PCI device quirk that scans the config space of
> the switch in early boot to determine what ranges will trigger a UR.
> With this information we can then use config filters to block accesses
> to the problematic ranges.
> 
> This scanning process is a little slow, but:
> 
> a) This bug should be resolved by a switch firmware update eventually, and
> b) System firmware updates might result PCIe capabilities being added or
>    removed from the switch's config space. This means that we would
>    have a cache invalidation problem which isn't straightforward to
>    resolve.
> 
> We can check if the workaround is needed at all by reading 0xFF (the end
> of the legacy config space) since we know the switch never has anything
> implemented for that address. Do the simple thing for now rather than
> trying to make it faster since this should be a temporary workaround.
> 
> Signed-off-by: Oliver O'Halloran <oohall at gmail.com>
> ---
> v2:
>     Check if the switch requires the workaround before
>     doing the scan and dropped the hard-coded ranges.
> 
>     Return 0x00 rather than 0xf0 for blocked reads since that's
>     what the PCI spec says to do.
> ---
>  core/pci-quirk.c          | 70 +++++++++++++++++++++++++++++++++++++++
>  core/test/run-pci-quirk.c | 19 +++++++++++
>  core/test/stubs.c         |  1 +
>  3 files changed, 90 insertions(+)
> 
> diff --git a/core/pci-quirk.c b/core/pci-quirk.c
> index 6832b9cee2ac..1fa43d80d12c 100644
> --- a/core/pci-quirk.c
> +++ b/core/pci-quirk.c
> @@ -9,10 +9,79 @@
>  
>  #include <skiboot.h>
>  #include <pci.h>
> +#include <pci-cfg.h>
>  #include <pci-quirk.h>
>  #include <platform.h>
>  #include <ast.h>
>  
> +static int64_t cfg_block_filter(void *dev __unused,
> +				struct pci_cfg_reg_filter *pcrf __unused,
> +				uint32_t offset __unused, uint32_t len,
> +				uint32_t *data, bool write)
> +{
> +	if (write)
> +		return OPAL_SUCCESS;
> +
> +	switch (len) {
> +	case 4:
> +		*data = 0x0;
> +		return OPAL_SUCCESS;
> +	case 2:
> +		*((uint16_t *)data) = 0x0;
> +		return OPAL_SUCCESS;
> +	case 1:
> +		*((uint8_t *)data) = 0x0;
> +		return OPAL_SUCCESS;
> +	}
> +
> +	return OPAL_PARAMETER; /* should never happen */
> +}
> +
> +/* blocks config accesses to registers in the range: [start, end] */
> +#define BLOCK_CFG_RANGE(pd, start, end) \
> +	pci_add_cfg_reg_filter(pd, start, end - start + 1, \
> +		PCI_REG_FLAG_WRITE | PCI_REG_FLAG_READ, \
> +		cfg_block_filter);
> +
> +static void quirk_microsemi_gen4_sw(struct phb *phb, struct pci_device *pd)
> +{
> +	uint8_t data;
> +	bool frozen;
> +	int offset;
> +	int start;
> +
> +	pci_check_clear_freeze(phb);
> +
> +	/*
> +	 * Reading from 0xff should trigger a UR on the affected switches.
> +	 * If we don't get a freeze then we don't need the workaround
> +	 */
> +	pci_cfg_read8(phb, pd->bdfn, 0xff, &data);
> +	frozen = pci_check_clear_freeze(phb);
> +	if (!frozen)
> +		return;
> +
> +	for (start = -1, offset = 0; offset < 4096; offset++) {
> +		pci_cfg_read8(phb, pd->bdfn, offset, &data);
> +		frozen = pci_check_clear_freeze(phb);
> +
> +		if (start < 0 && frozen) { /* new UR range */
> +			start = offset;
> +		} else if (start >= 0 && !frozen) { /* end of range */
> +			BLOCK_CFG_RANGE(pd, start, offset - 1);
> +			PCINOTICE(phb, pd->bdfn, "Applied UR workaround to [%03x..%03x]
\n", start, offset - 1);
> +
> +			start = -1;
> +		}
> +	}
> +
> +	/* range lasted until the end of config space */
> +	if (start >= 0) {
> +		BLOCK_CFG_RANGE(pd, start, 0xfff);
> +		PCINOTICE(phb, pd->bdfn, "Applied UR workaround to [%03x..fff]\n", 
start);
> +	}
> +}
> +
>  static void quirk_astbmc_vga(struct phb *phb __unused,
>  			     struct pci_device *pd)
>  {
> @@ -44,6 +113,7 @@ static void quirk_astbmc_vga(struct phb *phb __unused,
>  static const struct pci_quirk quirk_table[] = {
>  	/* ASPEED 2400 VGA device */
>  	{ 0x1a03, 0x2000, &quirk_astbmc_vga },
> +	{ 0x11f8, 0x4052, &quirk_microsemi_gen4_sw },
>  	{ 0, 0, NULL }
>  };
>  
> diff --git a/core/test/run-pci-quirk.c b/core/test/run-pci-quirk.c
> index 4c57c61adadf..bd5cacba9fb7 100644
> --- a/core/test/run-pci-quirk.c
> +++ b/core/test/run-pci-quirk.c
> @@ -1,6 +1,7 @@
>  #include <assert.h>
>  #include <stdint.h>
>  #include <compiler.h>
> +#include <stdbool.h>
>  
>  /* Stubs for quirk_astbmc_vga() */
>  
> @@ -27,6 +28,24 @@ static struct dt_property *__dt_add_property_cells(
>  	return (void *)0;
>  }
>  
> +struct pci_device;
> +struct pci_cfg_reg_filter;
> +typedef int64_t (*pci_cfg_reg_func)(void *dev,
> +				    struct pci_cfg_reg_filter *pcrf,
> +				    uint32_t offset, uint32_t len,
> +				    uint32_t *data, bool write);
> +
> +
> +static struct pci_cfg_reg_filter *pci_add_cfg_reg_filter(
> +	struct pci_device *pd __unused,
> +	uint32_t start __unused,
> +	uint32_t len __unused,
> +	uint32_t flags __unused,
> +	pci_cfg_reg_func func __unused)
> +{
> +	return NULL;
> +}
> +
>  #include "../pci-quirk.c"
>  
>  struct pci_device test_pd;
> diff --git a/core/test/stubs.c b/core/test/stubs.c
> index 343ee9ffcc4b..24836c414d06 100644
> --- a/core/test/stubs.c
> +++ b/core/test/stubs.c
> @@ -97,3 +97,4 @@ STUB(dt_next);
>  STUB(dt_has_node_property);
>  STUB(dt_get_address);
>  STUB(add_chip_dev_associativity);
> +STUB(pci_check_clear_freeze);
> 






More information about the Skiboot mailing list