[Skiboot] [PATCH] eeh: Fix eeh event handling

Benjamin Herrenschmidt benh at kernel.crashing.org
Thu Apr 9 16:24:59 AEST 2015


On Thu, 2015-04-09 at 13:47 +1000, Alistair Popple wrote:
> The opal eeh interrupt handlers raise an opal event
> (OPAL_EVENT_PCI_ERROR) whenever there is some processing required from
> the OS. The OS then needs to call opal_pci_next_error(...) in a loop
> passing each phb in turn to clear the event.
> 
> However, opal_pci_next_error(...) clears the event unconditionally,
> meaning that eeh events can be cleared without being processed,
> leading to missed events.
> 
> This patch fixes the problem by keeping track of eeh events on a
> per-phb basis and only clearing the opal event once all phb eeh events
> have been cleared.

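You need to put a warning or something if you get a PHB ID >= 64, since
the event mask is a single uint64_t and 1ULL << phb_id is not valid for
a shift of 64 or more :-)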

It might hit us on brazos...

Ben.

> Signed-off-by: Alistair Popple <alistair at popple.id.au>
> ---
>  core/pci-opal.c | 23 +++++++++++++++++++++--
>  core/pci.c      |  2 ++
>  hw/p7ioc-phb.c  |  2 +-
>  hw/p7ioc.c      |  7 +++++--
>  include/pci.h   |  3 +++
>  5 files changed, 32 insertions(+), 5 deletions(-)
> 
> diff --git a/core/pci-opal.c b/core/pci-opal.c
> index a1c6e7a..75c689e 100644
> --- a/core/pci-opal.c
> +++ b/core/pci-opal.c
> @@ -53,6 +53,26 @@ opal_call(OPAL_PCI_CONFIG_WRITE_BYTE, opal_pci_config_write_byte, 4);
>  opal_call(OPAL_PCI_CONFIG_WRITE_HALF_WORD, opal_pci_config_write_half_word, 4);
>  opal_call(OPAL_PCI_CONFIG_WRITE_WORD, opal_pci_config_write_word, 4);
>  
> +static struct lock opal_eeh_evt_lock = LOCK_UNLOCKED;
> +static uint64_t opal_eeh_evt = 0;
> +
> +void opal_pci_eeh_set_evt(uint64_t phb_id)
> +{
> +	lock(&opal_eeh_evt_lock);
> +	opal_eeh_evt |= 1ULL << phb_id;
> +	opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, OPAL_EVENT_PCI_ERROR);
> +	unlock(&opal_eeh_evt_lock);
> +}
> +
> +void opal_pci_eeh_clear_evt(uint64_t phb_id)
> +{
> +	lock(&opal_eeh_evt_lock);
> +	opal_eeh_evt &= ~(1ULL << phb_id);
> +	if (!opal_eeh_evt)
> +		opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, 0);
> +	unlock(&opal_eeh_evt_lock);
> +}
> +
>  static int64_t opal_pci_eeh_freeze_status(uint64_t phb_id, uint64_t pe_number,
>  					  uint8_t *freeze_state,
>  					  uint16_t *pci_error_type,
> @@ -660,8 +680,7 @@ static int64_t opal_pci_next_error(uint64_t phb_id, uint64_t *first_frozen_pe,
>  		return OPAL_UNSUPPORTED;
>  	phb->ops->lock(phb);
>  
> -	/* Any call to this function clears the error event */
> -	opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, 0);
> +	opal_pci_eeh_clear_evt(phb_id);
>  	rc = phb->ops->next_error(phb, first_frozen_pe, pci_error_type,
>  				  severity);
>  	phb->ops->unlock(phb);
> diff --git a/core/pci.c b/core/pci.c
> index 5d97d4b..0b7dd64 100644
> --- a/core/pci.c
> +++ b/core/pci.c
> @@ -21,6 +21,8 @@
>  #include <timebase.h>
>  #include <device.h>
>  
> +/* The eeh event code will need updating if this is ever increased to
> + * support more than 64 phbs */
>  static struct phb *phbs[64];
>  
>  #define PCITRACE(_p, _bdfn, fmt, a...) \
> diff --git a/hw/p7ioc-phb.c b/hw/p7ioc-phb.c
> index d84fd15..89592c4 100644
> --- a/hw/p7ioc-phb.c
> +++ b/hw/p7ioc-phb.c
> @@ -2740,7 +2740,7 @@ static void p7ioc_phb_err_interrupt(void *data, uint32_t isn)
>  
>  	PHBDBG(p, "Got interrupt 0x%04x\n", isn);
>  
> -	opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, OPAL_EVENT_PCI_ERROR);
> +	opal_pci_eeh_set_evt(p->phb.opal_id);
>  
>  	/* If the PHB is broken, go away */
>  	if (p->state == P7IOC_PHB_STATE_BROKEN)
> diff --git a/hw/p7ioc.c b/hw/p7ioc.c
> index 2315e81..48a9dc9 100644
> --- a/hw/p7ioc.c
> +++ b/hw/p7ioc.c
> @@ -575,8 +575,11 @@ static void p7ioc_rgc_interrupt(void *data, uint32_t isn)
>  
>  	/* We will notify OS while getting error from GEM */
>  	if (p7ioc_check_GEM(ioc))
> -		opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
> -					OPAL_EVENT_PCI_ERROR);
> +		/* This is a bit hacky but works - we raise the event
> +		on a downstream phb as the OS needs to call
> +		opal_pci_next_error for all phbs to ensure all events
> +		are cleared anyway. */
> +		opal_pci_eeh_set_evt(ioc->phbs[0].phb.opal_id);
>  }
>  
>  static const struct irq_source_ops p7ioc_rgc_irq_ops = {
> diff --git a/include/pci.h b/include/pci.h
> index 9573961..6b3a228 100644
> --- a/include/pci.h
> +++ b/include/pci.h
> @@ -511,4 +511,7 @@ extern void pci_std_swizzle_irq_map(struct dt_node *dt_node,
>  extern void pci_init_slots(void);
>  extern void pci_reset(void);
>  
> +extern void opal_pci_eeh_set_evt(uint64_t phb_id);
> +extern void opal_pci_eeh_clear_evt(uint64_t phb_id);
> +
>  #endif /* __PCI_H */




More information about the Skiboot mailing list