[Skiboot] [PATCH] eeh: Fix eeh event handling
Benjamin Herrenschmidt
benh at kernel.crashing.org
Thu Apr 9 16:24:59 AEST 2015
On Thu, 2015-04-09 at 13:47 +1000, Alistair Popple wrote:
> The opal eeh interrupt handlers raise an opal event
> (OPAL_EVENT_PCI_ERROR) whenever there is some processing required from
> the OS. The OS then needs to call opal_pci_next_error(...) in a loop
> passing each phb in turn to clear the event.
>
> However, opal_pci_next_error(...) clears the event unconditionally,
> meaning it would be possible for eeh events to be cleared without
> processing them, leading to missed events.
>
> This patch fixes the problem by keeping track of eeh events on a
> per-phb basis and only clearing the opal event once all phb eeh events
> have been cleared.
You need to put a warning or something if you get a PHB ID >= 64 :-)
The pending-event mask is a single uint64_t, so "1ULL << phb_id" cannot
track a larger ID. It might hit us on brazos...
Ben.
> Signed-off-by: Alistair Popple <alistair at popple.id.au>
> ---
> core/pci-opal.c | 23 +++++++++++++++++++++--
> core/pci.c | 2 ++
> hw/p7ioc-phb.c | 2 +-
> hw/p7ioc.c | 7 +++++--
> include/pci.h | 3 +++
> 5 files changed, 32 insertions(+), 5 deletions(-)
>
> diff --git a/core/pci-opal.c b/core/pci-opal.c
> index a1c6e7a..75c689e 100644
> --- a/core/pci-opal.c
> +++ b/core/pci-opal.c
> @@ -53,6 +53,26 @@ opal_call(OPAL_PCI_CONFIG_WRITE_BYTE, opal_pci_config_write_byte, 4);
> opal_call(OPAL_PCI_CONFIG_WRITE_HALF_WORD, opal_pci_config_write_half_word, 4);
> opal_call(OPAL_PCI_CONFIG_WRITE_WORD, opal_pci_config_write_word, 4);
>
> +static struct lock opal_eeh_evt_lock = LOCK_UNLOCKED;
> +static uint64_t opal_eeh_evt = 0;
> +
> +void opal_pci_eeh_set_evt(uint64_t phb_id)
> +{
> + lock(&opal_eeh_evt_lock);
> + opal_eeh_evt |= 1ULL << phb_id;
> + opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, OPAL_EVENT_PCI_ERROR);
> + unlock(&opal_eeh_evt_lock);
> +}
> +
> +void opal_pci_eeh_clear_evt(uint64_t phb_id)
> +{
> + lock(&opal_eeh_evt_lock);
> + opal_eeh_evt &= ~(1ULL << phb_id);
> + if (!opal_eeh_evt)
> + opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, 0);
> + unlock(&opal_eeh_evt_lock);
> +}
> +
> static int64_t opal_pci_eeh_freeze_status(uint64_t phb_id, uint64_t pe_number,
> uint8_t *freeze_state,
> uint16_t *pci_error_type,
> @@ -660,8 +680,7 @@ static int64_t opal_pci_next_error(uint64_t phb_id, uint64_t *first_frozen_pe,
> return OPAL_UNSUPPORTED;
> phb->ops->lock(phb);
>
> - /* Any call to this function clears the error event */
> - opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, 0);
> + opal_pci_eeh_clear_evt(phb_id);
> rc = phb->ops->next_error(phb, first_frozen_pe, pci_error_type,
> severity);
> phb->ops->unlock(phb);
> diff --git a/core/pci.c b/core/pci.c
> index 5d97d4b..0b7dd64 100644
> --- a/core/pci.c
> +++ b/core/pci.c
> @@ -21,6 +21,8 @@
> #include <timebase.h>
> #include <device.h>
>
> +/* The eeh event code will need updating if this is ever increased to
> + * support more than 64 phbs */
> static struct phb *phbs[64];
>
> #define PCITRACE(_p, _bdfn, fmt, a...) \
> diff --git a/hw/p7ioc-phb.c b/hw/p7ioc-phb.c
> index d84fd15..89592c4 100644
> --- a/hw/p7ioc-phb.c
> +++ b/hw/p7ioc-phb.c
> @@ -2740,7 +2740,7 @@ static void p7ioc_phb_err_interrupt(void *data, uint32_t isn)
>
> PHBDBG(p, "Got interrupt 0x%04x\n", isn);
>
> - opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, OPAL_EVENT_PCI_ERROR);
> + opal_pci_eeh_set_evt(p->phb.opal_id);
>
> /* If the PHB is broken, go away */
> if (p->state == P7IOC_PHB_STATE_BROKEN)
> diff --git a/hw/p7ioc.c b/hw/p7ioc.c
> index 2315e81..48a9dc9 100644
> --- a/hw/p7ioc.c
> +++ b/hw/p7ioc.c
> @@ -575,8 +575,11 @@ static void p7ioc_rgc_interrupt(void *data, uint32_t isn)
>
> /* We will notify OS while getting error from GEM */
> if (p7ioc_check_GEM(ioc))
> - opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
> - OPAL_EVENT_PCI_ERROR);
> + /* This is a bit hacky but works - we raise the event
> + on a downstream phb as the OS needs to call
> + opal_pci_next_error for all phbs to ensure all events
> + are cleared anyway. */
> + opal_pci_eeh_set_evt(ioc->phbs[0].phb.opal_id);
> }
>
> static const struct irq_source_ops p7ioc_rgc_irq_ops = {
> diff --git a/include/pci.h b/include/pci.h
> index 9573961..6b3a228 100644
> --- a/include/pci.h
> +++ b/include/pci.h
> @@ -511,4 +511,7 @@ extern void pci_std_swizzle_irq_map(struct dt_node *dt_node,
> extern void pci_init_slots(void);
> extern void pci_reset(void);
>
> +extern void opal_pci_eeh_set_evt(uint64_t phb_id);
> +extern void opal_pci_eeh_clear_evt(uint64_t phb_id);
> +
> #endif /* __PCI_H */
More information about the Skiboot
mailing list