[PATCH v4] cxl: mask slice error interrupts after first occurrence
Andrew Donnellan
andrew.donnellan at au1.ibm.com
Mon May 1 12:45:54 AEST 2017
On 01/05/17 10:53, Alastair D'Silva wrote:
> From: Alastair D'Silva <alastair at d-silva.org>
>
> In some situations, a faulty AFU slice may create an interrupt storm of
> slice errors, rendering the machine unusable. Since these interrupts are
> informational only, present the interrupt once, then mask it off to
> prevent it from being retriggered until the AFU is reset.
>
> Signed-off-by: Alastair D'Silva <alastair at d-silva.org>
Thanks for the fixups.
Reviewed-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
> ---
> Changelog:
> v4:
> Fix duplicate/missing entries in aggregate macros
> Minor textual changes
> v3:
> Add CXL_PSL_SERR_An_IRQS, CXL_PSL_SERR_An_IRQ_MASKS macros
> Explicitly reenable masked interrupts after reset
> Issue an info line that subsequent interrupts will be masked
> v2:
> Rebase against linux-next
> ---
> drivers/misc/cxl/cxl.h | 18 ++++++++++++++++++
> drivers/misc/cxl/native.c | 19 +++++++++++++++++--
> 2 files changed, 35 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
> index 452e209..c8568ea 100644
> --- a/drivers/misc/cxl/cxl.h
> +++ b/drivers/misc/cxl/cxl.h
> @@ -228,6 +228,24 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0};
> #define CXL_PSL_SERR_An_llcmdto (1ull << (63-6))
> #define CXL_PSL_SERR_An_afupar (1ull << (63-7))
> #define CXL_PSL_SERR_An_afudup (1ull << (63-8))
> +#define CXL_PSL_SERR_An_IRQS ( \
> + CXL_PSL_SERR_An_afuto | CXL_PSL_SERR_An_afudis | CXL_PSL_SERR_An_afuov | \
> + CXL_PSL_SERR_An_badsrc | CXL_PSL_SERR_An_badctx | CXL_PSL_SERR_An_llcmdis | \
> + CXL_PSL_SERR_An_llcmdto | CXL_PSL_SERR_An_afupar | CXL_PSL_SERR_An_afudup)
> +#define CXL_PSL_SERR_An_afuto_mask (1ull << (63-32))
> +#define CXL_PSL_SERR_An_afudis_mask (1ull << (63-33))
> +#define CXL_PSL_SERR_An_afuov_mask (1ull << (63-34))
> +#define CXL_PSL_SERR_An_badsrc_mask (1ull << (63-35))
> +#define CXL_PSL_SERR_An_badctx_mask (1ull << (63-36))
> +#define CXL_PSL_SERR_An_llcmdis_mask (1ull << (63-37))
> +#define CXL_PSL_SERR_An_llcmdto_mask (1ull << (63-38))
> +#define CXL_PSL_SERR_An_afupar_mask (1ull << (63-39))
> +#define CXL_PSL_SERR_An_afudup_mask (1ull << (63-40))
> +#define CXL_PSL_SERR_An_IRQ_MASKS ( \
> + CXL_PSL_SERR_An_afuto_mask | CXL_PSL_SERR_An_afudis_mask | CXL_PSL_SERR_An_afuov_mask | \
> + CXL_PSL_SERR_An_badsrc_mask | CXL_PSL_SERR_An_badctx_mask | CXL_PSL_SERR_An_llcmdis_mask | \
> + CXL_PSL_SERR_An_llcmdto_mask | CXL_PSL_SERR_An_afupar_mask | CXL_PSL_SERR_An_afudup_mask)
> +
> #define CXL_PSL_SERR_An_AE (1ull << (63-30))
>
> /****** CXL_PSL_SCNTL_An ****************************************************/
> diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
> index 194c58e..290950b 100644
> --- a/drivers/misc/cxl/native.c
> +++ b/drivers/misc/cxl/native.c
> @@ -95,12 +95,23 @@ int cxl_afu_disable(struct cxl_afu *afu)
> /* This will disable as well as reset */
> static int native_afu_reset(struct cxl_afu *afu)
> {
> + int rc;
> + u64 serr;
> +
> pr_devel("AFU reset request\n");
>
> - return afu_control(afu, CXL_AFU_Cntl_An_RA, 0,
> + rc = afu_control(afu, CXL_AFU_Cntl_An_RA, 0,
> CXL_AFU_Cntl_An_RS_Complete | CXL_AFU_Cntl_An_ES_Disabled,
> CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK,
> false);
> +
> + /* Re-enable any masked interrupts */
> + serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
> + serr &= ~CXL_PSL_SERR_An_IRQ_MASKS;
> + cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
> +
> +
> + return rc;
> }
>
> static int native_afu_check_and_enable(struct cxl_afu *afu)
> @@ -1205,7 +1216,7 @@ static irqreturn_t native_slice_irq_err(int irq, void *data)
> {
> struct cxl_afu *afu = data;
> u64 errstat, serr, afu_error, dsisr;
> - u64 fir_slice, afu_debug;
> + u64 fir_slice, afu_debug, irq_mask;
>
> /*
> * slice err interrupt is only used with full PSL (no XSL)
> @@ -1226,7 +1237,11 @@ static irqreturn_t native_slice_irq_err(int irq, void *data)
> dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error);
> dev_crit(&afu->dev, "PSL_DSISR_An: 0x%.16llx\n", dsisr);
>
> + /* mask off the IRQ so it won't retrigger until the AFU is reset */
> + irq_mask = (serr & CXL_PSL_SERR_An_IRQS) >> 32;
> + serr |= irq_mask;
> cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
> + dev_info(&afu->dev, "Further such interrupts will be masked until the AFU is reset\n");
>
> return IRQ_HANDLED;
> }
>
--
Andrew Donnellan OzLabs, ADL Canberra
andrew.donnellan at au1.ibm.com IBM Australia Limited
More information about the Linuxppc-dev mailing list