[Skiboot] [PATCH v7 17/22] fadump: Add support to trigger memory preserving IPL on BMC system
Oliver
oohall at gmail.com
Fri May 10 11:50:06 AEST 2019
On Sat, Apr 13, 2019 at 7:20 PM Vasant Hegde
<hegdevasant at linux.vnet.ibm.com> wrote:
>
> On FSP based system we call 'attn' instruction. FSP detects attention and
> initiates memory preserving IPL. On BMC system we have to call SBE S0
> interrupt to initiate memory preserving IPL.
>
> This patch adds support to call SBE S0 interrupt in assert path.
> Sequence :
> - S0 interrupt on slave chip SBE
> - S0 interrupt on master chip SBE
>
> Note that this is hooked to ipmi_terminate path. We have HDAT flag for MPIPL
> support. If MPIPL is not supported then we don't create 'ibm,opal/dump' node
> and we will fall back to existing termination flow.
>
> Note:
> - It uses xscom node to detect master and slave chip. So that we can
> trigger early MPIPL (even before initializing SBE driver).
> - At present we do not have a proper way to detect SBE is alive or not.
> So we wait for predefined time and then call normal reboot.
>
> Signed-off-by: Vasant Hegde <hegdevasant at linux.vnet.ibm.com>
> ---
> hw/ipmi/ipmi-attn.c | 9 +++++++-
> hw/sbe-p9.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> include/sbe-p9.h | 10 +++++++++
> 3 files changed, 81 insertions(+), 1 deletion(-)
>
> diff --git a/hw/ipmi/ipmi-attn.c b/hw/ipmi/ipmi-attn.c
> index c6c1c59b5..ae97e92ab 100644
> --- a/hw/ipmi/ipmi-attn.c
> +++ b/hw/ipmi/ipmi-attn.c
> @@ -1,4 +1,4 @@
> -/* Copyright 2015 IBM Corp.
> +/* Copyright 2015-2019 IBM Corp.
> *
> * Licensed under the Apache License, Version 2.0 (the "License");
> * you may not use this file except in compliance with the License.
> @@ -19,6 +19,7 @@
> #include <pel.h>
> #include <platform.h>
> #include <processor.h>
> +#include <sbe-p9.h>
> #include <skiboot.h>
> #include <stack.h>
> #include <timebase.h>
> @@ -65,6 +66,12 @@ static void ipmi_log_terminate_event(const char *msg)
>
> void __attribute__((noreturn)) ipmi_terminate(const char *msg)
> {
> + /*
> + * If fadump is supported then trigger SBE interrupt
> + * to initiate fadump
> + */
> + p9_sbe_terminate();
Is this safe to do if the rest of the system doesn't support it?
> /* Terminate called before initializing IPMI (early abort) */
> if (!ipmi_present()) {
> if (platform.cec_reboot)
The context is omitted in the patch but the ipmi_present() check here
is used to determine if we should send an eSEL to report the error
before rebooting. You probably want to do the same prior to an MPIPL.
> diff --git a/hw/sbe-p9.c b/hw/sbe-p9.c
> index ba7691f50..694375a5a 100644
> --- a/hw/sbe-p9.c
> +++ b/hw/sbe-p9.c
> @@ -948,3 +948,66 @@ void p9_sbe_init(void)
> /* Initiate SBE timeout poller */
> opal_add_poller(p9_sbe_timeout_poll, NULL);
> }
> +
> +/* Terminate and initiate MPIPL */
> +void p9_sbe_terminate(void)
> +{
> + uint32_t chip_id;
> + uint32_t primary_chip = -1;
> + int rc;
> + u64 wait_tb;
> + struct dt_node *xn;
> +
> + if (proc_gen < proc_gen_p9)
> + return;
Redundant. Either MPIPL is supported and the dump node exists, or it's
not supported and the subsequent check is going to bail.
> +
> + /* Return if MPIPL is not supported */
> + if (!dt_find_by_path(opal_node, "dump"))
> + return;
> + dt_for_each_compatible(dt_root, xn, "ibm,xscom") {
> + chip_id = dt_get_chip_id(xn);
Why are we iterating the device-tree directly here instead of the
proc_chip list?
> +
> + if (dt_has_node_property(xn, "primary", NULL)) {
> + primary_chip = chip_id;
> + continue;
> + }
> +
> + rc = xscom_write(chip_id,
> + SBE_CONTROL_REG_RW, SBE_CONTROL_REG_S0);
> + /* Initiate normal reboot */
> + if (rc) {
> + prlog(PR_ERR, "Failed to write S0 interrupt [chip id = %x]\n",
> + chip_id);
> + return;
> + }
> + }
> +
> + if (primary_chip == -1) {
> + prlog(PR_ERR, "Master chip ID not found.\n");
> + return;
> + }
> +
> + /* Write S0 interrupt on master SBE */
> + rc = xscom_write(primary_chip,
> + SBE_CONTROL_REG_RW, SBE_CONTROL_REG_S0);
> + /* Initiate normal reboot */
> + if (rc) {
> + prlog(PR_ERR, "Failed to write S0 interrupt [chip id = %x]\n",
> + primary_chip);
> + return;
> + }
> +
> + /* XXX We do not have a way to detect SBE state. Hence wait for max
> + * time SBE takes to respond and then trigger normal reboot.
> + */
> + prlog(PR_NOTICE, "Initiated MPIPL, waiting for SBE to respond...\n");
> + wait_tb = mftb() + msecs_to_tb(SBE_CMD_TIMEOUT_MAX);
> + while (mftb() < wait_tb) {
> + cpu_relax();
> + }
> +
> + prlog(PR_ERR, "SBE did not respond within timeout period (%d secs).\n",
> + SBE_CMD_TIMEOUT_MAX / 1000);
/* Worst case command timeout value (90 sec) */
#define SBE_CMD_TIMEOUT_MAX (90 * 1000)
This seems excessive. Also add a comment or something explaining that
the loop is not supposed to terminate rather than just hinting at it.
> + prlog(PR_ERR, "Falling back to normal reboot\n");
> +}
> diff --git a/include/sbe-p9.h b/include/sbe-p9.h
> index ac1ec219b..9ea7d294f 100644
> --- a/include/sbe-p9.h
> +++ b/include/sbe-p9.h
> @@ -103,6 +103,13 @@
> #define SBE_HOST_TIMER_EXPIRY PPC_BIT(14)
> #define SBE_HOST_RESPONSE_MASK (PPC_BITMASK(0, 4) | SBE_HOST_TIMER_EXPIRY)
>
> +/* SBE Control Register */
> +#define SBE_CONTROL_REG_RW 0x00050008
> +
> +/* SBE interrupt s0/s1 bits */
> +#define SBE_CONTROL_REG_S0 PPC_BIT(14)
> +#define SBE_CONTROL_REG_S1 PPC_BIT(15)
> +
> /* SBE Target Type */
> #define SBE_TARGET_TYPE_PROC 0x00
> #define SBE_TARGET_TYPE_EX 0x01
> @@ -243,4 +250,7 @@ extern void p9_sbe_update_timer_expiry(uint64_t new_target);
> /* Send skiboot relocated base address to SBE */
> extern void p9_sbe_send_relocated_base(uint64_t reloc_base);
>
> +/* Terminate and trigger MPIPL */
> +extern void p9_sbe_terminate(void);
> +
> #endif /* __SBE_P9_H */
> --
> 2.14.3
>
> _______________________________________________
> Skiboot mailing list
> Skiboot at lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/skiboot
More information about the Skiboot
mailing list