[Skiboot] [PATCH v7 17/22] fadump: Add support to trigger memory preserving IPL on BMC system

Fri May 10 11:50:06 AEST 2019

On Sat, Apr 13, 2019 at 7:20 PM Vasant Hegde
<hegdevasant at linux.vnet.ibm.com> wrote:
>
> On FSP based system we call 'attn' instruction. FSP detects attention and
> initiates memory preserving IPL. On BMC system we have to call SBE S0
> interrupt to initiate memory preserving IPL.
>
> This patch adds support to call SBE S0 interrupt in assert path.
> Sequence :
>   - S0 interrupt on slave chip SBE
>   - S0 interrupt on master chip SBE
>
> Note that this is hooked to ipmi_terminate path. We have HDAT flag for MPIPL
> support. If MPIPL is not supported then we don't create 'ibm,opal/dump' node
> and we will fall back to existing termination flow.
>
> Note:
>   - It uses xscom node to detect master and slave chip. So that we can
>     trigger early MPIPL (even before initializing SBE driver).
>   - At present we do not have a proper way to detect SBE is alive or not.
>     So we wait for predefined time and then call normal reboot.
>
> Signed-off-by: Vasant Hegde <hegdevasant at linux.vnet.ibm.com>
> ---
>  hw/ipmi/ipmi-attn.c |  9 +++++++-
>  hw/sbe-p9.c         | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/sbe-p9.h    | 10 +++++++++
>  3 files changed, 81 insertions(+), 1 deletion(-)
>
> diff --git a/hw/ipmi/ipmi-attn.c b/hw/ipmi/ipmi-attn.c
> index c6c1c59b5..ae97e92ab 100644
> --- a/hw/ipmi/ipmi-attn.c
> +++ b/hw/ipmi/ipmi-attn.c
> @@ -1,4 +1,4 @@
> -/* Copyright 2015 IBM Corp.
> +/* Copyright 2015-2019 IBM Corp.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -19,6 +19,7 @@
>  #include <pel.h>
>  #include <platform.h>
>  #include <processor.h>
> +#include <sbe-p9.h>
>  #include <skiboot.h>
>  #include <stack.h>
>  #include <timebase.h>
> @@ -65,6 +66,12 @@ static void ipmi_log_terminate_event(const char *msg)
>

>  void __attribute__((noreturn)) ipmi_terminate(const char *msg)
>  {
> +       /*
> +        * If fadump is supported then trigger SBE interrupt
> +        * to initiate fadump
> +        */
> +       p9_sbe_terminate();

Is this safe to do if the rest of the system doesn't support it?

>         /* Terminate called before initializing IPMI (early abort) */
>         if (!ipmi_present()) {
>                 if (platform.cec_reboot)

The context is omitted in the patch but the ipmi_present() check here
is used to determine if we should send an eSEL to report the error
before rebooting. You probably want to do the same prior to an MPIPL.

> diff --git a/hw/sbe-p9.c b/hw/sbe-p9.c
> index ba7691f50..694375a5a 100644
> --- a/hw/sbe-p9.c
> +++ b/hw/sbe-p9.c
> @@ -948,3 +948,66 @@ void p9_sbe_init(void)
>         /* Initiate SBE timeout poller */
>         opal_add_poller(p9_sbe_timeout_poll, NULL);
>  }
> +
> +/* Terminate and initiate MPIPL */
> +void p9_sbe_terminate(void)
> +{
> +       uint32_t chip_id;
> +       uint32_t primary_chip = -1;
> +       int rc;
> +       u64 wait_tb;
> +       struct dt_node *xn;
> +

> +       if (proc_gen < proc_gen_p9)
> +               return;

Redundant. Either MPIPL is supported and the dump node exists, or it's
not supported and the subsequent check is going to bail.

> +
> +       /* Return if MPIPL is not supported */
> +       if (!dt_find_by_path(opal_node, "dump"))
> +               return;

> +       dt_for_each_compatible(dt_root, xn, "ibm,xscom") {
> +               chip_id = dt_get_chip_id(xn);
Why are we iterating the device-tree directly here instead of the
proc_chip list?

> +
> +               if (dt_has_node_property(xn, "primary", NULL)) {
> +                       primary_chip = chip_id;
> +                       continue;
> +               }
> +
> +               rc = xscom_write(chip_id,
> +                                SBE_CONTROL_REG_RW, SBE_CONTROL_REG_S0);
> +               /* Initiate normal reboot */
> +               if (rc) {
> +                       prlog(PR_ERR, "Failed to write S0 interrupt [chip id = %x]\n",
> +                             chip_id);
> +                       return;
> +               }
> +       }
> +
> +       if (primary_chip == -1) {
> +               prlog(PR_ERR, "Master chip ID not found.\n");
> +               return;
> +       }
> +
> +       /* Write S0 interrupt on master SBE */
> +       rc = xscom_write(primary_chip,
> +                        SBE_CONTROL_REG_RW, SBE_CONTROL_REG_S0);
> +       /* Initiate normal reboot */
> +       if (rc) {
> +               prlog(PR_ERR, "Failed to write S0 interrupt [chip id = %x]\n",
> +                     primary_chip);
> +               return;
> +       }
> +

> +       /* XXX We do not have a way to detect SBE state. Hence wait for max
> +        *     time SBE takes to respond and then trigger normal reboot.
> +        */
> +       prlog(PR_NOTICE, "Initiated MPIPL, waiting for SBE to respond...\n");
> +       wait_tb = mftb() + msecs_to_tb(SBE_CMD_TIMEOUT_MAX);
> +       while (mftb() < wait_tb) {
> +               cpu_relax();
> +       }
> +
> +       prlog(PR_ERR, "SBE did not respond within timeout period (%d secs).\n",
> +             SBE_CMD_TIMEOUT_MAX / 1000);

/* Worst case command timeout value (90 sec) */
#define SBE_CMD_TIMEOUT_MAX                     (90 * 1000)

This seems excessive. Also add a comment or something explaining that
the loop is not supposed to terminate rather than just hinting at it.

> +       prlog(PR_ERR, "Falling back to normal reboot\n");
> +}
> diff --git a/include/sbe-p9.h b/include/sbe-p9.h
> index ac1ec219b..9ea7d294f 100644
> --- a/include/sbe-p9.h
> +++ b/include/sbe-p9.h
> @@ -103,6 +103,13 @@
>  #define SBE_HOST_TIMER_EXPIRY          PPC_BIT(14)
>  #define SBE_HOST_RESPONSE_MASK         (PPC_BITMASK(0, 4) | SBE_HOST_TIMER_EXPIRY)
>
> +/* SBE Control Register */
> +#define SBE_CONTROL_REG_RW             0x00050008
> +
> +/* SBE interrupt s0/s1 bits */
> +#define SBE_CONTROL_REG_S0             PPC_BIT(14)
> +#define SBE_CONTROL_REG_S1             PPC_BIT(15)
> +
>  /* SBE Target Type */
>  #define SBE_TARGET_TYPE_PROC           0x00
>  #define SBE_TARGET_TYPE_EX             0x01
> @@ -243,4 +250,7 @@ extern void p9_sbe_update_timer_expiry(uint64_t new_target);
>  /* Send skiboot relocated base address to SBE */
>  extern void p9_sbe_send_relocated_base(uint64_t reloc_base);
>
> +/* Terminate and trigger MPIPL */
> +extern void p9_sbe_terminate(void);
> +
>  #endif /* __SBE_P9_H */
> --
> 2.14.3
>
> _______________________________________________
> Skiboot mailing list
> Skiboot at lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/skiboot