[Skiboot] [PATCH 5/5] hmi: Recover both CAPP units on Naples after malfunction alert
Michael Neuling
mikey at neuling.org
Wed Feb 10 11:59:12 AEDT 2016
On Mon, 2016-02-08 at 16:30 +0100, Philippe Bergheaud wrote:
> In decode_one_malfunction, check the chip type and if Naples, then
> loop
> on both capp units.
Does this mean an error on one card takes down both? That seems broken.
Mikey
>
> Signed-off-by: Philippe Bergheaud <felix at linux.vnet.ibm.com>
> ---
> core/hmi.c | 29 +++++++++++++++++++++--------
> 1 file changed, 21 insertions(+), 8 deletions(-)
>
> diff --git a/core/hmi.c b/core/hmi.c
> index d2cca90..5204bb3 100644
> --- a/core/hmi.c
> +++ b/core/hmi.c
> @@ -242,14 +242,19 @@ static int queue_hmi_event(struct OpalHMIEvent
> *hmi_evt, int recover)
> hmi_data[3]);
> }
>
> -static int is_capp_recoverable(int chip_id)
> +static int is_capp_recoverable(int chip_id, int capp)
> {
> uint64_t reg;
> - xscom_read(chip_id, CAPP_ERR_STATUS_CTRL, &reg);
> + uint32_t reg_offset = capp ? CAPP1_REG_OFFSET : 0x0;
> +
> + xscom_read(chip_id, CAPP_ERR_STATUS_CTRL + reg_offset,
> &reg);
> return (reg & PPC_BIT(0)) != 0;
> }
>
> -static int handle_capp_recoverable(int chip_id)
> +#define CAPP_PHB3_ATTACHED(chip, phb_index) \
> + (chip->capp_phb3_attached_mask & (1 << phb_index))
> +
> +static int handle_capp_recoverable(int chip_id, int capp)
> {
> struct dt_node *np;
> u64 phb_id;
> @@ -257,14 +262,16 @@ static int handle_capp_recoverable(int chip_id)
> struct phb *phb;
> u32 phb_index;
> struct proc_chip *chip = get_chip(chip_id);
> - u8 mask = chip->capp_phb3_attached_mask;
> + int dual_capp = (chip->type == PROC_CHIP_P8_NAPLES);
>
> dt_for_each_compatible(dt_root, np, "ibm,power8-pciex") {
> dt_chip_id = dt_prop_get_u32(np, "ibm,chip-id");
> phb_index = dt_prop_get_u32(np, "ibm,phb-index");
> phb_id = dt_prop_get_u64(np, "ibm,opal-phbid");
>
> - if ((mask & (1 << phb_index)) && (chip_id ==
> dt_chip_id)) {
> + if ((chip_id == dt_chip_id) &&
> + CAPP_PHB3_ATTACHED(chip, phb_index) &&
> + (!dual_capp || phb_index == capp)) {
> phb = pci_get_phb(phb_id);
> phb->ops->lock(phb);
> phb->ops->set_capp_recovery(phb);
> @@ -277,13 +284,19 @@ static int handle_capp_recoverable(int chip_id)
>
> static int decode_one_malfunction(int flat_chip_id, struct
> OpalHMIEvent *hmi_evt)
> {
> + int capp;
> + int recover = 0;
> + struct proc_chip *chip = get_chip(flat_chip_id);
> + int dual_capp = (chip->type == PROC_CHIP_P8_NAPLES);
> +
> hmi_evt->severity = OpalHMI_SEV_FATAL;
> hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
>
> - if (is_capp_recoverable(flat_chip_id)) {
> - if (handle_capp_recoverable(flat_chip_id) == 0)
> - return 0;
> + for (capp = 0; capp < (dual_capp ? 2 : 1); capp++)
> + if (is_capp_recoverable(flat_chip_id, capp))
> + recover |=
> handle_capp_recoverable(flat_chip_id, capp);
>
> + if (recover) {
> hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
> hmi_evt->type = OpalHMI_ERROR_CAPP_RECOVERY;
> return 1;
More information about the Skiboot
mailing list