[Skiboot] [PATCH v2 08/10] opal/eeh: Add PHB diag data in error log for PE errors.
Mahesh Salgaonkar
mahesh at linux.ibm.com
Wed Oct 7 23:09:41 AEDT 2020
Gather PHB status (diag data) for the corresponding frozen PE and add it to
the error log. Since we query the PHB status during the freeze_status/next_error
OPAL calls, we can't use get_diag_data2(), which clears the errors. Instead,
introduce a new phb_ops callback that collects only the PHB status required
for the error log. This ensures Linux also gets proper diag data when
it calls get_diag_data2().
Signed-off-by: Mahesh Salgaonkar <mahesh at linux.ibm.com>
---
core/pci-opal.c | 21 +++++++++++++++++++--
hw/phb3.c | 25 +++++++++++++++++++++----
hw/phb4.c | 24 ++++++++++++++++++++----
include/pci.h | 2 ++
4 files changed, 62 insertions(+), 10 deletions(-)
diff --git a/core/pci-opal.c b/core/pci-opal.c
index 333682a0b..34de05ddf 100644
--- a/core/pci-opal.c
+++ b/core/pci-opal.c
@@ -127,6 +127,9 @@ static void send_phb_freeze_event(struct phb *phb, void *diag_buffer)
static void send_phb_pe_freeze_event(struct phb *phb, uint64_t pe_number)
{
struct errorlog *buf;
+ void *diag_buffer;
+ uint32_t len;
+ int rc;
buf = opal_elog_create(&e_info(OPAL_RC_PCI_PHB_FREEZE), 0);
if (!buf) {
@@ -137,11 +140,25 @@ static void send_phb_pe_freeze_event(struct phb *phb, uint64_t pe_number)
log_append_msg(buf, "PHB#%x PE#%lld Freeze/Fence detected!\n",
phb->opal_id, pe_number);
- /* TODO: Get PHB status data */
+ /* Get PHB status data */
+ len = dt_prop_get_u32(phb->dt_node, "ibm,phb-diag-data-size");
+ diag_buffer = zalloc(len);
+ if (diag_buffer) {
+ rc = phb->ops->get_phb_status(phb, diag_buffer, len);
+ if (rc != OPAL_SUCCESS) {
+ prerror("Failed to gather phb diag data\n");
+ free(diag_buffer);
+ diag_buffer = NULL;
+ }
+ } else
+ prerror("Failed to allocate size for phb diag data\n");
+
/* TODO: Add location info of slot with forzen PEs */
- send_eeh_serviceable_event(phb, buf, NULL);
+ send_eeh_serviceable_event(phb, buf, diag_buffer);
bitmap_set_bit(*phb->pe_freeze_reported, pe_number);
+ if (diag_buffer)
+ free(diag_buffer);
}
static int64_t opal_pci_config_read_half_word_be(uint64_t phb_id,
diff --git a/hw/phb3.c b/hw/phb3.c
index 5465b62ae..1bd753bae 100644
--- a/hw/phb3.c
+++ b/hw/phb3.c
@@ -3457,13 +3457,13 @@ static int64_t phb3_err_inject(struct phb *phb, uint64_t pe_number,
return handler(p, pe_number, addr, mask, is_write);
}
-static int64_t phb3_get_diag_data(struct phb *phb,
+/* Get phb diag data only. Do not clear the phb pending errors. */
+static int64_t phb3_get_phb_status(struct phb *phb,
void *diag_buffer,
uint64_t diag_buffer_len)
{
struct phb3 *p = phb_to_phb3(phb);
struct OpalIoPhb3ErrorData *data = diag_buffer;
- bool fenced;
if (diag_buffer_len < sizeof(struct OpalIoPhb3ErrorData))
return OPAL_PARAMETER;
@@ -3474,10 +3474,26 @@ static int64_t phb3_get_diag_data(struct phb *phb,
* Dummy check for fence so that phb3_read_phb_status knows
* whether to use ASB or AIB
*/
- fenced = phb3_fenced(p);
+ phb3_fenced(p);
phb3_read_phb_status(p, data);
- if (!fenced)
+ return OPAL_SUCCESS;
+}
+
+/* Get phb diag data and clear the phb pending errors. */
+static int64_t phb3_get_diag_data(struct phb *phb,
+ void *diag_buffer,
+ uint64_t diag_buffer_len)
+{
+ int64_t rc;
+ struct phb3 *p = phb_to_phb3(phb);
+ struct OpalIoPhb3ErrorData *data = diag_buffer;
+
+ rc = phb3_get_phb_status(phb, diag_buffer, diag_buffer_len);
+ if (!rc)
+ return rc;
+
+ if (!(p->flags & PHB3_AIB_FENCED))
phb3_eeh_dump_regs(p, data);
/*
@@ -3873,6 +3889,7 @@ static const struct phb_ops phb3_ops = {
.next_error = phb3_eeh_next_error,
.err_inject = phb3_err_inject,
.get_diag_data2 = phb3_get_diag_data,
+ .get_phb_status = phb3_get_phb_status,
.set_capi_mode = phb3_set_capi_mode,
.set_capp_recovery = phb3_set_capp_recovery,
};
diff --git a/hw/phb4.c b/hw/phb4.c
index cd50361fc..a088a640e 100644
--- a/hw/phb4.c
+++ b/hw/phb4.c
@@ -4091,11 +4091,11 @@ static int64_t phb4_err_inject(struct phb *phb, uint64_t pe_number,
return handler(p, pe_number, addr, mask, is_write);
}
-static int64_t phb4_get_diag_data(struct phb *phb,
+/* Get phb diag data only. Do not clear the phb pending errors. */
+static int64_t phb4_get_phb_status(struct phb *phb,
void *diag_buffer,
uint64_t diag_buffer_len)
{
- bool fenced;
struct phb4 *p = phb_to_phb4(phb);
struct OpalIoPhb4ErrorData *data = diag_buffer;
@@ -4108,10 +4108,25 @@ static int64_t phb4_get_diag_data(struct phb *phb,
* Dummy check for fence so that phb4_read_phb_status knows
* whether to use ASB or AIB
*/
- fenced = phb4_fenced(p);
+ phb4_fenced(p);
phb4_read_phb_status(p, data);
- if (!fenced)
+ return OPAL_SUCCESS;
+}
+
+/* Get phb diag data and clear the phb pending errors. */
+static int64_t phb4_get_diag_data(struct phb *phb,
+ void *diag_buffer,
+ uint64_t diag_buffer_len)
+{
+ int64_t rc;
+ struct phb4 *p = phb_to_phb4(phb);
+
+ rc = phb4_get_phb_status(phb, diag_buffer, diag_buffer_len);
+ if (!rc)
+ return rc;
+
+ if (!(p->flags & PHB4_AIB_FENCED))
phb4_eeh_dump_regs(p);
/*
@@ -4938,6 +4953,7 @@ static const struct phb_ops phb4_ops = {
.next_error = phb4_eeh_next_error,
.err_inject = phb4_err_inject,
.get_diag_data2 = phb4_get_diag_data,
+ .get_phb_status = phb4_get_phb_status,
.tce_kill = phb4_tce_kill,
.set_capi_mode = phb4_set_capi_mode,
.set_p2p = phb4_set_p2p,
diff --git a/include/pci.h b/include/pci.h
index d8f712e72..8da57a954 100644
--- a/include/pci.h
+++ b/include/pci.h
@@ -254,6 +254,8 @@ struct phb_ops {
uint64_t mask);
int64_t (*get_diag_data2)(struct phb *phb, void *diag_buffer,
uint64_t diag_buffer_len);
+ int64_t (*get_phb_status)(struct phb *phb, void *diag_buffer,
+ uint64_t diag_buffer_len);
int64_t (*next_error)(struct phb *phb, uint64_t *first_frozen_pe,
uint16_t *pci_error_type, uint16_t *severity);
More information about the Skiboot
mailing list