[Skiboot] [PATCH v2] xscom: Trace XSCOMs that take more than 10us
Stewart Smith
stewart at linux.vnet.ibm.com
Thu Sep 8 17:22:40 AEST 2016
From: Benjamin Herrenschmidt <benh at kernel.crashing.org>
If a XSCOM takes more than 10us, this will display an entry in the OPAL
log, with the PCB address, the time, and a number which is either an
error code (negative: the access failed) or a number of retries
(positive: the number of times the HMER status returned "1").
We need to suspend that testing while the timebase is being resynchronized
otherwise we will get bogus values.
Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
[stewart at linux.vnet.ibm.com: bump to 20us, PR_DEBUG rather than printf]
Signed-off-by: Stewart Smith <stewart at linux.vnet.ibm.com>
---
hw/chiptod.c | 12 ++++++---
hw/xscom.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++-----------
include/xscom.h | 5 ++++
3 files changed, 80 insertions(+), 18 deletions(-)
diff --git a/hw/chiptod.c b/hw/chiptod.c
index f647830aab32..e0fe397eb69b 100644
--- a/hw/chiptod.c
+++ b/hw/chiptod.c
@@ -635,13 +635,15 @@ static bool chiptod_to_tb(void)
return false;
}
+ xscom_set_tb_unreliable(true);
+
/* Make us ready to get the TB from the chipTOD */
mtspr(SPR_TFMR, base_tfmr | SPR_TFMR_MOVE_CHIP_TOD_TO_TB);
/* Tell the ChipTOD to send it */
if (xscom_writeme(TOD_CHIPTOD_TO_TB, PPC_BIT(0))) {
prerror("XSCOM error writing CHIPTOD_TO_TB\n");
- return false;
+ goto fail;
}
/* Wait for it to complete */
@@ -649,16 +651,20 @@ static bool chiptod_to_tb(void)
do {
if (++timeout >= TIMEOUT_LOOPS) {
prerror("Chip to TB timeout\n");
- return false;
+ goto fail;
}
tfmr = mfspr(SPR_TFMR);
if (tfmr & SPR_TFMR_TFMR_CORRUPT) {
prerror("MoveToTB: corrupt TFMR !\n");
- return false;
+ goto fail;
}
} while(tfmr & SPR_TFMR_MOVE_CHIP_TOD_TO_TB);
+ xscom_set_tb_unreliable(false);
return true;
+ fail:
+ xscom_set_tb_unreliable(false);
+ return false;
}
static bool chiptod_check_tb_running(void)
diff --git a/hw/xscom.c b/hw/xscom.c
index 9e9dcee49bd8..6e2b5772a105 100644
--- a/hw/xscom.c
+++ b/hw/xscom.c
@@ -25,6 +25,9 @@
#include <opal-api.h>
#include <timebase.h>
+/* Delay after which we warn that a XSCOM is taking a long time */
+#define XSCOM_DELAY_WARN_THRESHOLD (usecs_to_tb(20))
+
/* Mask of bits to clear in HMER before an access */
#define HMER_CLR_MASK (~(SPR_HMER_XSCOM_FAIL | \
SPR_HMER_XSCOM_DONE | \
@@ -61,6 +64,7 @@ static struct {
* use a global lock instead
*/
static struct lock xscom_lock = LOCK_UNLOCKED;
+static int64_t xscom_tb_unreliable;
static inline void *xscom_addr(uint32_t gcid, uint32_t pcb_addr)
{
@@ -220,7 +224,7 @@ static bool xscom_gcid_ok(uint32_t gcid)
* Low level XSCOM access functions, perform a single direct xscom
* access via MMIO
*/
-static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val)
+static int64_t __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val)
{
uint64_t hmer;
int64_t ret, retries;
@@ -244,7 +248,7 @@ static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val)
/* Check for error */
if (!(hmer & SPR_HMER_XSCOM_FAIL))
- return OPAL_SUCCESS;
+ return retries;
/* Handle error and possibly eventually retry */
ret = xscom_handle_error(hmer, gcid, pcb_addr, false, retries);
@@ -256,7 +260,7 @@ static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val)
return ret;
}
-static int __xscom_write(uint32_t gcid, uint32_t pcb_addr, uint64_t val)
+static int64_t __xscom_write(uint32_t gcid, uint32_t pcb_addr, uint64_t val)
{
uint64_t hmer;
int64_t ret, retries = 0;
@@ -280,7 +284,7 @@ static int __xscom_write(uint32_t gcid, uint32_t pcb_addr, uint64_t val)
/* Check for error */
if (!(hmer & SPR_HMER_XSCOM_FAIL))
- return OPAL_SUCCESS;
+ return retries;
/* Handle error and possibly eventually retry */
ret = xscom_handle_error(hmer, gcid, pcb_addr, true, retries);
@@ -295,11 +299,12 @@ static int __xscom_write(uint32_t gcid, uint32_t pcb_addr, uint64_t val)
/*
* Indirect XSCOM access functions
*/
-static int xscom_indirect_read(uint32_t gcid, uint64_t pcb_addr, uint64_t *val)
+static int64_t xscom_indirect_read(uint32_t gcid, uint64_t pcb_addr, uint64_t *val)
{
uint32_t addr;
uint64_t data;
int rc, retries;
+ int64_t comp_retries = 0;
if (proc_gen < proc_gen_p8) {
*val = (uint64_t)-1;
@@ -311,14 +316,16 @@ static int xscom_indirect_read(uint32_t gcid, uint64_t pcb_addr, uint64_t *val)
data = XSCOM_DATA_IND_READ |
(pcb_addr & XSCOM_ADDR_IND_ADDR);
rc = __xscom_write(gcid, addr, data);
- if (rc)
+ if (rc < 0)
goto bail;
+ comp_retries = rc;
/* Wait for completion */
for (retries = 0; retries < XSCOM_IND_MAX_RETRIES; retries++) {
rc = __xscom_read(gcid, addr, &data);
- if (rc)
+ if (rc < 0)
goto bail;
+ comp_retries += rc;
if ((data & XSCOM_DATA_IND_COMPLETE) &&
((data & XSCOM_DATA_IND_ERR) == 0)) {
*val = data & XSCOM_DATA_IND_DATA;
@@ -333,16 +340,19 @@ static int xscom_indirect_read(uint32_t gcid, uint64_t pcb_addr, uint64_t *val)
}
}
bail:
- if (rc)
+ if (rc < 0) {
*val = (uint64_t)-1;
- return rc;
+ return rc;
+ }
+ return comp_retries;
}
-static int xscom_indirect_write(uint32_t gcid, uint64_t pcb_addr, uint64_t val)
+static int64_t xscom_indirect_write(uint32_t gcid, uint64_t pcb_addr, uint64_t val)
{
uint32_t addr;
uint64_t data;
int rc, retries;
+ int64_t comp_retries = 0;
if (proc_gen < proc_gen_p8)
return OPAL_UNSUPPORTED;
@@ -353,14 +363,16 @@ static int xscom_indirect_write(uint32_t gcid, uint64_t pcb_addr, uint64_t val)
data |= val & XSCOM_ADDR_IND_DATA;
rc = __xscom_write(gcid, addr, data);
- if (rc)
+ if (rc < 0)
goto bail;
+ comp_retries = rc;
/* Wait for completion */
for (retries = 0; retries < XSCOM_IND_MAX_RETRIES; retries++) {
rc = __xscom_read(gcid, addr, &data);
- if (rc)
+ if (rc < 0)
goto bail;
+ comp_retries += rc;
if ((data & XSCOM_DATA_IND_COMPLETE) &&
((data & XSCOM_DATA_IND_ERR) == 0))
break;
@@ -373,7 +385,9 @@ static int xscom_indirect_write(uint32_t gcid, uint64_t pcb_addr, uint64_t val)
}
}
bail:
- return rc;
+ if (rc < 0)
+ return rc;
+ return comp_retries;
}
static uint32_t xscom_decode_chiplet(uint32_t partid, uint64_t *pcb_addr)
@@ -397,8 +411,9 @@ static uint32_t xscom_decode_chiplet(uint32_t partid, uint64_t *pcb_addr)
*/
int xscom_read(uint32_t partid, uint64_t pcb_addr, uint64_t *val)
{
+ uint64_t t_begin, t_end;
uint32_t gcid;
- int rc;
+ int64_t rc;
/* Handle part ID decoding */
switch(partid >> 28) {
@@ -426,15 +441,27 @@ int xscom_read(uint32_t partid, uint64_t pcb_addr, uint64_t *val)
/* HW822317 requires us to do global locking */
lock(&xscom_lock);
+ t_begin = mftb();
/* Direct vs indirect access */
if (pcb_addr & XSCOM_ADDR_IND_FLAG)
rc = xscom_indirect_read(gcid, pcb_addr, val);
else
rc = __xscom_read(gcid, pcb_addr & 0x7fffffff, val);
+ t_end = mftb();
+
+ /* Check if we are told not to bother about warning on long delays... */
+ if (xscom_tb_unreliable)
+ t_end = t_begin;
/* Unlock it */
unlock(&xscom_lock);
+
+ if (tb_compare(t_begin + XSCOM_DELAY_WARN_THRESHOLD, t_end) == TB_ABEFOREB)
+ prlog(PR_DEBUG, "XSCOM Read from %08llx took %ld us with %lld (err/retries)\n",
+ pcb_addr, tb_to_usecs(t_end - t_begin), rc);
+ if (rc > 0)
+ rc = 0;
return rc;
}
@@ -442,8 +469,9 @@ opal_call(OPAL_XSCOM_READ, xscom_read, 3);
int xscom_write(uint32_t partid, uint64_t pcb_addr, uint64_t val)
{
+ uint64_t t_begin, t_end;
uint32_t gcid;
- int rc;
+ int64_t rc;
/* Handle part ID decoding */
switch(partid >> 28) {
@@ -469,19 +497,42 @@ int xscom_write(uint32_t partid, uint64_t pcb_addr, uint64_t val)
/* HW822317 requires us to do global locking */
lock(&xscom_lock);
+ t_begin = mftb();
/* Direct vs indirect access */
if (pcb_addr & XSCOM_ADDR_IND_FLAG)
rc = xscom_indirect_write(gcid, pcb_addr, val);
else
rc = __xscom_write(gcid, pcb_addr & 0x7fffffff, val);
+ t_end = mftb();
+
+ /* Check if we are told not to bother about warning on long delays... */
+ if (xscom_tb_unreliable)
+ t_end = t_begin;
/* Unlock it */
unlock(&xscom_lock);
+
+ if (tb_compare(t_begin + XSCOM_DELAY_WARN_THRESHOLD, t_end) == TB_ABEFOREB)
+ printf("XSCOM Write to %08llx took %ld us with %lld (err/retries)\n",
+ pcb_addr, tb_to_usecs(t_end - t_begin), rc);
+ if (rc > 0)
+ rc = 0;
+
return rc;
}
opal_call(OPAL_XSCOM_WRITE, xscom_write, 3);
+void xscom_set_tb_unreliable(bool unreliable)
+{
+ lock(&xscom_lock);
+ if (unreliable)
+ xscom_tb_unreliable++;
+ else if (xscom_tb_unreliable)
+ xscom_tb_unreliable--;
+ unlock(&xscom_lock);
+}
+
int xscom_readme(uint64_t pcb_addr, uint64_t *val)
{
return xscom_read(this_cpu()->chip_id, pcb_addr, val);
diff --git a/include/xscom.h b/include/xscom.h
index f9550218d4f5..4457b82f9f6a 100644
--- a/include/xscom.h
+++ b/include/xscom.h
@@ -220,6 +220,11 @@ extern void xscom_init(void);
/* Mark XSCOM lock as being in console path */
extern void xscom_used_by_console(void);
+/* Mark XSCOM timebase warnings unreliable (used by the chiptod
+ * code when TB is being resynchronized
+ */
+extern void xscom_set_tb_unreliable(bool unreliable);
+
/* Returns true if XSCOM can be used. Typically this returns false if
* the current CPU holds the XSCOM lock (to avoid re-entrancy from error path).
*/
--
2.1.4
More information about the Skiboot
mailing list