[Skiboot] [PATCH v3] Add purging CPU L2 and L3 caches into NPU hreset.
Alexey Kardashevskiy
aik at ozlabs.ru
Tue Dec 4 12:01:50 AEDT 2018
On 03/12/2018 17:49, Rashmica Gupta wrote:
> If a GPU is passed through to a guest and the guest unexpectedly terminates,
> there can be cache lines in CPUs that belong to the GPU. So purge the caches
> as part of the reset sequence. L1 is write through, so doesn't need to be purged.
>
> This also needs to be called if the guest reboots so call it in
> npu2_dev_cfg_exp_devcap().
>
> The sequence to purge the L2 and L3 caches from the hw team:
>
> "L2 purge:
> (1) initiate purge
> putspy pu.ex EXP.L2.L2MISC.L2CERRS.PRD_PURGE_CMD_TYPE L2CAC_FLUSH -all
> putspy pu.ex EXP.L2.L2MISC.L2CERRS.PRD_PURGE_CMD_TRIGGER ON -all
>
> (2) check this is off in all caches to know purge completed
> getspy pu.ex EXP.L2.L2MISC.L2CERRS.PRD_PURGE_CMD_REG_BUSY -all
>
> (3) putspy pu.ex EXP.L2.L2MISC.L2CERRS.PRD_PURGE_CMD_TRIGGER OFF -all
>
> L3 purge:
> 1) Start the purge:
> putspy pu.ex EXP.L3.L3_MISC.L3CERRS.L3_PRD_PURGE_TTYPE FULL_PURGE -all
> putspy pu.ex EXP.L3.L3_MISC.L3CERRS.L3_PRD_PURGE_REQ ON -all
>
> 2) Ensure that the purge has completed by checking the status bit:
> getspy pu.ex EXP.L3.L3_MISC.L3CERRS.L3_PRD_PURGE_REQ -all
>
> You should see it say OFF if it's done:
> p9n.ex k0:n0:s0:p00:c0
> EXP.L3.L3_MISC.L3CERRS.L3_PRD_PURGE_REQ
> OFF"
>
> Suggested-by: Alistair Popple <alistair at popple.id.au>
> Signed-off-by: Rashmica Gupta <rashmica.g at gmail.com>
Reviewed-by: Alexey Kardashevskiy <aik at ozlabs.ru>
It is correct as it is but there is also some room for bikeshedding, see
below :)
> ---
>
> This is done synchronously for now as it doesn't seem to take *too* long
> (purging the L2 and L3 caches after building the 4.16 linux kernel on a p9
> with 16 cores took 1.57 ms, 1.49ms and 1.46ms).
>
>
> hw/npu2.c | 135 +++++++++++++++++++++++++++++++++++++++++++-
> include/npu2-regs.h | 11 ++++
> 2 files changed, 145 insertions(+), 1 deletion(-)
>
> diff --git a/hw/npu2.c b/hw/npu2.c
> index 30049f5b..9c0e6114 100644
> --- a/hw/npu2.c
> +++ b/hw/npu2.c
> @@ -326,6 +326,136 @@ static int64_t npu2_dev_cfg_bar(void *dev, struct pci_cfg_reg_filter *pcrf,
> return npu2_cfg_read_bar(ndev, pcrf, offset, len, data);
> }
>
> +static int start_l2_purge(uint32_t chip_id, uint32_t core_id)
> +{
> + int rc;
> + uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L2_PRD_PURGE_CMD_REG);
> +
> + rc = xscom_write_mask(chip_id, addr, L2CAC_FLUSH,
> + L2_PRD_PURGE_CMD_TYPE_MASK);
> + if (!rc)
> + rc = xscom_write_mask(chip_id, addr, L2_PRD_PURGE_CMD_TRIGGER,
> + L2_PRD_PURGE_CMD_TRIGGER);
> + if (rc)
> + prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM write_mask "
> + "failed %i\n", core_id, rc);
> + return rc;
> +}
> +
> +static int wait_l2_purge(uint32_t chip_id, uint32_t core_id)
> +{
> + int rc;
> + unsigned long now = mftb();
> + unsigned long end = now + msecs_to_tb(2);
> + uint64_t val = L2_PRD_PURGE_CMD_REG_BUSY;
> + uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L2_PRD_PURGE_CMD_REG);
> +
> + while (val & L2_PRD_PURGE_CMD_REG_BUSY) {
This check is pointless as you never break out of the loop because of it
(you do it explicitly below); it could be as simple as while(1) or for(;;).
> + rc = xscom_read(chip_id, addr, &val);
> + if (rc) {
> + prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM read "
> + "failed %i\n", core_id, rc);
> + break;
> + }
> + if (!(val & L2_PRD_PURGE_CMD_REG_BUSY))
> + break;
> + now = mftb();
> + if (tb_compare(now, end) == TB_AAFTERB) {
> + prlog(PR_ERR, "PURGE L2 on core 0x%x timed out %i\n",
> + core_id, rc);
> + return OPAL_BUSY;
> + }
> + }
> +
> + /* We have to clear the trigger bit ourselves */
> + val &= ~L2_PRD_PURGE_CMD_TRIGGER;
> + rc = xscom_write(chip_id, addr, val);
> + if (rc)
> + prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM write failed %i\n",
> + core_id, rc);
> + return rc;
> +}
> +
> +static int start_l3_purge(uint32_t chip_id, uint32_t core_id)
> +{
> + int rc;
> + uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L3_PRD_PURGE_REG);
> +
> + rc = xscom_write_mask(chip_id, addr, L3_FULL_PURGE,
> + L3_PRD_PURGE_TTYPE_MASK);
> + if (!rc)
> + rc = xscom_write_mask(chip_id, addr, L3_PRD_PURGE_REQ,
> + L3_PRD_PURGE_REQ);
> + if (rc)
> + prlog(PR_ERR, "PURGE L3 on core 0x%x: XSCOM write_mask "
> + "failed %i\n", core_id, rc);
> + return rc;
> +}
> +
> +static int wait_l3_purge(uint32_t chip_id, uint32_t core_id)
> +{
> + int rc;
> + unsigned long now = mftb();
> + unsigned long end = now + msecs_to_tb(2);
> + uint64_t val = L3_PRD_PURGE_REQ;
> + uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L3_PRD_PURGE_REG);
> +
> + /* Trigger bit is automatically set to zero when flushing is done */
> + while (val & L3_PRD_PURGE_REQ) {
Same here.
> + rc = xscom_read(chip_id, addr, &val);
> + if (rc) {
> + prlog(PR_ERR, "PURGE L3 on core 0x%x: XSCOM read "
> + "failed %i\n", core_id, rc);
> + break;
> + }
> + if (!(val & L3_PRD_PURGE_REQ))
> + break;
> + now = mftb();
> + if (tb_compare(now, end) == TB_AAFTERB) {
> + prlog(PR_ERR, "PURGE L3 on core 0x%x timed out %i\n",
> + core_id, rc);
> + return OPAL_BUSY;
> + }
> + }
> + return rc;
> +}
> +
> +static int64_t purge_l2_l3_caches(void)
> +{
> + struct cpu_thread *t;
> + uint64_t core_id, prev_core_id = (uint64_t)-1;
> +
> + for_each_ungarded_cpu(t) {
> + /* Only need to do it once per core chiplet */
> + core_id = pir_to_core_id(t->pir);
> + if (prev_core_id == core_id)
> + continue;
> + prev_core_id = core_id;
> + if (start_l2_purge(t->chip_id, core_id))
> + goto out;
start_l2_purge() can return OPAL_BUSY, but it can also return other errors
from xscom_read()/xscom_write(); however, you convert them all to "busy" anyway.
A better approach would be:
ret = start_l2_purge(t->chip_id, core_id);
if (ret)
goto out;
and...
> + if (start_l3_purge(t->chip_id, core_id))
> + goto out;
> + }
> +
> + prev_core_id = (uint64_t)-1;
> + for_each_ungarded_cpu(t) {
> + /* Only need to do it once per core chiplet */
> + core_id = pir_to_core_id(t->pir);
> + if (prev_core_id == core_id)
> + continue;
> + prev_core_id = core_id;
> +
> + if (wait_l2_purge(t->chip_id, core_id))
> + goto out;
> + if (wait_l3_purge(t->chip_id, core_id))
> + goto out;
> + }
> + return OPAL_SUCCESS;
> +out:
> + prlog(PR_ERR, "Failed on core: 0x%llx\n", core_id);
> + return OPAL_BUSY_EVENT;
... return ret here.
> +}
> +
> static int64_t npu2_dev_cfg_exp_devcap(void *dev,
> struct pci_cfg_reg_filter *pcrf __unused,
> uint32_t offset, uint32_t size,
> @@ -346,6 +476,9 @@ static int64_t npu2_dev_cfg_exp_devcap(void *dev,
> if (*data & PCICAP_EXP_DEVCTL_FUNC_RESET)
> npu2_dev_procedure_reset(ndev);
>
> + if (purge_l2_l3_caches())
> + return OPAL_BUSY_EVENT;
We do not want to purge caches every time we touch this capability. It
is not going to happen often in practice, but I'd think more often than
just the cases when a driver wants to reset a device.
Also you convert every possible error from purge_l2_l3_caches() to just
"busy" while there are more options.
ret = purge_l2_l3_caches();
if (ret)
return ret;
We have more than 30 error codes, let them be used :)
> +
> return OPAL_PARTIAL;
> }
>
> @@ -1125,7 +1258,7 @@ static int64_t npu2_hreset(struct pci_slot *slot __unused)
> reset_ntl(ndev);
> }
> }
> - return OPAL_SUCCESS;
> + return purge_l2_l3_caches();
> }
>
> static int64_t npu2_freset(struct pci_slot *slot __unused)
> diff --git a/include/npu2-regs.h b/include/npu2-regs.h
> index 10a28166..8273b2be 100644
> --- a/include/npu2-regs.h
> +++ b/include/npu2-regs.h
> @@ -756,4 +756,15 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
> #define OB3_ODL0_ENDPOINT_INFO 0xC010832
> #define OB3_ODL1_ENDPOINT_INFO 0xC010833
>
> +/* Registers and bits used to clear the L2 and L3 cache */
> +#define L2_PRD_PURGE_CMD_REG 0x1080E
> +#define L2_PRD_PURGE_CMD_REG_BUSY 0x0040000000000000
> +#define L2_PRD_PURGE_CMD_TYPE_MASK PPC_BIT(1) | PPC_BIT(2) | PPC_BIT(3) | PPC_BIT(4)
> +#define L2_PRD_PURGE_CMD_TRIGGER PPC_BIT(0)
> +#define L2CAC_FLUSH 0x0
> +#define L3_PRD_PURGE_REG 0x1180E
> +#define L3_PRD_PURGE_REQ PPC_BIT(0)
> +#define L3_PRD_PURGE_TTYPE_MASK PPC_BIT(1) | PPC_BIT(2) | PPC_BIT(3) | PPC_BIT(4)
> +#define L3_FULL_PURGE 0x0
> +
> #endif /* __NPU2_REGS_H */
>
--
Alexey
More information about the Skiboot
mailing list