[Skiboot] [PATCH v3] Add purging CPU L2 and L3 caches into NPU hreset.
Oliver
oohall at gmail.com
Tue Dec 4 11:56:11 AEDT 2018
On Mon, Dec 3, 2018 at 5:49 PM Rashmica Gupta <rashmica.g at gmail.com> wrote:
>
> If a GPU is passed through to a guest and the guest unexpectedly terminates,
> there can be cache lines in CPUs that belong to the GPU. So purge the caches
> as part of the reset sequence. L1 is write through, so doesn't need to be purged.
>
> This also needs to be called if the guest reboots so call it in
> npu2_dev_cfg_exp_devcap().
>
> The sequence to purge the L2 and L3 caches from the hw team:
>
> "L2 purge:
> (1) initiate purge
> putspy pu.ex EXP.L2.L2MISC.L2CERRS.PRD_PURGE_CMD_TYPE L2CAC_FLUSH -all
> putspy pu.ex EXP.L2.L2MISC.L2CERRS.PRD_PURGE_CMD_TRIGGER ON -all
>
> (2) check this is off in all caches to know purge completed
> getspy pu.ex EXP.L2.L2MISC.L2CERRS.PRD_PURGE_CMD_REG_BUSY -all
>
> (3) putspy pu.ex EXP.L2.L2MISC.L2CERRS.PRD_PURGE_CMD_TRIGGER OFF -all
>
> L3 purge:
> 1) Start the purge:
> putspy pu.ex EXP.L3.L3_MISC.L3CERRS.L3_PRD_PURGE_TTYPE FULL_PURGE -all
> putspy pu.ex EXP.L3.L3_MISC.L3CERRS.L3_PRD_PURGE_REQ ON -all
>
> 2) Ensure that the purge has completed by checking the status bit:
> getspy pu.ex EXP.L3.L3_MISC.L3CERRS.L3_PRD_PURGE_REQ -all
>
> You should see it say OFF if it's done:
> p9n.ex k0:n0:s0:p00:c0
> EXP.L3.L3_MISC.L3CERRS.L3_PRD_PURGE_REQ
> OFF"
>
> Suggested-by: Alistair Popple <alistair at popple.id.au>
> Signed-off-by: Rashmica Gupta <rashmica.g at gmail.com>
> ---
>
> This is done synchronously for now as it doesn't seem to take *too* long
> (purging the L2 and L3 caches after building the 4.16 linux kernel on a p9
> with 16 cores took 1.57 ms, 1.49ms and 1.46ms).
>
>
> hw/npu2.c | 135 +++++++++++++++++++++++++++++++++++++++++++-
> include/npu2-regs.h | 11 ++++
> 2 files changed, 145 insertions(+), 1 deletion(-)
>
> diff --git a/hw/npu2.c b/hw/npu2.c
> index 30049f5b..9c0e6114 100644
> --- a/hw/npu2.c
> +++ b/hw/npu2.c
> @@ -326,6 +326,136 @@ static int64_t npu2_dev_cfg_bar(void *dev, struct pci_cfg_reg_filter *pcrf,
> return npu2_cfg_read_bar(ndev, pcrf, offset, len, data);
> }
>
> +static int start_l2_purge(uint32_t chip_id, uint32_t core_id)
> +{
> + int rc;
> + uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L2_PRD_PURGE_CMD_REG);
> +
> + rc = xscom_write_mask(chip_id, addr, L2CAC_FLUSH,
> + L2_PRD_PURGE_CMD_TYPE_MASK);
> + if (!rc)
> + rc = xscom_write_mask(chip_id, addr, L2_PRD_PURGE_CMD_TRIGGER,
> + L2_PRD_PURGE_CMD_TRIGGER);
> + if (rc)
> + prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM write_mask "
> + "failed %i\n", core_id, rc);
> + return rc;
> +}
> +
> +static int wait_l2_purge(uint32_t chip_id, uint32_t core_id)
> +{
> + int rc;
> + unsigned long now = mftb();
> + unsigned long end = now + msecs_to_tb(2);
> + uint64_t val = L2_PRD_PURGE_CMD_REG_BUSY;
> + uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L2_PRD_PURGE_CMD_REG);
> +
> + while (val & L2_PRD_PURGE_CMD_REG_BUSY) {
> + rc = xscom_read(chip_id, addr, &val);
> + if (rc) {
> + prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM read "
> + "failed %i\n", core_id, rc);
> + break;
> + }
> + if (!(val & L2_PRD_PURGE_CMD_REG_BUSY))
> + break;
> + now = mftb();
> + if (tb_compare(now, end) == TB_AAFTERB) {
> + prlog(PR_ERR, "PURGE L2 on core 0x%x timed out %i\n",
> + core_id, rc);
> + return OPAL_BUSY;
> + }
> + }
> +
> + /* We have to clear the trigger bit ourselves */
> + val &= ~L2_PRD_PURGE_CMD_TRIGGER;
> + rc = xscom_write(chip_id, addr, val);
> + if (rc)
> + prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM write failed %i\n",
> + core_id, rc);
> + return rc;
> +}
> +
> +static int start_l3_purge(uint32_t chip_id, uint32_t core_id)
> +{
> + int rc;
> + uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L3_PRD_PURGE_REG);
> +
> + rc = xscom_write_mask(chip_id, addr, L3_FULL_PURGE,
> + L3_PRD_PURGE_TTYPE_MASK);
> + if (!rc)
> + rc = xscom_write_mask(chip_id, addr, L3_PRD_PURGE_REQ,
> + L3_PRD_PURGE_REQ);
> + if (rc)
> + prlog(PR_ERR, "PURGE L3 on core 0x%x: XSCOM write_mask "
> + "failed %i\n", core_id, rc);
> + return rc;
> +}
> +
> +static int wait_l3_purge(uint32_t chip_id, uint32_t core_id)
> +{
> + int rc;
> + unsigned long now = mftb();
> + unsigned long end = now + msecs_to_tb(2);
> + uint64_t val = L3_PRD_PURGE_REQ;
> + uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L3_PRD_PURGE_REG);
> +
> + /* Trigger bit is automatically set to zero when flushing is done */
> + while (val & L3_PRD_PURGE_REQ) {
> + rc = xscom_read(chip_id, addr, &val);
> + if (rc) {
> + prlog(PR_ERR, "PURGE L3 on core 0x%x: XSCOM read "
> + "failed %i\n", core_id, rc);
> + break;
> + }
> + if (!(val & L3_PRD_PURGE_REQ))
> + break;
> + now = mftb();
> + if (tb_compare(now, end) == TB_AAFTERB) {
> + prlog(PR_ERR, "PURGE L3 on core 0x%x timed out %i\n",
> + core_id, rc);
> + return OPAL_BUSY;
> + }
> + }
> + return rc;
> +}
> +
> +static int64_t purge_l2_l3_caches(void)
> +{
> + struct cpu_thread *t;
> + uint64_t core_id, prev_core_id = (uint64_t)-1;
> +
> + for_each_ungarded_cpu(t) {
> + /* Only need to do it once per core chiplet */
> + core_id = pir_to_core_id(t->pir);
> + if (prev_core_id == core_id)
> + continue;
> + prev_core_id = core_id;
> + if (start_l2_purge(t->chip_id, core_id))
> + goto out;
> + if (start_l3_purge(t->chip_id, core_id))
> + goto out;
> + }
> +
> + prev_core_id = (uint64_t)-1;
> + for_each_ungarded_cpu(t) {
> + /* Only need to do it once per core chiplet */
> + core_id = pir_to_core_id(t->pir);
> + if (prev_core_id == core_id)
> + continue;
> + prev_core_id = core_id;
> +
> + if (wait_l2_purge(t->chip_id, core_id))
> + goto out;
> + if (wait_l3_purge(t->chip_id, core_id))
> + goto out;
> + }
> + return OPAL_SUCCESS;
> +out:
> + prlog(PR_ERR, "Failed on core: 0x%llx\n", core_id);
You can probably delete this prlog(). It provides no useful information,
and the called functions already print an error message.
> + return OPAL_BUSY_EVENT;
> +}
> +
> static int64_t npu2_dev_cfg_exp_devcap(void *dev,
> struct pci_cfg_reg_filter *pcrf __unused,
> uint32_t offset, uint32_t size,
> @@ -346,6 +476,9 @@ static int64_t npu2_dev_cfg_exp_devcap(void *dev,
> if (*data & PCICAP_EXP_DEVCTL_FUNC_RESET)
> npu2_dev_procedure_reset(ndev);
> + if (purge_l2_l3_caches())
> + return OPAL_BUSY_EVENT;
> +
> return OPAL_PARTIAL;
> }
>
> @@ -1125,7 +1258,7 @@ static int64_t npu2_hreset(struct pci_slot *slot __unused)
> reset_ntl(ndev);
> }
> }
> - return OPAL_SUCCESS;
> + return purge_l2_l3_caches();
This is more of a question for Alexey, but why are we returning
OPAL_BUSY_EVENT here? It seems like a weird hack to determine when the
cache flush failed.
More information about the Skiboot mailing list