[Skiboot] [PATCH v2] Add purging CPU L2 and L3 caches into NPU hreset.
Rashmica Gupta
rashmica.g at gmail.com
Mon Dec 3 09:50:10 AEDT 2018
On Fri, 2018-11-30 at 12:05 +1100, Alexey Kardashevskiy wrote:
> >
> On 21/11/2018 10:44, Rashmica Gupta wrote:
...
> > +static int start_l2_purge(uint32_t chip_id, uint32_t core_id)
> > +{
> > + int rc;
> > + uint64_t addr = XSCOM_ADDR_P9_EX(core_id,
> > L2_PRD_PURGE_CMD_REG);
> > +
> > + rc = xscom_write_mask(chip_id, addr, L2CAC_FLUSH,
> > + L2_PRD_PURGE_CMD_TYPE_MASK);
> > + if (rc)
> > + goto out;
> > + rc = xscom_write_mask(chip_id, addr, L2_PRD_PURGE_CMD_TRIGGER,
> > + L2_PRD_PURGE_CMD_TRIGGER);
> > + if (rc)
> > +out: prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM
> > write_mask "
> > + "failed %i\n", core_id, rc);
>
>
> Nit: I find such use of goto with a label inside another scope rather
> confusing. Goto is usually used to avoid code duplication on a
> cleanup
> path and this is not the case.
>
> I'd rather ditch "goto" and do "if (!rc) xscom_write_mask" for the
> second xscom_write_mask() or move "out:" 1 line higher. Up to
> Stewart really :)
>
>
> > + return rc;
> > +
>
> Unnecessary empty line.
>
> > +}
> > +
> > +static int wait_l2_purge(uint32_t chip_id, uint32_t core_id)
> > +{
> > + int rc;
> > + unsigned long now = mftb();
> > + unsigned long end = now + msecs_to_tb(2);
> > + uint64_t val = L2_PRD_PURGE_CMD_REG_BUSY;
> > + uint64_t addr = XSCOM_ADDR_P9_EX(core_id,
> > L2_PRD_PURGE_CMD_REG);
> > +
> > + while (val & L2_PRD_PURGE_CMD_REG_BUSY) {
>
> for (val = L2_PRD_PURGE_CMD_REG_BUSY;;)
>
> and....
>
> > + rc = xscom_read(chip_id, addr, &val);
> > + if (rc) {
> > + prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM
> > read "
> > + "failed %i\n", core_id, rc);
> > + break;
> > + }
>
>
> ... break out here:
> if (val & L2_PRD_PURGE_CMD_REG_BUSY)
> break;
>
> As if you read val with the bit set, then checking timeout is rather
> pointless, no?
>
I think you mean:
if (!(val & L2_PRD_PURGE_CMD_REG_BUSY))
break;
??
Checking timeout is pointless if the purge is completed, but if it
hasn't completed then we do want to check the timeout.
>
> > + now = mftb();
> > + if (tb_compare(now, end) == TB_AAFTERB) {
> > + prlog(PR_ERR, "PURGE L2 on core 0x%x timed out
> > %i\n",
> > + core_id, rc);
> > + return OPAL_BUSY;
> > + }
> > + }
> > +
> > + /* We have to clear the trigger bit ourselves */
> > + val &= ~L2_PRD_PURGE_CMD_TRIGGER;
> > + rc = xscom_write(chip_id, addr, val);
> > + if (rc)
> > + prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM write
> > failed %i\n",
> > + core_id, rc);
> > + return rc;
> > +
>
> Unnecessary empty line.
>
>
> > +}
> > +
> > +static int start_l3_purge(uint32_t chip_id, uint32_t core_id)
> > +{
> > + int rc = 0;
> > + uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L3_PRD_PURGE_REG);
> > +
> > + rc = xscom_write_mask(chip_id, addr, L3_FULL_PURGE,
> > + L3_PRD_PURGE_TTYPE_MASK);
> > + if (rc)
> > + goto out;
> > + rc = xscom_write_mask(chip_id, addr, L3_PRD_PURGE_REQ,
> > + L3_PRD_PURGE_REQ);
> > + if (rc)
> > +out: prlog(PR_ERR, "PURGE L3 on core 0x%x: XSCOM
> > write_mask "
> > + "failed %i\n", core_id, rc);
> > + return rc;
> > +
> > +}
> > +
> > +static int wait_l3_purge(uint32_t chip_id, uint32_t core_id)
> > +{
> > + int rc;
> > + unsigned long now = mftb();
> > + unsigned long end = now + msecs_to_tb(2);
> > + uint64_t val = L3_PRD_PURGE_REQ;
> > + uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L3_PRD_PURGE_REG);
> > +
> > + /* Trigger bit is automatically set to zero when flushing is
> > done*/
> > + while (val & L3_PRD_PURGE_REQ) {
> > + rc = xscom_read(chip_id, addr, &val);
> > + if (rc) {
> > + prlog(PR_ERR, "PURGE L3 on core 0x%x: XSCOM
> > read "
> > + "failed %i\n", core_id, rc);
> > + break;
> > + }
> > + now = mftb();
> > + if (tb_compare(now, end) == TB_AAFTERB) {
> > + prlog(PR_ERR, "PURGE L3 on core 0x%x timed out
> > %i\n",
> > + core_id, rc);
> > + return OPAL_BUSY;
> > + }
> > + }
> > + return rc;
> > +}
> > +
> > +static int64_t purge_l2_l3_caches(void)
> > +{
> > + int rc = 0;
> > + struct cpu_thread *t;
> > + uint64_t core_id, prev_core_id = 0xdeadbeef;
>
> I'd make it (uint64_t)-1 :) deafbeaf is more for poisoning memory.
>
Ok!
>
> > +
> > + if (proc_gen != proc_gen_p9)
> > + return OPAL_UNSUPPORTED;
>
>
> I assume this is a leftover from when it was an OPAL call. There is
> no
> even small chance for this to be called for anything but p9, right?
>
Good point :)
>
> > +
> > + for_each_ungarded_cpu(t) {
> > + /* Only need to do it once per core chiplet */
> > + core_id = pir_to_core_id(t->pir);
> > + if (prev_core_id == core_id)
> > + continue;
> > + prev_core_id = core_id;
> > + if (start_l2_purge(t->chip_id, core_id))
> > + goto out;
> > + if (start_l3_purge(t->chip_id, core_id))
> > + goto out;
> > + }
> > +
> > + for_each_ungarded_cpu(t) {
> > + /* Only need to do it once per core chiplet */
> > + core_id = pir_to_core_id(t->pir);
> > + if (prev_core_id == core_id)
> > + continue;
>
> Looks like if there is a single core at all (quite unlikely but still
> a
> possible situation), we won't wait below at all.
Sorry I don't understand why you think that. Can you please elaborate?
>
> > + prev_core_id = core_id;
> > +
> > + if (wait_l2_purge(t->chip_id, core_id))
> > + goto out;
> > + if (wait_l3_purge(t->chip_id, core_id))
> > + goto out;
> > + }
> > + return rc;
>
>
> return OPAL_SUCCESS;
>
>
> > +out:
> > + prlog(PR_ERR, "Failed on core: 0x%llx\n", core_id);
> > + return rc;
>
> return OPAL_BUSY_EVENT. Now it always returns 0.
>
woops, good spot :)
>
> > +
> > +}
> > +
> > static int64_t npu2_hreset(struct pci_slot *slot __unused)
> > {
> > struct npu2 *p;
> > @@ -1125,6 +1260,7 @@ static int64_t npu2_hreset(struct pci_slot
> > *slot __unused)
> > reset_ntl(ndev);
> > }
> > }
> > + purge_l2_l3_caches();
>
>
> This takes care of the guest termination case (which is good).
>
> Now in order to cover the period between the guest started rebooting
> and
> the guest started onlining the memory, we also need
> purge_l2_l3_caches()
> near npu2_dev_procedure_reset() in npu2_dev_cfg_exp_devcap() as this
> is
> the reset method used for NPU when the guest reboots.
>
>
OK. Does it matter if purge_l2_l3_caches() is called before or after
npu2_dev_procedure_reset()?
>
> > return OPAL_SUCCESS;
>
>
> return purge_l2_l3_caches(), otherwise the timeout error is lost.
>
>
ack.
>
>
> > }
> >
> > diff --git a/include/npu2-regs.h b/include/npu2-regs.h
> > index 165e0b79..20a61e28 100644
> > --- a/include/npu2-regs.h
> > +++ b/include/npu2-regs.h
> > @@ -749,4 +749,15 @@ void npu2_scom_write(uint64_t gcid, uint64_t
> > scom_base,
> > #define OB3_ODL1_TRAINING_STATUS 0xC01082F
> > #define OB_ODL_TRAINING_STATUS_STS_RX_PATTERN_B PPC_BITMASK(8,
> > 15)
> >
> > +/* Registers and bits used to clear the L2 and L3 cache */
> > +#define L2_PRD_PURGE_CMD_REG 0x1080E
> > +#define L2_PRD_PURGE_CMD_REG_BUSY 0x0040000000000000
> > +#define L2_PRD_PURGE_CMD_TYPE_MASK PPC_BIT(1) | PPC_BIT(2)
> > | PPC_BIT(3) | PPC_BIT(4)
> > +#define L2_PRD_PURGE_CMD_TRIGGER PPC_BIT(0)
> > +#define L2CAC_FLUSH 0x0
> > +#define L3_PRD_PURGE_REG 0x1180E
> > +#define L3_PRD_PURGE_REQ PPC_BIT(0)
> > +#define L3_PRD_PURGE_TTYPE_MASK PPC_BIT(1) | PPC_BIT(2)
> > | PPC_BIT(3) | PPC_BIT(4)
> > +#define L3_FULL_PURGE 0x0
> > +
> > #endif /* __NPU2_REGS_H */
> >
>
>
More information about the Skiboot
mailing list