[Skiboot] [PATCH V3 3/5] capi: Enable capi mode

Frederic Barrat fbarrat at linux.vnet.ibm.com
Tue May 9 21:17:22 AEST 2017


Le 04/05/2017 à 17:57, Christophe Lombard a écrit :
> Enable the Coherently attached processor interface. The PHB is used as
> a CAPI interface.
> CAPI Adapters can be connected to whether PEC0 or PEC2. Single port

                                   ^ either?

Also, the title of the commit should say it's for phb4.


> CAPI adapter can be connected to either PEC0 or PEC2, but Dual-Port
> Adapter can be only connected to PEC2
>    CAPP0 attached to PHB0(PEC0 - single port)
>    CAPP1 attached to PHB3(PEC2 - single or dual port)
> As we did for PHB3, a new specific file 'phb4-capp.h' is created to
> contain the CAPP register definitions.
>
> Signed-off-by: Christophe Lombard <clombard at linux.vnet.ibm.com>
> ---
>  core/cpu.c          |  17 +-
>  hw/phb4.c           | 511 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  include/chip.h      |   1 +
>  include/opal-api.h  |   1 +
>  include/phb4-capp.h |  60 ++++++
>  include/phb4-regs.h |   7 +-
>  include/phb4.h      |   1 +
>  7 files changed, 591 insertions(+), 7 deletions(-)
>  create mode 100644 include/phb4-capp.h
>
> diff --git a/core/cpu.c b/core/cpu.c
> index c7e650d..a096d2f 100644
> --- a/core/cpu.c
> +++ b/core/cpu.c
> @@ -1099,12 +1099,27 @@ static int64_t opal_reinit_cpus(uint64_t flags)
>  opal_call(OPAL_REINIT_CPUS, opal_reinit_cpus, 1);
>
>  #define NMMU_XLAT_CTL_PTCR 0xb
> +#define NMMU_XLAT_CTL_SMCR 0x12
>  static int64_t nmmu_set_ptcr(uint64_t chip_id, struct dt_node *node, uint64_t ptcr)
>  {
>  	uint32_t nmmu_base_addr;
> +	struct proc_chip *chip;
> +	uint64_t reg;
> +	int64_t rc;
>
>  	nmmu_base_addr = dt_get_address(node, 0, NULL);
> -	return xscom_write(chip_id, nmmu_base_addr + NMMU_XLAT_CTL_PTCR, ptcr);
> +	rc = xscom_write(chip_id, nmmu_base_addr + NMMU_XLAT_CTL_PTCR, ptcr);
> +	if (!rc) {
> +		/* Disable for Radix page-walk cache */
> +		chip = get_chip(chip_id);
> +		if ((chip->type == PROC_CHIP_P9_NIMBUS) ||
> +		    (chip->type == PROC_CHIP_P9_CUMULUS)) {
> +			xscom_read(chip_id, nmmu_base_addr + NMMU_XLAT_CTL_SMCR, &reg);
> +			reg |= PPC_BIT(52);
> +			xscom_write(chip_id, nmmu_base_addr + NMMU_XLAT_CTL_SMCR, reg);
> +		}
> +	}
> +	return rc;
>  }

[ + Alistair for awareness ]

This should be a separate patch, to make it more visible.
We had discussed it, but for the record: we need to limit the workaround 
to DD1.x and DD2.0, as it will be fixed with DD2.1.
Stewart can comment, but it looks like it can be tested with 
chip->ec_level. From chip.h, it's encoded like:
	/* 0xMm (DD1.0 = 0x10) */
Also, I think we should drop the test on P9_CUMULUS, since we don't know 
how things will look there.
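
For the record, here is roughly what I have in mind, assuming the 0xMm 
encoding from chip.h (so anything below 0x21 would be pre-DD2.1; please 
double-check the exact cutoff):

	chip = get_chip(chip_id);
	if (chip->type == PROC_CHIP_P9_NIMBUS && chip->ec_level < 0x21) {
		/* Disable Radix page-walk cache, fixed in DD2.1 */
		xscom_read(chip_id, nmmu_base_addr + NMMU_XLAT_CTL_SMCR, &reg);
		reg |= PPC_BIT(52);
		xscom_write(chip_id, nmmu_base_addr + NMMU_XLAT_CTL_SMCR, reg);
	}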



>  /*
> diff --git a/hw/phb4.c b/hw/phb4.c
> index 0b0c3d8..921e211 100644
> --- a/hw/phb4.c
> +++ b/hw/phb4.c
> @@ -48,7 +48,7 @@
>  #include <affinity.h>
>  #include <phb4.h>
>  #include <phb4-regs.h>
> -#include <capp.h>
> +#include <phb4-capp.h>
>  #include <fsp.h>
>  #include <chip.h>
>  #include <chiptod.h>
> @@ -76,6 +76,11 @@ static void phb4_init_hw(struct phb4 *p, bool first_init);
>  #define PHBLOGCFG(p, fmt, a...) do {} while (0)
>  #endif
>
> +enum capi_dma_tvt {
> +	CAPI_DMA_TVT0,
> +	CAPI_DMA_TVT1,
> +};
> +
>  /* Note: The "ASB" name is historical, practically this means access via
>   * the XSCOM backdoor
>   */
> @@ -2101,6 +2106,119 @@ static int64_t phb4_freset(struct pci_slot *slot)
>  	return OPAL_HARDWARE;
>  }
>
> +extern struct lock capi_lock;
> +
> +static void do_capp_recovery_scoms(struct phb4 *p)
> +{
> +	uint64_t reg;
> +	uint32_t offset;
> +
> +	PHBDBG(p, "Doing CAPP recovery scoms\n");
> +
> +	offset = PHB4_CAPP_REG_OFFSET(p);
> +	/* disable snoops */
> +	xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset, 0);
> +
> +	/* clear err rpt reg*/
> +	xscom_write(p->chip_id, CAPP_ERR_RPT_CLR + offset, 0);
> +	/* clear capp fir */
> +	xscom_write(p->chip_id, CAPP_FIR + offset, 0);
> +
> +	xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &reg);
> +	reg &= ~(PPC_BIT(0) | PPC_BIT(1));
> +	xscom_write(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, reg);
> +}
> +
> +/*
> + * Disable CAPI mode on a PHB.
> + *
> + * Must be done while PHB is fenced and in recovery. Leaves CAPP in recovery -
> + * we can't come out of recovery until the PHB has been reinitialised.
> + *
> + * We don't reset generic error registers here - we rely on phb4_init_hw() to
> + * do that.
> + *
> + * Sets PHB4_CAPP_DISABLING flag when complete.
> + */
> +static void disable_capi_mode(struct phb4 *p)
> +{
> +	struct proc_chip *chip = get_chip(p->chip_id);
> +	uint64_t reg;
> +	uint32_t offset = PHB4_CAPP_REG_OFFSET(p);
> +
> +	lock(&capi_lock);
> +
> +	xscom_read(p->chip_id, p->pe_xscom + 0x7, &reg);
> +	if (!(reg & PPC_BIT(0))) {
> +		/* Not in CAPI mode, no action required */
> +		goto out;
> +	}
> +
> +	PHBDBG(p, "CAPP: Disabling CAPI mode\n");
> +	if (!(chip->capp_phb4_attached_mask & (1 << p->index)))
> +		PHBERR(p, "CAPP: CAPP attached mask not set!\n");
> +
> +	xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &reg);
> +	if (!(reg & PPC_BIT(0))) {
> +		PHBERR(p, "CAPP: not in recovery, can't disable CAPI mode!\n");
> +		goto out;
> +	}
> +
> +	/* CAPI will not be decoded. */
> +	out_be64(p->regs + PHB_CAPI_CMPM, 0ull);
> +
> +	/* Disable NBW decoding */
> +	out_be64(p->regs + PHB_PBL_NBW_CMP_MASK, 0ull);
> +
> +	/* Disable ASN */
> +	out_be64(p->regs + PHB_PBL_ASN_CMPM, 0ull);
> +
> +	/* APC Master PB Control Register - disable examining cResps */
> +	xscom_read(p->chip_id, APC_MASTER_PB_CTRL + offset, &reg);
> +	reg &= ~PPC_BIT(0);
> +	xscom_write(p->chip_id, APC_MASTER_PB_CTRL + offset, reg);
> +
> +	/* APC Master Config Register - de-select PHBs */
> +	xscom_read(p->chip_id, APC_MASTER_CAPI_CTRL + offset, &reg);
> +	reg &= ~PPC_BITMASK(2, 3);
> +	xscom_write(p->chip_id, APC_MASTER_CAPI_CTRL + offset, reg);
> +
> +	/* Snoop CAPI Configuration Register - disable snooping */
> +	xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset, 0ull);
> +
> +	/* Read Epsilon Tier0 _ default value */
> +	xscom_write(p->chip_id, SNOOP_CONTROL + offset, 0ull);
> +
> +	/* CAPP Transport Control Register */
> +	xscom_write(p->chip_id, TRANSPORT_CONTROL + offset, 0x0001000000000000);
> +
> +	/* Epoch and Recovery Timers Control Register */
> +	xscom_write(p->chip_id, EPOCH_RECOVERY_TIMERS_CTRL + offset, 0ull);
> +
> +	/* Flush SUE State Map Register */
> +	xscom_write(p->chip_id, FLUSH_SUE_STATE_MAP + offset, 0ull);
> +
> +	/* Flush SUE uOP1 Register */
> +	xscom_write(p->chip_id, FLUSH_SUE_UOP1 + offset, 0ull);
> +
> +	/* APC FSM Read Machine Mask Register */
> +	xscom_write(p->chip_id, APC_FSM_READ_MASK + offset, 0ull);
> +
> +	/* Snoop copy of FSM Read Machine Mask Register */
> +	xscom_write(p->chip_id, XPT_FSM_RMM + offset, 0ull);
> +
> +	/* PE Secure CAPP Enable Register - we're all done! Disable CAPP mode! */
> +	xscom_write(p->chip_id, p->pe_xscom + 0x7, 0ull);
> +
> +	/* Trigger CAPP recovery scoms after reinit */
> +	p->flags |= PHB4_CAPP_DISABLING;
> +
> +	chip->capp_phb4_attached_mask &= ~(1 << p->index);
> +
> +out:
> +	unlock(&capi_lock);
> +}
> +
>  static int64_t phb4_creset(struct pci_slot *slot)
>  {
>  	struct phb4 *p = phb_to_phb4(slot->phb);
> @@ -2110,12 +2228,11 @@ static int64_t phb4_creset(struct pci_slot *slot)
>  	case PHB4_SLOT_CRESET_START:
>  		PHBDBG(p, "CRESET: Starts\n");
>
> -		/* do steps 3-5 of capp recovery procedure */
> -#if 0
>  		if (p->flags & PHB4_CAPP_RECOVERY)
>  			do_capp_recovery_scoms(p);
> -#endif
> -		/* XXX TODO XXX */
> +
> +		if (!(p->flags & PHB4_CAPP_RECOVERY))
> +			disable_capi_mode(p);

Are we ready to support this? I guess that's a question to be sorted out 
with the PSL team.
disable_capi_mode() is mostly useful to support kexec(), which is 
desirable, but I'm wondering whether it's a bit early.


>
>  		pci_slot_set_state(slot, PHB4_SLOT_CRESET_WAIT_CQ);
>  		slot->retries = 500;
> @@ -2128,6 +2245,12 @@ static int64_t phb4_creset(struct pci_slot *slot)
>  		p->flags &= ~PHB4_AIB_FENCED;
>  		p->flags &= ~PHB4_CAPP_RECOVERY;
>  		phb4_init_hw(p, false);
> +
> +		if (p->flags & PHB4_CAPP_DISABLING) {
> +			do_capp_recovery_scoms(p);
> +			p->flags &= ~PHB4_CAPP_DISABLING;
> +		}
> +
>  		pci_slot_set_state(slot, PHB4_SLOT_CRESET_FRESET);
>  		return pci_slot_set_sm_timeout(slot, msecs_to_tb(100));
>  	case PHB4_SLOT_CRESET_FRESET:
> @@ -2499,6 +2622,338 @@ static int64_t phb4_get_diag_data(struct phb *phb,
>  	return OPAL_SUCCESS;
>  }
>
> +static uint64_t tve_encode_50b_noxlate(uint64_t start_addr, uint64_t end_addr)
> +{
> +	uint64_t tve;
> +
> +	/*
> +	 * Put start address bits 49:24 into TVE[52:53]||[0:23]
> +	 * and end address bits 49:24 into TVE[54:55]||[24:47]
> +	 * and set TVE[51]
> +	 */
> +	tve  = (start_addr << 16) & (0xffffffull << 40);
> +	tve |= (start_addr >> 38) & (3ull << 10);
> +	tve |= (end_addr >>  8) & (0xfffffful << 16);
> +	tve |= (end_addr >> 40) & (3ull << 8);
> +	tve |= PPC_BIT(51) | IODA3_TVT_NON_TRANSLATE_50;
> +	return tve;
> +}
> +
> +static void phb4_init_capp_regs(struct phb4 *p)
> +{
> +	uint64_t reg;
> +	uint32_t offset;
> +
> +	offset = PHB4_CAPP_REG_OFFSET(p);
> +
> +	/* Enable cresp examination by CAPP */
> +	xscom_read(p->chip_id, APC_MASTER_PB_CTRL + offset, &reg);
> +	reg |= PPC_BIT(0);
> +	if (p->rev == PHB4_REV_NIMBUS_DD10) {
> +		reg |= PPC_BIT(1);
> +		/* disable vg not sys */
> +		reg |= PPC_BIT(3);
> +	}
> +	xscom_write(p->chip_id, APC_MASTER_PB_CTRL + offset, reg);
> +
> +	/* Set PHB mode, HPC Dir State and P9 mode */
> +	xscom_write(p->chip_id, APC_MASTER_CAPI_CTRL + offset, 0x1772000000000000);
> +	PHBINF(p, "CAPP: port attached\n");
> +
> +	/* Set snoop ttype decoding , dir size to 256k */
> +	xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset, 0xA000000000000000);
> +
> +	/* Use Read Epsilon Tier2 for all scopes, Address Pipeline Master
> +	 * Wait Count to highest(1023) and Number of rpt_hang.data to 3
> +	 */
> +	xscom_write(p->chip_id, SNOOP_CONTROL + offset, 0x8000000010072000);
> +
> +	/* TLBI Hang Divider = 1.  LPC buffers=0. X16 PCIe(14 buffers) */
> +	xscom_write(p->chip_id, TRANSPORT_CONTROL + offset, 0x081400000000000A);


Mellanox had requested setting the TLBI hang divider to its maximum, as 
their CX5 can have a pretty long path, so we shouldn't time out too 
fast. The capp team seemed OK with it; it is just more annoying for 
debug (more traces in the buffers when we detect the timeout).


> +
> +	/* Enable epoch timer */
> +	xscom_write(p->chip_id, EPOCH_RECOVERY_TIMERS_CTRL + offset, 0xC0000000FFF0FFFE);
> +
> +	xscom_write(p->chip_id, FLUSH_SUE_STATE_MAP + offset, 0x1DCF5F6600000000);
> +	xscom_write(p->chip_id, FLUSH_SUE_UOP1 + offset, 0xE310280428000000);
> +
> +	/* capp owns PHB read buffers */
> +	xscom_write(p->chip_id, APC_FSM_READ_MASK + offset, 0xFFFFFFFFFFFF0000);
> +	xscom_write(p->chip_id, XPT_FSM_RMM + offset, 0xFFFFFFFFFFFF0000);
> +
> +	/* Initialize CI Store Buffers */
> +	xscom_read(p->chip_id, TRANSPORT_CONTROL + offset, &reg);
> +	reg |= PPC_BIT(63);
> +	xscom_write(p->chip_id, TRANSPORT_CONTROL + offset, reg);
> +
> +	/* Deassert TLBI_FENCED and tlbi_psl_is_dead */
> +	xscom_write(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, 0);
> +}
> +
> +/* override some inits with CAPI defaults */
> +static void phb4_init_capp_errors(struct phb4 *p)
> +{
> +	/* Init_77: TXE Error AIB Fence Enable Register */
> +	out_be64(p->regs + 0x0d30,	0xdff7ff0bf7ddfff0ull);
> +
> +	/* Init_86: RXE_ARB Error AIB Fence Enable Register */
> +	out_be64(p->regs + 0x0db0,	0xfbffd7bbff7fbfefull);
> +
> +	/* Init_95: RXE_MRG Error AIB Fence Enable Register */
> +	out_be64(p->regs + 0x0e30,	0xfffffeffff7fff57ull);
> +
> +	/* Init_104: RXE_TCE Error AIB Fence Enable Register */
> +	out_be64(p->regs + 0x0eb0,	0xffaeffafffffffffull);
> +
> +	/* Init_113: PHB Error AIB Fence Enable Register */
> +	out_be64(p->regs + 0x0cb0,	0x35777073ff000000ull);
> +}
> +
> +/* Power Bus Common Queue Registers
> + * All PBCQ and PBAIB registers are accessed via SCOM
> + * NestBase = 4010C00 for PEC0
> + *            4011000 for PEC1
> + *            4011400 for PEC2
> + *
> + * Some registers are shared amongst all of the stacks and will only
> + * have 1 copy. Other registers are implemented one per stack.
> + * Registers that are duplicated will have an additional offset
> + * of “StackBase” so that they have a unique address.
> + * Stackoffset = 00000040 for Stack0
> + *             = 00000080 for Stack1
> + *             = 000000C0 for Stack2
> + */
> +static int64_t enable_capi_mode(struct phb4 *p, uint64_t pe_number,
> +				enum capi_dma_tvt dma_tvt)
> +{
> +	uint64_t reg, start_addr, end_addr;
> +	uint32_t offset;
> +	int i;
> +
> +	xscom_read(p->chip_id, p->pe_xscom + 0x7, &reg);
> +	if (reg & PPC_BIT(0))
> +		PHBDBG(p, "Already in CAPP mode\n");
> +
> +	/* PEC Phase 3 (PBCQ) registers Init */
> +	/* poll cqstat
> +	 * CAPP0 attached to PHB0(PEC0)
> +	 * CAPP1 attached to PHB3(PEC2)
> +	 */
> +	offset = 0x40;
> +	if (p->index > 0 && p->index < 3)
> +		offset = 0x80;
> +	else if (p->index > 2)
> +		offset = 0xC0;


Where do we detect and fail in case of an invalid phb index? I think it 
wouldn't pass the capp ucode load stage, but it seems like we should 
check here as well. It doesn't make sense to enable capi on a phb index 
other than 0 or 3.
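
Something like this at the top of enable_capi_mode(), reusing the 
CAPP*_PHB_INDEX defines from phb4-capp.h, would do (untested, just to 
show the idea):

	if (p->index != CAPP0_PHB_INDEX && p->index != CAPP1_PHB_INDEX) {
		PHBERR(p, "CAPP: CAPI mode not supported on this PHB\n");
		return OPAL_PARAMETER;
	}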


> +
> +	for (i = 0; i < 500000; i++) {
> +		xscom_read(p->chip_id, p->pe_xscom + offset + 0xC, &reg);
> +		if (!(reg & 0xC000000000000000))
> +			break;
> +		time_wait_us(10);
> +	}
> +	if (reg & 0xC000000000000000) {
> +		PHBERR(p, "CAPP: Timeout waiting for pending transaction\n");
> +		return OPAL_HARDWARE;
> +	}
> +
> +	/* Enable CAPP Mode , Set 14 CI Store buffers for CAPP,
> +	 * Set 48 Read machines for CAPP.
> +	 */
> +	reg = 0x800EFFFFFFFFFFFF;
> +	xscom_write(p->chip_id, p->pe_xscom + 0x7, reg);
> +
> +	if (p->rev == PHB4_REV_NIMBUS_DD10) {
> +		/* Ignores the PB init signal */
> +		xscom_read(p->chip_id, p->pe_xscom + 0x0, &reg);
> +		reg |= PPC_BIT(12);
> +		xscom_write(p->chip_id, p->pe_xscom + 0x0, reg);
> +	}
> +
> +	/* PEC Phase 4 (PHB) registers adjustment
> +	 * Bit [0:7] XSL_DSNCTL[capiind]
> +	 * Init_25 - CAPI Compare/Mask
> +	 */
> +	out_be64(p->regs + PHB_CAPI_CMPM,
> +		 0x0200FE0000000000Ull | PHB_CAPI_CMPM_ENABLE);
> +
> +	if (!(p->rev == PHB4_REV_NIMBUS_DD10)) {
> +		/* Init_123 :  NBW Compare/Mask Register */
> +		out_be64(p->regs + PHB_PBL_NBW_CMP_MASK,
> +			 0x0300FF0000000000Ull | PHB_CAPI_NBW_MASK_ENABLE);
> +
> +		/* Init_24 - ASN Compare/Mask */
> +		out_be64(p->regs + PHB_PBL_ASN_CMPM,
> +			 0x0400FF0000000000Ull | PHB_CAPI_ASN_ENABLE);
> +	}
> +
> +	/* non-translate/50-bit mode */
> +	out_be64(p->regs + PHB_XLATE_PREFIX, 0x0000000000000000Ull);
> +
> +	/* set tve no translate mode allow mmio window */
> +	memset(p->tve_cache, 0x0, sizeof(p->tve_cache));
> +
> +	/*
> +	 * In 50-bit non-translate mode, the fields of the TVE are
> +	 * used to perform an address range check. In this mode TCE
> +	 * Table Size(0) must be a '1' (TVE[51] = 1)
> +	 *      PCI Addr(49:24) >= TVE[52:53]+TVE[0:23] and
> +	 *      PCI Addr(49:24) < TVE[54:55]+TVE[24:47]
> +	 *
> +	 * TVE[51] = 1
> +	 * TVE[56] = 1: 50-bit Non-Translate Mode Enable
> +	 * TVE[0:23] = 0x000000
> +	 * TVE[24:47] = 0xFFFFFF
> +	 *
> +	 * capi dma mode: CAPP DMA mode needs access to all of memory
> +	 * capi mode: Allow address range (bit 14 = 1)
> +	 *            0x0002000000000000: 0x0002FFFFFFFFFFFF
> +	 *            TVE[52:53] = '10' and TVE[54:55] = '10'
> +	 *
> +	 * --> we use capi dma mode by default
> +	 */
> +	if (dma_tvt == CAPI_DMA_TVT0) {
> +		/*
> +		 * TVT#0: CAPI window + DMA, all memory
> +		 */
> +		start_addr = 0ull;
> +		end_addr   = 0x0003ffffffffffffull;
> +		p->tve_cache[pe_number * 2] =
> +			tve_encode_50b_noxlate(start_addr, end_addr);
> +	} else {
> +		/*
> +		 * TVT#0: CAPI window, in bypass mode
> +		 * TVT#1: DMA, all memory, in bypass mode
> +		 */
> +		start_addr = 0x0002000000000000ull;
> +		end_addr   = 0x0002FFFFFFFFFFFFull;
> +		p->tve_cache[pe_number * 2] =
> +			tve_encode_50b_noxlate(start_addr, end_addr);
> +
> +		start_addr = (1ull << 59);
> +		end_addr   = start_addr + 0x0003ffffffffffffull;
> +		p->tve_cache[pe_number * 2 + 1] =
> +			tve_encode_50b_noxlate(start_addr, end_addr);
> +	}

This is my mistake: for Mellanox (CAPI_DMA_TVT1), we should define TVT#0 
as for "normal" capi, i.e. the same as in the CAPI_DMA_TVT0 case. I'm 
pretty sure it will create problems with the dummy read buffer otherwise 
(and potentially other issues).
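
In other words, the else branch would become something like this, with 
TVT#0 covering all memory as in the CAPI_DMA_TVT0 case and TVT#1 kept 
as the bypass window:

	} else {
		/*
		 * TVT#0: CAPI window + DMA, all memory (same as the
		 *        CAPI_DMA_TVT0 case)
		 * TVT#1: DMA, all memory, in bypass mode
		 */
		start_addr = 0ull;
		end_addr   = 0x0003ffffffffffffull;
		p->tve_cache[pe_number * 2] =
			tve_encode_50b_noxlate(start_addr, end_addr);

		start_addr = (1ull << 59);
		end_addr   = start_addr + 0x0003ffffffffffffull;
		p->tve_cache[pe_number * 2 + 1] =
			tve_encode_50b_noxlate(start_addr, end_addr);
	}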


> +
> +	phb4_ioda_sel(p, IODA3_TBL_TVT, 0, true);
> +	for (i = 0; i < p->tvt_size; i++)
> +		out_be64(p->regs + PHB_IODA_DATA0, p->tve_cache[i]);
> +
> +	/* set mbt bar to pass capi mmio window. First applied cleared
> +	 * values to HW
> +	 */
> +	for (i = 0; i < p->mbt_size; i++) {
> +		p->mbt_cache[i][0] = 0;
> +		p->mbt_cache[i][1] = 0;
> +	}
> +	phb4_ioda_sel(p, IODA3_TBL_MBT, 0, true);
> +	for (i = 0; i < p->mbt_size; i++) {
> +		out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][0]);
> +		out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][1]);
> +	}
> +
> +	p->mbt_cache[0][0] = IODA3_MBT0_ENABLE |
> +			     IODA3_MBT0_TYPE_M64 |
> +		SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_SINGLE_PE) |
> +		SETFIELD(IODA3_MBT0_MDT_COLUMN, 0ull, 0) |
> +		(p->mm0_base & IODA3_MBT0_BASE_ADDR);
> +	p->mbt_cache[0][1] = IODA3_MBT1_ENABLE |
> +		((~(p->mm0_size - 1)) & IODA3_MBT1_MASK) |
> +		SETFIELD(IODA3_MBT1_SINGLE_PE_NUM, 0ull, pe_number);
> +
> +	p->mbt_cache[1][0] = IODA3_MBT0_ENABLE |
> +			     IODA3_MBT0_TYPE_M64 |
> +		SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_SINGLE_PE) |
> +		SETFIELD(IODA3_MBT0_MDT_COLUMN, 0ull, 0) |
> +		(0x0002000000000000ULL & IODA3_MBT0_BASE_ADDR);
> +	p->mbt_cache[1][1] = IODA3_MBT1_ENABLE |
> +		(0x00ff000000000000ULL & IODA3_MBT1_MASK) |
> +		SETFIELD(IODA3_MBT1_SINGLE_PE_NUM, 0ull, pe_number);
> +
> +	phb4_ioda_sel(p, IODA3_TBL_MBT, 0, true);
> +	for (i = 0; i < p->mbt_size; i++) {
> +		out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][0]);
> +		out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][1]);
> +	}
> +
> +	phb4_init_capp_errors(p);
> +
> +	phb4_init_capp_regs(p);
> +
> +	if (!chiptod_capp_timebase_sync(p->chip_id, CAPP_TFMR,
> +					CAPP_TB,
> +					PHB4_CAPP_REG_OFFSET(p))) {
> +		PHBERR(p, "CAPP: Failed to sync timebase\n");
> +		return OPAL_HARDWARE;
> +	}
> +	return OPAL_SUCCESS;
> +}
> +
> +static int64_t phb4_set_capi_mode(struct phb *phb, uint64_t mode,
> +				  uint64_t pe_number)
> +{
> +	struct phb4 *p = phb_to_phb4(phb);
> +	struct proc_chip *chip = get_chip(p->chip_id);
> +	uint64_t reg;
> +	uint32_t offset;
> +
> +
> +	lock(&capi_lock);
> +	chip->capp_phb4_attached_mask |= 1 << p->index;
> +	unlock(&capi_lock);
> +
> +	offset = PHB4_CAPP_REG_OFFSET(p);
> +	xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &reg);
> +	if ((reg & PPC_BIT(5))) {
> +		PHBERR(p, "CAPP: recovery failed (%016llx)\n", reg);
> +		return OPAL_HARDWARE;
> +	} else if ((reg & PPC_BIT(0)) && (!(reg & PPC_BIT(1)))) {
> +		PHBDBG(p, "CAPP: recovery in progress\n");
> +		return OPAL_BUSY;
> +	}
> +
> +	switch (mode) {
> +	case OPAL_PHB_CAPI_MODE_PCIE:
> +		return OPAL_UNSUPPORTED;
> +
> +	case OPAL_PHB_CAPI_MODE_CAPI:
> +		return enable_capi_mode(p, pe_number, CAPI_DMA_TVT0);
> +
> +	case OPAL_PHB_CAPI_MODE_DMA:
> +		/* shouldn't be called, enabled by default on p9 */
> +		return OPAL_UNSUPPORTED;
> +
> +	case OPAL_PHB_CAPI_MODE_DMA_TVT1:
> +		return enable_capi_mode(p, pe_number, CAPI_DMA_TVT1);
> +
> +	case OPAL_PHB_CAPI_MODE_SNOOP_OFF:
> +		/* nothing to do */
> +		return OPAL_SUCCESS;

I think we should fail here, since we don't support turning snooping off. 
We can revisit if the capp tlbi filtering hw bug is ever fixed.
An alternative would be to try to *really* deactivate snooping (and not 
rely on capp tlbi filtering), but that needs more investigation, as the 
capp team mentioned it's more complicated to do.
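
In other words, something like:

	case OPAL_PHB_CAPI_MODE_SNOOP_OFF:
		/* snooping stays on, we rely on capp tlbi filtering */
		return OPAL_UNSUPPORTED;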


> +	case OPAL_PHB_CAPI_MODE_SNOOP_ON:
> +		/* nothing to do */
> +		return OPAL_SUCCESS;
> +	}
> +
> +	return OPAL_UNSUPPORTED;
> +}
> +
> +static int64_t phb4_set_capp_recovery(struct phb *phb)
> +{
> +	struct phb4 *p = phb_to_phb4(phb);
> +
> +	if (p->flags & PHB4_CAPP_RECOVERY)
> +		return 0;
> +
> +	/* set opal event flag to indicate eeh condition */
> +	opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
> +				OPAL_EVENT_PCI_ERROR);
> +
> +	p->flags |= PHB4_CAPP_RECOVERY;
> +
> +	return 0;
> +}
> +
>  static const struct phb_ops phb4_ops = {
>  	.cfg_read8		= phb4_pcicfg_read8,
>  	.cfg_read16		= phb4_pcicfg_read16,
> @@ -2531,6 +2986,8 @@ static const struct phb_ops phb4_ops = {
>  	.get_diag_data		= NULL,
>  	.get_diag_data2		= phb4_get_diag_data,
>  	.tce_kill		= phb4_tce_kill,
> +	.set_capi_mode		= phb4_set_capi_mode,
> +	.set_capp_recovery	= phb4_set_capp_recovery,
>  };
>
>  static void phb4_init_ioda3(struct phb4 *p)
> @@ -3090,6 +3547,9 @@ static void phb4_add_properties(struct phb4 *p)
>
>  	dt_add_property_cells(np, "ibm,phb-diag-data-size",
>  			      sizeof(struct OpalIoPhb4ErrorData));
> +
> +	/* Indicate to Linux that CAPP timebase sync is supported */
> +	dt_add_property_string(np, "ibm,capp-timebase-sync", NULL);
>  }
>
>  static bool phb4_calculate_windows(struct phb4 *p)
> @@ -3215,6 +3675,44 @@ static const struct irq_source_ops phb4_lsi_ops = {
>  	.attributes = phb4_lsi_attributes,
>  };
>
> +/*
> + * Trigger a creset to disable CAPI mode on kernel shutdown.
> + *
> + * This helper is called repeatedly by the host sync notifier mechanism, which
> + * relies on the kernel to regularly poll the OPAL_SYNC_HOST_REBOOT call as it
> + * shuts down.
> + *
> + * This is a somewhat hacky abuse of the host sync notifier mechanism, but the
> + * alternatives require a new API call which won't work for older kernels.
> + */
> +static bool phb4_host_sync_reset(void *data)
> +{
> +	struct phb4 *p = (struct phb4 *)data;
> +	struct pci_slot *slot = p->phb.slot;
> +	struct proc_chip *chip = get_chip(p->chip_id);
> +	int64_t rc;
> +
> +	switch (slot->state) {
> +	case PHB4_SLOT_NORMAL:
> +		lock(&capi_lock);
> +		rc = (chip->capp_phb4_attached_mask & (1 << p->index)) ?
> +			OPAL_PHB_CAPI_MODE_CAPI :
> +			OPAL_PHB_CAPI_MODE_PCIE;
> +		unlock(&capi_lock);
> +
> +		if (rc == OPAL_PHB_CAPI_MODE_PCIE)
> +			return true;
> +
> +		PHBINF(p, "PHB in CAPI mode, resetting\n");
> +		p->flags &= ~PHB4_CAPP_RECOVERY;
> +		phb4_creset(slot);
> +		return false;
> +	default:
> +		rc = slot->ops.poll(slot);
> +		return rc <= OPAL_SUCCESS;
> +	}
> +}
> +

As with disable_capi_mode() above, are we ready to support this at this 
stage? Again, I think it's only useful for kexec().

   Fred


>  static void phb4_create(struct dt_node *np)
>  {
>  	const struct dt_property *prop;
> @@ -3397,6 +3895,9 @@ static void phb4_create(struct dt_node *np)
>  				(p->rev == PHB4_REV_NIMBUS_DD10) ?
>  				&phb4_dd1_lsi_ops : &phb4_lsi_ops);
>
> +	/* Trigger a creset to disable CAPI mode on kernel shutdown. */
> +	opal_add_host_sync_notifier(phb4_host_sync_reset, p);
> +
>  	/* Platform additional setup */
>  	if (platform.pci_setup_phb)
>  		platform.pci_setup_phb(&p->phb, p->index);
> diff --git a/include/chip.h b/include/chip.h
> index b957e45..17f062b 100644
> --- a/include/chip.h
> +++ b/include/chip.h
> @@ -191,6 +191,7 @@ struct proc_chip {
>
>  	/* Must hold capi_lock to change */
>  	uint8_t			capp_phb3_attached_mask;
> +	uint8_t			capp_phb4_attached_mask;
>  	uint8_t			capp_ucode_loaded;
>
>  	/* Used by hw/centaur.c */
> diff --git a/include/opal-api.h b/include/opal-api.h
> index 80033c6..fef9c93 100644
> --- a/include/opal-api.h
> +++ b/include/opal-api.h
> @@ -1124,6 +1124,7 @@ enum {
>  	OPAL_PHB_CAPI_MODE_SNOOP_OFF    = 2,
>  	OPAL_PHB_CAPI_MODE_SNOOP_ON	= 3,
>  	OPAL_PHB_CAPI_MODE_DMA		= 4,
> +	OPAL_PHB_CAPI_MODE_DMA_TVT1	= 5,
>  };
>
>  /* CAPI feature flags (in device-tree) */
> diff --git a/include/phb4-capp.h b/include/phb4-capp.h
> new file mode 100644
> index 0000000..418838d
> --- /dev/null
> +++ b/include/phb4-capp.h
> @@ -0,0 +1,60 @@
> +/* Copyright 2013-2017 IBM Corp.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at
> + *
> + *	http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> + * implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef __PHB4_CAPP_H
> +#define __PHB4_CAPP_H
> +
> +#define CAPP_SNP_ARRAY_WRITE_REG		0x2010841  /* Satellite 2 */
> +#define CAPP_SNP_ARRAY_ADDR_REG 		0x2010828
> +#define CAPP_APC_MASTER_ARRAY_ADDR_REG		0x201082A
> +#define CAPP_APC_MASTER_ARRAY_WRITE_REG 	0x2010842  /* Satellite 2 */
> +
> +#define CAPP_FIR				0x2010800
> +#define CAPP_ERR_RPT_CLR			0x2010813
> +#define APC_MASTER_PB_CTRL			0x2010818
> +#define APC_MASTER_CAPI_CTRL			0x2010819
> +#define LCO_MASTER_TARGET			0x2010821
> +#define EPOCH_RECOVERY_TIMERS_CTRL		0x201082C
> +#define SNOOP_CAPI_CONFIG			0x201081A
> +#define SNOOP_CONTROL				0x201081B
> +#define TRANSPORT_CONTROL			0x201081C
> +#define CAPP_TB 				0x2010826
> +#define CAPP_TFMR				0x2010827
> +#define CAPP_ERR_STATUS_CTRL			0x201080E
> +#define FLUSH_SUE_STATE_MAP			0x201080F
> +#define FLUSH_CPIG_STATE_MAP			0x2010820
> +#define FLUSH_SUE_UOP1				0x2010843  /* Satellite 2 */
> +#define APC_FSM_READ_MASK			0x2010823
> +#define XPT_FSM_RMM				0x2010831
> +
> +/* CAPP0 attached to PHB0(PEC0 - single port)
> + * CAPP1 attached to PHB3(PEC2 - single or dual port)
> + */
> +#define CAPP0_PHB_INDEX 0
> +#define CAPP1_PHB_INDEX 3
> +
> +/* SCOM address Base (Ring = ‘0010’b)
> + * CAPP Unit Satellite           SCOM address Base
> + * CAPP 0    S1 (sat = ‘0000’b)  x02010800
> + * CAPP 0    S2 (sat = ‘0001’b)  x02010840
> + * CAPP 1    S1 (sat = ‘0000’b)  x04010800
> + * CAPP 1    S2 (sat = ‘0001’b)  x04010840
> + */
> +#define CAPP1_REG_OFFSET 0x2000000
> +
> +#define PHB4_CAPP_REG_OFFSET(p) ((p)->index == 0 ? 0x0 : CAPP1_REG_OFFSET)
> +
> +#endif /* __PHB4_CAPP_H */
> diff --git a/include/phb4-regs.h b/include/phb4-regs.h
> index 5480949..5eeaf18 100644
> --- a/include/phb4-regs.h
> +++ b/include/phb4-regs.h
> @@ -71,8 +71,12 @@
>  #define PHB_PEST_BAR			0x1a8
>  #define   PHB_PEST_BAR_ENABLE		PPC_BIT(0)
>  #define   PHB_PEST_BASE_ADDRESS		PPC_BITMASK(8,51)
> +#define PHB_PBL_ASN_CMPM		0x1C0
> +#define   PHB_CAPI_ASN_ENABLE		PPC_BIT(63)
> +#define PHB_CAPI_CMPM			0x1C8
> +#define   PHB_CAPI_CMPM_ENABLE		PPC_BIT(63)
>  #define PHB_M64_UPPER_BITS		0x1f0
> -#define PHB_INTREP_TIMER		0x1f8
> +#define PHB_XLATE_PREFIX		0x1f8
>  #define PHB_DMARD_SYNC			0x200
>  #define   PHB_DMARD_SYNC_START		PPC_BIT(0)
>  #define   PHB_DMARD_SYNC_COMPLETE	PPC_BIT(1)
> @@ -241,6 +245,7 @@
>  #define PHB_PBL_TIMEOUT_CTRL			0x1810
>  #define PHB_PBL_NPTAG_ENABLE			0x1820
>  #define PHB_PBL_NBW_CMP_MASK			0x1830
> +#define   PHB_CAPI_NBW_MASK_ENABLE		PPC_BIT(63)
>  #define PHB_PBL_SYS_LINK_INIT			0x1838
>  #define PHB_PBL_BUF_STATUS			0x1840
>  #define PHB_PBL_ERR_STATUS			0x1900
> diff --git a/include/phb4.h b/include/phb4.h
> index bed6eef..651946c 100644
> --- a/include/phb4.h
> +++ b/include/phb4.h
> @@ -222,6 +222,7 @@ struct phb4_err {
>  #define PHB4_CFG_USE_ASB	0x00000002
>  #define PHB4_CFG_BLOCKED	0x00000004
>  #define PHB4_CAPP_RECOVERY	0x00000008
> +#define PHB4_CAPP_DISABLING	0x00000010
>
>  struct phb4 {
>  	unsigned int		index;	    /* 0..5 index inside p9 */
>


