[Skiboot] [PATCH v2 6/8] capp/phb4: Introduce PHB4 flag, PHB4_CAPP_DISABLE to disable CAPP

christophe lombard clombard at linux.vnet.ibm.com
Tue Dec 11 21:25:49 AEDT 2018


Le 09/12/2018 à 15:17, Vaibhav Jain a écrit :
> This patch introduces a PHB4 flag PHB4_CAPP_DISABLE and scaffolding
> necessary to handle it during CRESET flow. The flag is set when CAPP
> is requested to switch to PCIe mode via a call to phb4_set_capi_mode()
> with mode OPAL_PHB_CAPI_MODE_PCIE. This starts the below sequence that
> ultimately ends in newly introduced phb4_slot_sm_run_completed()
> 
> 1. Set PHB4_CAPP_DISABLE to phb4->flags.
> 
> 2. Start a CRESET on the phb slot. This also starts the opal pci reset
> state machine.
> 
> 3. Wait for slot state to be PHB4_SLOT_CRESET_WAIT_CQ.
> 
> 4. Perform CAPP recovery as PHB is still fenced, by calling
> do_capp_recovery_scoms().
> 
> 5. Call newly introduced 'disable_capi_mode()' to disable CAPP.
> 
> 6. Wait for slot reset to complete while it transitions to
> PHB4_SLOT_FRESET and optionally to PHB4_SLOT_LINK_START.
> 
> 7. Once slot reset is complete opal pci-core state machine will call
> slot->ops.completed_sm_run().
> 
> 8. For PHB4 this branches to the newly introduced 'phb4_slot_sm_run_completed()'.
> 
> 9. Inside this function we mark the CAPP as disabled and un-register
> the opal syncer phb4_host_sync_reset().
> 
> 10. Optionally if the slot reset was unsuccessful disable
> fast-reboot.
> 
> ****************************
> Notes:
> ****************************
> a. Function 'disable_capi_mode()' performs various sanity tests on CAPP
> to determine if it's ok to disable it and performs the necessary xscoms
> to disable it. However the current implementation proposed in this
> patch is a skeleton one that just does sanity tests. A followup patch
> will be proposed that implements the xscoms necessary to disable CAPP.
> 
> b. The sequence expects that Opal PCI reset state machine makes
> forward progress hence needs someone to call slot->ops.run_sm(). This
> can be either from phb4_host_sync_reset() or opal_pci_poll().
> 
> Signed-off-by: Vaibhav Jain <vaibhav at linux.ibm.com>
> ---
> Change-log:
> 
> v2:	Removed the usage of global mutex 'capi_lock'. [Andrew, Stewart]
> 	Populate and use the newly introduced 'struct capp' to
> 	maintain the state of capp.
> ---
>   hw/phb4.c      | 98 +++++++++++++++++++++++++++++++++++++++++++++++++-
>   include/phb4.h |  1 +
>   2 files changed, 98 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/phb4.c b/hw/phb4.c
> index 97c77a89..447ba902 100644
> --- a/hw/phb4.c
> +++ b/hw/phb4.c
> @@ -2799,6 +2799,36 @@ static bool phb4_host_sync_reset(void *data)
>   	return rc <= OPAL_SUCCESS;
>   }
> 
> +/*
> + * Notification from the pci-core that a pci slot state machine completed.
> + * We use this callback to mark the CAPP disabled if we were waiting for it.
> + */
> +static int64_t phb4_slot_sm_run_completed(struct pci_slot *slot, uint64_t err)
> +{
> +	struct phb4 *p = phb_to_phb4(slot->phb);
> +
> +	/* Check if we are disabling the capp */
> +	if (p->flags & PHB4_CAPP_DISABLE) {
> +
> +		/* Unset struct capp so that we dont fall into a creset loop */
> +		p->flags &= ~(PHB4_CAPP_DISABLE);
> +		p->phb.capp->phb = NULL;
> +		p->phb.capp->attached_pe = phb4_get_reserved_pe_number(&p->phb);
> +
> +		/* Remove the host sync notifier is we are done.*/
> +		opal_del_host_sync_notifier(phb4_host_sync_reset, p);
> +		if (err) {
> +			/* Force a CEC ipl reboot */
> +			disable_fast_reboot("CAPP: reset failed");
> +			PHBERR(p, "CAPP: Unable to reset. Error=%lld\n", err);
> +		} else {
> +			PHBINF(p, "CAPP: reset complete\n");
> +		}
> +	}
> +
> +	return OPAL_SUCCESS;
> +}
> +
>   static int64_t phb4_poll_link(struct pci_slot *slot)
>   {
>   	struct phb4 *p = phb_to_phb4(slot->phb);
> @@ -3155,6 +3185,43 @@ static int do_capp_recovery_scoms(struct phb4 *p)
>   	return rc;
>   }
> 
> +/*
> + * Disable CAPI mode on a PHB. Must be done while PHB is fenced and
> + * not in recovery.
> + */
> +static void disable_capi_mode(struct phb4 *p)
> +{
> +	uint64_t reg;
> +	struct capp *capp = p->phb.capp;
> +
> +	PHBINF(p, "CAPP: Deactivating\n");
> +
> +	/* Check if CAPP attached to the PHB and active */
> +	if (!capp || capp->phb != &p->phb) {
> +		PHBDBG(p, "CAPP: Not attached to this PHB!\n");
> +		return;
> +	}
> +
> +	xscom_read(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, &reg);
> +	if (!(reg & PPC_BIT(0))) {
> +		/* Not in CAPI mode, no action required */
> +		PHBERR(p, "CAPP: Not enabled!\n");
> +		return;
> +	}
> +
> +	/* CAPP should already be out of recovery in this function */
> +	xscom_read(p->chip_id, capp->capp_xscom_offset +
> +		   CAPP_ERR_STATUS_CTRL, &reg);
> +	if (reg & PPC_BIT(0)) {
> +		PHBERR(p, "CAPP: Cant disable while still in recovery!\n");
> +		return;
> +	}
> +
> +	PHBINF(p, "CAPP: Disabling CAPI mode\n");
> +
> +	/* Implement procedure to disable CAPP based on h/w sequence */
> +}
> +
>   static int64_t phb4_creset(struct pci_slot *slot)
>   {
>   	struct phb4 *p = phb_to_phb4(slot->phb);
> @@ -3215,6 +3282,9 @@ static int64_t phb4_creset(struct pci_slot *slot)
>   			    (do_capp_recovery_scoms(p) != OPAL_SUCCESS))
>   				goto error;
> 
> +			if (p->flags & PHB4_CAPP_DISABLE)
> +				disable_capi_mode(p);
> +
>   			/* Clear errors in PFIR and NFIR */
>   			xscom_write(p->chip_id, p->pci_stk_xscom + 0x1,
>   				    ~p->pfir_cache);
> @@ -3318,6 +3388,7 @@ static struct pci_slot *phb4_slot_create(struct phb *phb)
>   	slot->ops.hreset		= phb4_hreset;
>   	slot->ops.freset		= phb4_freset;
>   	slot->ops.creset		= phb4_creset;
> +	slot->ops.completed_sm_run	= phb4_slot_sm_run_completed;
>   	slot->link_retries		= PHB4_LINK_LINK_RETRIES;
> 
>   	return slot;
> @@ -4480,12 +4551,37 @@ static int64_t phb4_set_capi_mode(struct phb *phb, uint64_t mode,
>   		break;
> 
>   	case OPAL_PHB_CAPI_MODE_SNOOP_OFF:
> -	case OPAL_PHB_CAPI_MODE_PCIE: /* Not supported at the moment */
>   		ret = phb->capp->phb ? OPAL_UNSUPPORTED : OPAL_SUCCESS;
>   		break;
> 
> +	case OPAL_PHB_CAPI_MODE_PCIE:
> +		if (p->flags & PHB4_CAPP_DISABLE) {
> +			/* We are in middle of a CAPP disable */
> +			ret = OPAL_BUSY;
> +
> +		} else if (capp->phb) {

What happens if a CAPP recovery is in progress or has failed, now that
the checks have been moved?
Does phb4_creset() exit without issues?
Thanks

> +			/* Kick start a creset */
> +			p->flags |= PHB4_CAPP_DISABLE;
> +			PHBINF(p, "CAPP: PCIE mode needs a cold-reset\n");
> +			/* Kick off the pci state machine */
> +			ret = phb4_creset(phb->slot);
> +			ret = ret > 0 ? OPAL_BUSY : ret;
> +
> +		} else {
> +			/* PHB already in PCI mode */
> +			ret = OPAL_SUCCESS;
> +		}
> +		break;
> +
>   	case OPAL_PHB_CAPI_MODE_CAPI: /* Fall Through */
>   	case OPAL_PHB_CAPI_MODE_DMA_TVT1:
> +		/* Make sure that PHB is not disabling CAPP */
> +		if (p->flags & PHB4_CAPP_DISABLE) {
> +			PHBERR(p, "CAPP: Disable in progress\n");
> +			ret = OPAL_BUSY;
> +			break;
> +		}
> +
>   		/* Check if ucode is available */
>   		if (!capp_ucode_loaded(chip, p->index)) {
>   			PHBERR(p, "CAPP: ucode not loaded\n");
> diff --git a/include/phb4.h b/include/phb4.h
> index 43819d57..0f06dcbf 100644
> --- a/include/phb4.h
> +++ b/include/phb4.h
> @@ -166,6 +166,7 @@ struct phb4_err {
>   #define PHB4_CFG_USE_ASB	0x00000002
>   #define PHB4_CFG_BLOCKED	0x00000004
>   #define PHB4_CAPP_RECOVERY	0x00000008
> +#define PHB4_CAPP_DISABLE	0x00000010
> 
>   struct phb4 {
>   	unsigned int		index;	    /* 0..5 index inside p9 */
> 



More information about the Skiboot mailing list