[Skiboot] [RFC PATCH 03/23] core/pci-opal: Convert set_power_state timer

Frederic Barrat fbarrat at linux.ibm.com
Thu May 16 00:55:40 AEST 2019



Le 03/04/2019 à 11:09, Oliver O'Halloran a écrit :
> Crack this in two so that we bring the slot power up, then bring up the
> link using the slot state machine. This allows us to properly handle
> hotplug on root complex slots since we can't rely on the generic code to
> bring the link up properly.
> 
> Signed-off-by: Oliver O'Halloran <oohall at gmail.com>
> ---
>   core/pci-opal.c | 187 ++++++++++++++++++++++++++++++++----------------
>   1 file changed, 127 insertions(+), 60 deletions(-)
> 
> diff --git a/core/pci-opal.c b/core/pci-opal.c
> index d7abb15b2bad..68406f3b31ba 100644
> --- a/core/pci-opal.c
> +++ b/core/pci-opal.c
> @@ -709,7 +709,7 @@ static int64_t opal_pci_get_power_state(uint64_t id, uint64_t data)
>   }
>   opal_call(OPAL_PCI_GET_POWER_STATE, opal_pci_get_power_state, 2);
>   
> -static void set_power_timer(struct timer *t __unused, void *data,
> +static void link_up_timer(struct timer *t __unused, void *data,
>   			    uint64_t now __unused)
>   {
>   	struct pci_slot *slot = data;
> @@ -717,57 +717,118 @@ static void set_power_timer(struct timer *t __unused, void *data,
>   	struct pci_device *pd = slot->pd;
>   	struct dt_node *dn = pd->dn;
>   	uint8_t link;
> +	int64_t rc = OPAL_BUSY;
>   
> -	switch (slot->state) {
> -	case PCI_SLOT_STATE_SPOWER_START:
> -		if (slot->retries-- == 0) {
> -			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> -			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> -				       slot->async_token, dn->phandle,
> -				       slot->power_state, OPAL_BUSY);
> -		} else {
> -			schedule_timer(&slot->timer, msecs_to_tb(10));
> -		}
> +	/*
> +	 * PHB slots need to run throught their link up/down procedure
> +	 * in order to work, so... let that happen.
> +	 *
> +	 * NB: We don't decrement retries here since we the PHB slot
> +	 * does its own retry accounting.
> +	 */
> +	rc = slot->ops.run_sm(slot);
> +	if (rc < 0) // error, abort
> +		goto done;
> +	if (rc > 0 && slot->retries-- <= 0) // timeout, abort
> +		goto done;
> +	if (rc > 0) { // kick the can down the road
> +		schedule_timer(t, msecs_to_tb(10));
> +		return;
> +	}


I've been using this patch series to implement an opencapi adapter reset 
through the PCI hotplug driver on linux. I was interested in the 
separation between power and reset states, so the timing for this series 
was perfect!

For opencapi and hotplug, I'm actually cheating a bit: the hotplug 
driver gives a sysfs interface to power off/on a slot. I'm using that 
with the (fake) PCI slot for the (virtual) opencapi PHB. So I'm not 
really powering off/on the card. Instead I'm quiescing it by setting it 
in reset (power off) or releasing the reset signal (power on).

I had to do a few tweaks I'd like to discuss here:
- after power on, I need to trigger what is currently a freset for 
opencapi to go through link training. This series assumes that it is 
done by setting the slot state to PCI_SLOT_STATE_LINK_START_POLL and run 
the state machine. It would be good to have some flexibility there to 
get the link training rolling.

- the slot->retry count is used in set_power_timer(), but it also tends 
to be used in the various link training procedures. I'm thinking we may 
not need it, and rely on the reset/link training procedures to detect 
timeouts (most seem to already do), or have a dedicated power retry count?

- not really related to this patch set, but is there a reason why PCI 
hotplug is not enabled for P9? The linux driver (pnv_phb.c) looks for 
"ibm,ioda2-phb" PHBs, but on P9, the PHBs are compatible with 
"ibm,ioda3-phb".


   Fred


>   
> -		break;
> -	case PCI_SLOT_STATE_SPOWER_DONE:
> -		if (slot->power_state == OPAL_PCI_SLOT_POWER_OFF) {
> -			pci_remove_bus(phb, &pd->children);
> -			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> -			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> -				       slot->async_token, dn->phandle,
> -				       OPAL_PCI_SLOT_POWER_OFF, OPAL_SUCCESS);
> -			break;
> -		}
> +	if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
> +		link = 0;
> +	if (link) {
> +		prerror("scanning slot...\n");
> +		slot->ops.prepare_link_change(slot, true);
>   
> -		/* Power on */
> -		if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
> -			link = 0;
> -		if (link) {
> -			slot->ops.prepare_link_change(slot, true);
> -			pci_scan_bus(phb, pd->secondary_bus,
> -				     pd->subordinate_bus,
> -				     &pd->children, pd, true);
> -			pci_add_device_nodes(phb, &pd->children, dn,
> -					     &phb->lstate, 0);
> -			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> -			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> -				       slot->async_token, dn->phandle,
> -				       OPAL_PCI_SLOT_POWER_ON, OPAL_SUCCESS);
> -		} else if (slot->retries-- == 0) {
> -			pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> -			opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> -				       slot->async_token, dn->phandle,
> -				       OPAL_PCI_SLOT_POWER_ON, OPAL_BUSY);
> -		} else {
> +		pci_scan_bus(phb, pd->secondary_bus,
> +			     pd->subordinate_bus,
> +			     &pd->children, pd, true);
> +		pci_add_device_nodes(phb, &pd->children, dn,
> +				     &phb->lstate, 0);
> +	}
> +
> +done:
> +	/*
> +	 * We need to send a completion message back to the kernel, otherwise
> +	 * it'll sit there forever.
> +	 */
> +	pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> +
> +	opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> +		       slot->async_token, dn->phandle,
> +		       slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
> +	prerror("sent competition state = %lld from %s\n", rc, __func__);
> +}
> +
> +static void set_power_timer(struct timer *t, void *data, uint64_t now)
> +{
> +	struct pci_slot *slot = data;
> +	//struct phb *phb = slot->phb;
> +	struct pci_device *pd = slot->pd;
> +	struct dt_node *dn = pd->dn;
> +	//uint8_t link;
> +	struct phb *phb = slot->phb;
> +	int64_t rc = OPAL_BUSY;
> +
> +	/* XXX: Do we need to take the phb lock here? */
> +
> +	prlog(PR_TRACE, "%s: state = %x, retries: %lld, link_retries: %lld\n",
> +			__func__, slot->state, slot->retries, slot->link_retries);
> +
> +	// FIXME: We should do something better than just calling it repeatedly,
> +	// a seperate poll_power_state() function might be a better thing.
> +	// Maybe have get_power_state() return the in-progress thing.
> +	rc = slot->ops.set_power_state(slot, slot->power_state);
> +
> +	/* error occured! oh no! */
> +	if (rc < OPAL_SUCCESS)
> +		goto done;
> +	if (rc > OPAL_SUCCESS) {
> +		if (slot->retries) {
> +			slot->retries--;
>   			schedule_timer(&slot->timer, msecs_to_tb(10));
> +			return;
> +		} else {
> +			goto done;
>   		}
> +	}
>   
> -		break;
> -	default:
> -		prlog(PR_ERR, "PCI SLOT %016llx: Unexpected state 0x%08x\n",
> -		      slot->id, slot->state);
> +	/*
> +	 * In the power off case we're done, so notify the OS
> +	 *
> +	 * FIXME: We should probably do something to put the slot back into
> +	 * the POLLING state or something...
> +	 */
> +	if (slot->power_state == OPAL_PCI_SLOT_POWER_OFF) {
> +		pci_remove_bus(phb, &pd->children);
> +		rc = OPAL_SUCCESS;
> +		goto done;
>   	}
> +
> +	/*
> +	 * In the power up case run through the FRESET handler. This is hella
> +	 * dumb and we need to do something less stupid in future.
> +	 */
> +	pci_slot_set_state(slot, PCI_SLOT_STATE_LINK_START_POLL);
> +
> +	// switch to using the link poll timer
> +	init_timer(&slot->timer, link_up_timer, slot);
> +	link_up_timer(t, data, now);
> +
> +	return;
> +done:
> +	/*
> +	 * We need to send a completion message back to the kernel, otherwise
> +	 * it'll sit there forever.
> +	 */
> +	pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> +
> +	opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> +		       slot->async_token, dn->phandle,
> +		       slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
> +	prerror("sent competition state = %lld from %s\n", rc, __func__);
>   }
>   
>   static int64_t opal_pci_set_power_state(uint64_t async_token,
> @@ -834,25 +895,31 @@ static int64_t opal_pci_set_power_state(uint64_t async_token,
>   	}
>   
>   	/*
> -	 * OPAL_ASYNC_COMPLETION is returned when delay is needed to change
> -	 * the power state in the backend. When it can be finished without
> -	 * delay, OPAL_SUCCESS is returned. The PCI topology needs to be
> -	 * updated in both cases.
> +	 * set_power_state returning > 0 indicates we need to wait
> +	 * a bit, so setup a timer and return ASYNC_COMPLETION.
> +	 *
> +	 * FIXME: Use the wait information set_power_state returns.
>   	 */
> -	if (rc == OPAL_ASYNC_COMPLETION) {
> +	if (rc > 0) {
>   		slot->retries = 500;
>   		init_timer(&slot->timer, set_power_timer, slot);
>   		schedule_timer(&slot->timer, msecs_to_tb(10));
> -	} else if (rc == OPAL_SUCCESS) {
> -		if (*state == OPAL_PCI_SLOT_POWER_OFF) {
> -			pci_remove_bus(phb, &pd->children);
> -		} else {
> -			slot->ops.prepare_link_change(slot, true);
> -			pci_scan_bus(phb, pd->secondary_bus,
> -				pd->subordinate_bus, &pd->children, pd, true);
> -			pci_add_device_nodes(phb, &pd->children, pd->dn,
> -				&phb->lstate, 0);
> -		}
> +		rc = OPAL_ASYNC_COMPLETION;
> +	}
> +	if (rc != OPAL_SUCCESS) {
> +		phb_unlock(phb);
> +		return rc;
> +	}
> +
> +	/* Otherwise scan the slot so we have the new device in the DT */
> +	if (*state == OPAL_PCI_SLOT_POWER_ON) {
> +		slot->ops.prepare_link_change(slot, true);
> +		pci_scan_bus(phb, pd->secondary_bus,
> +			pd->subordinate_bus, &pd->children, pd, true);
> +		pci_add_device_nodes(phb, &pd->children, pd->dn,
> +			&phb->lstate, 0);
> +	} else {
> +		pci_remove_bus(phb, &pd->children);
>   	}
>   
>   	phb_unlock(phb);
> 



More information about the Skiboot mailing list