[Skiboot] [RFC PATCH 03/23] core/pci-opal: Convert set_power_state timer
Frederic Barrat
fbarrat at linux.ibm.com
Thu May 16 00:55:40 AEST 2019
Le 03/04/2019 à 11:09, Oliver O'Halloran a écrit :
> Crack this in two so that we bring the slot power up, then bring up the
> link using the slot state machine. This allows us to properly handle
> hotplug on root complex slots since we can't rely on the generic code to
> bring the link up properly.
>
> Signed-off-by: Oliver O'Halloran <oohall at gmail.com>
> ---
> core/pci-opal.c | 187 ++++++++++++++++++++++++++++++++----------------
> 1 file changed, 127 insertions(+), 60 deletions(-)
>
> diff --git a/core/pci-opal.c b/core/pci-opal.c
> index d7abb15b2bad..68406f3b31ba 100644
> --- a/core/pci-opal.c
> +++ b/core/pci-opal.c
> @@ -709,7 +709,7 @@ static int64_t opal_pci_get_power_state(uint64_t id, uint64_t data)
> }
> opal_call(OPAL_PCI_GET_POWER_STATE, opal_pci_get_power_state, 2);
>
> -static void set_power_timer(struct timer *t __unused, void *data,
> +static void link_up_timer(struct timer *t __unused, void *data,
> uint64_t now __unused)
> {
> struct pci_slot *slot = data;
> @@ -717,57 +717,118 @@ static void set_power_timer(struct timer *t __unused, void *data,
> struct pci_device *pd = slot->pd;
> struct dt_node *dn = pd->dn;
> uint8_t link;
> + int64_t rc = OPAL_BUSY;
>
> - switch (slot->state) {
> - case PCI_SLOT_STATE_SPOWER_START:
> - if (slot->retries-- == 0) {
> - pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> - opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> - slot->async_token, dn->phandle,
> - slot->power_state, OPAL_BUSY);
> - } else {
> - schedule_timer(&slot->timer, msecs_to_tb(10));
> - }
> + /*
> + * PHB slots need to run throught their link up/down procedure
> + * in order to work, so... let that happen.
> + *
> + * NB: We don't decrement retries here since we the PHB slot
> + * does its own retry accounting.
> + */
> + rc = slot->ops.run_sm(slot);
> + if (rc < 0) // error, abort
> + goto done;
> + if (rc > 0 && slot->retries-- <= 0) // timeout, abort
> + goto done;
> + if (rc > 0) { // kick the can down the road
> + schedule_timer(t, msecs_to_tb(10));
> + return;
> + }
I've been using this patch series to implement an opencapi adapter reset
through the PCI hotplug driver on linux. I was interested in the
separation between power and reset states, so the timing for this series
was perfect!
For opencapi and hotplug, I'm actually cheating a bit: the hotplug
driver gives a sysfs interface to power off/on a slot. I'm using that
with the (fake) PCI slot for the (virtual) opencapi PHB. So I'm not
really powering off/on the card. Instead I'm quiescing it by setting it
in reset (power off) or releasing the reset signal (power on).
I had to do a few tweaks I'd like to discuss here:
- after power on, I need to trigger what is currently a freset for
opencapi to go through link training. This series assumes that it is
done by setting the slot state to PCI_SLOT_STATE_LINK_START_POLL and run
the state machine. It would be good to have some flexibility there to
get the link training rolling.
- the slot->retry count is used in set_power_timer(), but it also tends
to be used in the various link training procedures. I'm thinking we may
not need it, and rely on the reset/link training procedures to detect
timeouts (most seem to already do), or have a dedicated power retry count?
- not really related to this patch set, but is there a reason why PCI
hotplug is not enabled for P9? The linux driver (pnv_phb.c) looks for
"ibm,ioda2-phb" PHBs, but on P9, the PHBs are compatible with
"ibm,ioda3-phb".
Fred
>
> - break;
> - case PCI_SLOT_STATE_SPOWER_DONE:
> - if (slot->power_state == OPAL_PCI_SLOT_POWER_OFF) {
> - pci_remove_bus(phb, &pd->children);
> - pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> - opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> - slot->async_token, dn->phandle,
> - OPAL_PCI_SLOT_POWER_OFF, OPAL_SUCCESS);
> - break;
> - }
> + if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
> + link = 0;
> + if (link) {
> + prerror("scanning slot...\n");
> + slot->ops.prepare_link_change(slot, true);
>
> - /* Power on */
> - if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
> - link = 0;
> - if (link) {
> - slot->ops.prepare_link_change(slot, true);
> - pci_scan_bus(phb, pd->secondary_bus,
> - pd->subordinate_bus,
> - &pd->children, pd, true);
> - pci_add_device_nodes(phb, &pd->children, dn,
> - &phb->lstate, 0);
> - pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> - opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> - slot->async_token, dn->phandle,
> - OPAL_PCI_SLOT_POWER_ON, OPAL_SUCCESS);
> - } else if (slot->retries-- == 0) {
> - pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> - opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> - slot->async_token, dn->phandle,
> - OPAL_PCI_SLOT_POWER_ON, OPAL_BUSY);
> - } else {
> + pci_scan_bus(phb, pd->secondary_bus,
> + pd->subordinate_bus,
> + &pd->children, pd, true);
> + pci_add_device_nodes(phb, &pd->children, dn,
> + &phb->lstate, 0);
> + }
> +
> +done:
> + /*
> + * We need to send a completion message back to the kernel, otherwise
> + * it'll sit there forever.
> + */
> + pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> +
> + opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> + slot->async_token, dn->phandle,
> + slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
> + prerror("sent competition state = %lld from %s\n", rc, __func__);
> +}
> +
> +static void set_power_timer(struct timer *t, void *data, uint64_t now)
> +{
> + struct pci_slot *slot = data;
> + //struct phb *phb = slot->phb;
> + struct pci_device *pd = slot->pd;
> + struct dt_node *dn = pd->dn;
> + //uint8_t link;
> + struct phb *phb = slot->phb;
> + int64_t rc = OPAL_BUSY;
> +
> + /* XXX: Do we need to take the phb lock here? */
> +
> + prlog(PR_TRACE, "%s: state = %x, retries: %lld, link_retries: %lld\n",
> + __func__, slot->state, slot->retries, slot->link_retries);
> +
> + // FIXME: We should do something better than just calling it repeatedly,
> + // a seperate poll_power_state() function might be a better thing.
> + // Maybe have get_power_state() return the in-progress thing.
> + rc = slot->ops.set_power_state(slot, slot->power_state);
> +
> + /* error occured! oh no! */
> + if (rc < OPAL_SUCCESS)
> + goto done;
> + if (rc > OPAL_SUCCESS) {
> + if (slot->retries) {
> + slot->retries--;
> schedule_timer(&slot->timer, msecs_to_tb(10));
> + return;
> + } else {
> + goto done;
> }
> + }
>
> - break;
> - default:
> - prlog(PR_ERR, "PCI SLOT %016llx: Unexpected state 0x%08x\n",
> - slot->id, slot->state);
> + /*
> + * In the power off case we're done, so notify the OS
> + *
> + * FIXME: We should probably do something to put the slot back into
> + * the POLLING state or something...
> + */
> + if (slot->power_state == OPAL_PCI_SLOT_POWER_OFF) {
> + pci_remove_bus(phb, &pd->children);
> + rc = OPAL_SUCCESS;
> + goto done;
> }
> +
> + /*
> + * In the power up case run through the FRESET handler. This is hella
> + * dumb and we need to do something less stupid in future.
> + */
> + pci_slot_set_state(slot, PCI_SLOT_STATE_LINK_START_POLL);
> +
> + // switch to using the link poll timer
> + init_timer(&slot->timer, link_up_timer, slot);
> + link_up_timer(t, data, now);
> +
> + return;
> +done:
> + /*
> + * We need to send a completion message back to the kernel, otherwise
> + * it'll sit there forever.
> + */
> + pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> +
> + opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> + slot->async_token, dn->phandle,
> + slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
> + prerror("sent competition state = %lld from %s\n", rc, __func__);
> }
>
> static int64_t opal_pci_set_power_state(uint64_t async_token,
> @@ -834,25 +895,31 @@ static int64_t opal_pci_set_power_state(uint64_t async_token,
> }
>
> /*
> - * OPAL_ASYNC_COMPLETION is returned when delay is needed to change
> - * the power state in the backend. When it can be finished without
> - * delay, OPAL_SUCCESS is returned. The PCI topology needs to be
> - * updated in both cases.
> + * set_power_state returning > 0 indicates we need to wait
> + * a bit, so setup a timer and return ASYNC_COMPLETION.
> + *
> + * FIXME: Use the wait information set_power_state returns.
> */
> - if (rc == OPAL_ASYNC_COMPLETION) {
> + if (rc > 0) {
> slot->retries = 500;
> init_timer(&slot->timer, set_power_timer, slot);
> schedule_timer(&slot->timer, msecs_to_tb(10));
> - } else if (rc == OPAL_SUCCESS) {
> - if (*state == OPAL_PCI_SLOT_POWER_OFF) {
> - pci_remove_bus(phb, &pd->children);
> - } else {
> - slot->ops.prepare_link_change(slot, true);
> - pci_scan_bus(phb, pd->secondary_bus,
> - pd->subordinate_bus, &pd->children, pd, true);
> - pci_add_device_nodes(phb, &pd->children, pd->dn,
> - &phb->lstate, 0);
> - }
> + rc = OPAL_ASYNC_COMPLETION;
> + }
> + if (rc != OPAL_SUCCESS) {
> + phb_unlock(phb);
> + return rc;
> + }
> +
> + /* Otherwise scan the slot so we have the new device in the DT */
> + if (*state == OPAL_PCI_SLOT_POWER_ON) {
> + slot->ops.prepare_link_change(slot, true);
> + pci_scan_bus(phb, pd->secondary_bus,
> + pd->subordinate_bus, &pd->children, pd, true);
> + pci_add_device_nodes(phb, &pd->children, pd->dn,
> + &phb->lstate, 0);
> + } else {
> + pci_remove_bus(phb, &pd->children);
> }
>
> phb_unlock(phb);
>
More information about the Skiboot
mailing list