[Skiboot] [PATCH 04/16] core/pci: Train link of PHB slots when hotplugging
Oliver O'Halloran
oohall at gmail.com
Tue Oct 1 22:02:47 AEST 2019
On Mon, 2019-09-09 at 14:31 +0200, Frederic Barrat wrote:
> The link of PHB slots must be trained after powering on. This can be
> done by calling the fundamental reset callback of the slot.
>
> We could force a reset for all the slots and have a common path in
> set_power_state(). But this patch only resets the PHB slot. Some slot
> implementations do a power cycle during fundamental reset, so calling
> a reset after powering on would repeat that operation.
>
> Signed-off-by: Frederic Barrat <fbarrat at linux.ibm.com>
> ---
> core/pci-opal.c | 130 ++++++++++++++++++++++++++++++++++++++++--------
> 1 file changed, 108 insertions(+), 22 deletions(-)
>
> diff --git a/core/pci-opal.c b/core/pci-opal.c
> index 69bd65c6..6c070030 100644
> --- a/core/pci-opal.c
> +++ b/core/pci-opal.c
> @@ -678,11 +678,111 @@ static void remove_slot_devices(struct pci_slot *slot)
> pci_remove_bus(phb, &pd->children);
> }
>
> +static void link_training_timer(struct timer *t, void *data,
> + uint64_t now __unused)
> +{
> + struct pci_slot *slot = data;
> + struct phb *phb = slot->phb;
> + uint8_t link;
> + int64_t rc = 0;
> +
> + phb_lock(phb);
> +
> + rc = slot->ops.run_sm(slot);
> + if (rc < 0)
> + goto out;
> + if (rc > 0) {
> + schedule_timer(t, rc);
> + phb_unlock(phb);
> + return;
> + }
> +
> + if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
> + link = 0;
> + if (!link) {
> + rc = OPAL_HARDWARE;
> + goto out;
> + }
> +
> + rescan_slot_devices(slot);
> +out:
> + opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> + slot->async_token, get_slot_phandle(slot),
> + slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
> + phb_unlock(phb);
> +}
> +
> +static void link_up_timer(struct timer *t, void *data, uint64_t now __unused)
> +{
> + struct pci_slot *slot = data;
> + struct phb *phb = slot->phb;
> + uint8_t link;
> + int64_t rc = OPAL_SUCCESS;
> +
> + phb_lock(phb);
> + if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
> + link = 0;
> + if (!link) {
> + if (slot->retries-- == 0) {
> + rc = OPAL_BUSY;
> + goto out;
> + } else {
> + schedule_timer(t, msecs_to_tb(10));
> + phb_unlock(phb);
> + return;
> + }
> + }
> +
> + rescan_slot_devices(slot);
> +out:
> + opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> + slot->async_token, get_slot_phandle(slot),
> + slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
> + phb_unlock(phb);
> +}
> +
> +static bool training_needed(struct pci_slot *slot)
> +{
> + struct phb *phb = slot->phb;
> + struct pci_device *pd = slot->pd;
> +
> + if ((pd && !pd->parent) || /* "real" PHB */
> + (!pd && phb->phb_type == phb_type_npu_v2_opencapi))
> + return true;
> + return false;
> +}
Any reason why training_needed() couldn't just be a link state check?
That's basicly what's happening here anyway with a one timer tick wait
added
If that wait is important for some reason then that should be
reflected via a state machine transition rather.
> +static void wait_for_link_up_and_rescan(struct pci_slot *slot)
> +{
> + int64_t rc;
> +
> + /*
> + * Links for PHB slots need to be retrained by triggering a
> + * fundamental reset. Other slots also need to be tested for
> + * readiness
> + */
> + if (training_needed(slot)) {
> + rc = slot->ops.freset(slot);
> + if (rc < 0) {
> + opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> + slot->async_token,
> + get_slot_phandle(slot),
> + slot->power_state,
> + rc <= 0 ? rc : OPAL_BUSY);
> + return;
> + }
> + init_timer(&slot->timer, link_training_timer, slot);
> + } else {
> + init_timer(&slot->timer, link_up_timer, slot);
> + rc = 1;
> + }
> + schedule_timer(&slot->timer, rc);
> +}
This doesn't make much sense to me. The only difference between the two
paths here is that in the !training_needed() case we call link_up_timer
which polls the link state with ops.get_link_state() rather than
cranking the state machine. However, this is more or less the scenario
that the POLL_LINK state is for, so... why not use that?
> static void set_power_timer(struct timer *t __unused, void *data,
> uint64_t now __unused)
> {
> struct pci_slot *slot = data;
> - uint8_t link;
> struct phb *phb = slot->phb;
>
> phb_lock(phb);
> @@ -699,9 +799,9 @@ static void set_power_timer(struct timer *t __unused, void *data,
>
> break;
> case PCI_SLOT_STATE_SPOWER_DONE:
> + pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> if (slot->power_state == OPAL_PCI_SLOT_POWER_OFF) {
> remove_slot_devices(slot);
> - pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> slot->async_token, get_slot_phandle(slot),
> OPAL_PCI_SLOT_POWER_OFF, OPAL_SUCCESS);
> @@ -709,23 +809,7 @@ static void set_power_timer(struct timer *t __unused, void *data,
> }
>
> /* Power on */
> - if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
> - link = 0;
> - if (link) {
> - rescan_slot_devices(slot);
> - pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> - opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> - slot->async_token, get_slot_phandle(slot),
> - OPAL_PCI_SLOT_POWER_ON, OPAL_SUCCESS);
> - } else if (slot->retries-- == 0) {
> - pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> - opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
> - slot->async_token, get_slot_phandle(slot),
> - OPAL_PCI_SLOT_POWER_ON, OPAL_BUSY);
> - } else {
> - schedule_timer(&slot->timer, msecs_to_tb(10));
> - }
> -
> + wait_for_link_up_and_rescan(slot);
> break;
> default:
> prlog(PR_ERR, "PCI SLOT %016llx: Unexpected state 0x%08x\n",
> @@ -808,10 +892,12 @@ static int64_t opal_pci_set_power_state(uint64_t async_token,
> init_timer(&slot->timer, set_power_timer, slot);
> schedule_timer(&slot->timer, msecs_to_tb(10));
> } else if (rc == OPAL_SUCCESS) {
> - if (*state == OPAL_PCI_SLOT_POWER_OFF)
> + if (*state == OPAL_PCI_SLOT_POWER_OFF) {
> remove_slot_devices(slot);
> - else
> - rescan_slot_devices(slot);
> + } else {
> + wait_for_link_up_and_rescan(slot);
> + rc = OPAL_ASYNC_COMPLETION;
> + }
> }
>
> phb_unlock(phb);
More information about the Skiboot
mailing list