[Skiboot] [PATCH 04/16] core/pci: Train link of PHB slots when hotplugging

Frederic Barrat fbarrat at linux.ibm.com
Wed Sep 18 01:05:34 AEST 2019



Le 17/09/2019 à 11:02, christophe lombard a écrit :
> On 09/09/2019 14:31, Frederic Barrat wrote:
>> The link of PHB slots must be trained after powering on. This can be
>> done by calling the fundamental reset callback of the slot.
>>
>> We could force a reset for all the slots and have a common path in
>> set_power_state(). But this patch only resets the PHB slot. Some slot
>> implementations do a power cycle during fundamental reset, so calling
>> a reset after powering on would repeat that operation.
>>
>> Signed-off-by: Frederic Barrat <fbarrat at linux.ibm.com>
>> ---
>>   core/pci-opal.c | 130 ++++++++++++++++++++++++++++++++++++++++--------
>>   1 file changed, 108 insertions(+), 22 deletions(-)
>>
>> diff --git a/core/pci-opal.c b/core/pci-opal.c
>> index 69bd65c6..6c070030 100644
>> --- a/core/pci-opal.c
>> +++ b/core/pci-opal.c
>> @@ -678,11 +678,111 @@ static void remove_slot_devices(struct pci_slot 
>> *slot)
>>       pci_remove_bus(phb, &pd->children);
>>   }
>> +static void link_training_timer(struct timer *t, void *data,
>> +                uint64_t now __unused)
>> +{
>> +    struct pci_slot *slot = data;
>> +    struct phb *phb = slot->phb;
>> +    uint8_t link;
>> +    int64_t rc = 0;
>> +
>> +    phb_lock(phb);
>> +
> 
> lock already taken in the main function set_power_timer()


link_training_timer() is called in a different context, when the timer 
pops after being scheduled in set_power_time(). So I think we need to 
take the lock as link_training_timer() could be executed concurrently 
with something else touching the link. And we don't deadlock.



>> +    rc = slot->ops.run_sm(slot);
>> +    if (rc < 0)
>> +        goto out;
>> +    if (rc > 0) {
>> +        schedule_timer(t, rc);
>> +        phb_unlock(phb);
>> +        return;
>> +    }
>> +
>> +    if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
>> +        link = 0;
>> +    if (!link) {
>> +        rc = OPAL_HARDWARE;
>> +        goto out;
>> +    }
>> +
>> +    rescan_slot_devices(slot);
>> +out:
>> +    opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>> +               slot->async_token, get_slot_phandle(slot),
>> +               slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
>> +    phb_unlock(phb);
>> +}
>> +
>> +static void link_up_timer(struct timer *t, void *data, uint64_t now 
>> __unused)
>> +{
>> +    struct pci_slot *slot = data;
>> +    struct phb *phb = slot->phb;
>> +    uint8_t link;
>> +    int64_t rc = OPAL_SUCCESS;
>> +
>> +    phb_lock(phb);
>> +    if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
>> +        link = 0;
>> +    if (!link) {
>> +        if (slot->retries-- == 0) {
>> +            rc = OPAL_BUSY;
>> +            goto out;
>> +        } else {
>> +            schedule_timer(t, msecs_to_tb(10));
>> +            phb_unlock(phb);
>> +            return;
>> +        }
>> +    }
>> +
>> +    rescan_slot_devices(slot);
>> +out:
>> +    opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>> +               slot->async_token, get_slot_phandle(slot),
>> +               slot->power_state, rc <= 0 ? rc : OPAL_BUSY);
>> +    phb_unlock(phb);
>> +}
>> +
>> +static bool training_needed(struct pci_slot *slot)
>> +{
>> +    struct phb *phb = slot->phb;
>> +    struct pci_device *pd = slot->pd;
>> +
>> +    if ((pd && !pd->parent) || /* "real" PHB */
>> +        (!pd && phb->phb_type == phb_type_npu_v2_opencapi))
>> +        return true;
>> +    return false;
>> +}
>> +
>> +static void wait_for_link_up_and_rescan(struct pci_slot *slot)
>> +{
>> +    int64_t rc;
>> +
>> +    /*
>> +     * Links for PHB slots need to be retrained by triggering a
>> +     * fundamental reset. Other slots also need to be tested for
>> +     * readiness
>> +     */
>> +    if (training_needed(slot)) {
>> +        rc = slot->ops.freset(slot);
>> +        if (rc < 0) {
>> +            opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>> +                       slot->async_token,
>> +                       get_slot_phandle(slot),
>> +                       slot->power_state,
>> +                       rc <= 0 ? rc : OPAL_BUSY);
>> +            return;
>> +        }
>> +        init_timer(&slot->timer, link_training_timer, slot);
>> +    } else {
>> +        init_timer(&slot->timer, link_up_timer, slot);
>> +        rc = 1;
>> +    }
>> +    schedule_timer(&slot->timer, rc);
>> +}
>> +
>>   static void set_power_timer(struct timer *t __unused, void *data,
>>                   uint64_t now __unused)
>>   {
>>       struct pci_slot *slot = data;
>> -    uint8_t link;
>>       struct phb *phb = slot->phb;
>>       phb_lock(phb);
>> @@ -699,9 +799,9 @@ static void set_power_timer(struct timer *t 
>> __unused, void *data,
>>           break;
>>       case PCI_SLOT_STATE_SPOWER_DONE:
>> +        pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
> 
> any reason to update the state here ?


The power is now on and we'll soon have 2 bifurcated paths: either we 
retrain or we don't. We need to eventually reset the state to 
PCL_SLOT_STATE_NORMAL and this location happens to be common. I don't 
think it hurts to do it now instead of later (and in the case of link 
retraining, it's needed pretty early, before we call freset() )

   Fred



>>           if (slot->power_state == OPAL_PCI_SLOT_POWER_OFF) {
>>               remove_slot_devices(slot);
>> -            pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
>>               opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>>                          slot->async_token, get_slot_phandle(slot),
>>                          OPAL_PCI_SLOT_POWER_OFF, OPAL_SUCCESS);
>> @@ -709,23 +809,7 @@ static void set_power_timer(struct timer *t 
>> __unused, void *data,
>>           }
>>           /* Power on */
>> -        if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS)
>> -            link = 0;
>> -        if (link) {
>> -            rescan_slot_devices(slot);
>> -            pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
>> -            opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>> -                       slot->async_token, get_slot_phandle(slot),
>> -                       OPAL_PCI_SLOT_POWER_ON, OPAL_SUCCESS);
>> -        } else if (slot->retries-- == 0) {
>> -            pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL);
>> -            opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
>> -                       slot->async_token, get_slot_phandle(slot),
>> -                       OPAL_PCI_SLOT_POWER_ON, OPAL_BUSY);
>> -        } else {
>> -            schedule_timer(&slot->timer, msecs_to_tb(10));
>> -        }
>> -
>> +        wait_for_link_up_and_rescan(slot);
>>           break;
>>       default:
>>           prlog(PR_ERR, "PCI SLOT %016llx: Unexpected state 0x%08x\n",
>> @@ -808,10 +892,12 @@ static int64_t opal_pci_set_power_state(uint64_t 
>> async_token,
>>           init_timer(&slot->timer, set_power_timer, slot);
>>           schedule_timer(&slot->timer, msecs_to_tb(10));
>>       } else if (rc == OPAL_SUCCESS) {
>> -        if (*state == OPAL_PCI_SLOT_POWER_OFF)
>> +        if (*state == OPAL_PCI_SLOT_POWER_OFF) {
>>               remove_slot_devices(slot);
>> -        else
>> -            rescan_slot_devices(slot);
>> +        } else {
>> +            wait_for_link_up_and_rescan(slot);
>> +            rc = OPAL_ASYNC_COMPLETION;
>> +        }
>>       }
>>       phb_unlock(phb);
>>
> 



More information about the Skiboot mailing list