[Skiboot] [PATCH 1/5] npu2-opencapi: Use presence detection

Frederic Barrat fbarrat at linux.ibm.com
Sat Apr 14 00:10:21 AEST 2018



Le 13/04/2018 à 09:24, Andrew Donnellan a écrit :
> On 12/04/18 22:45, Frederic Barrat wrote:
>> From: Frederic Barrat <fbarrat at linux.vnet.ibm.com>
>>
>> Presence detection is not part of the opencapi specification. So each
>> platform may choose to implement it the way it wants.
>>
>> All current platforms implement it through an i2c device where we can
>> query a pin to know if a device is connected or not. ZZ and Zaius have
>> a similar design and even use the same i2c information and pin
>> numbers.
>> However, presence detection on older ZZ planar (older than v4) doesn't
>> work, so we don't activate it for now, until our lab systems are
>> upgraded and it's better tested.
>>
>> Presence detection on witherspoon is still being worked on, but is
>> very likely to follow something similar, i.e. an i2c pin to query per
>> device.
>>
>> Signed-off-by: Frederic Barrat <fbarrat at linux.vnet.ibm.com>
> 
> A few comments below. Smoketested this on one of our zaiuses, which 
> let's be honest was probably one of the same machines you tested on...

:-)
I had borrowed some other zaius to test several configurations. I've 
tested a cpu with only the "top" slot busy. Another cpu with only the 
"bottom" slot busy. "Our" zaius has a cpu with the 2 slots busy. It was 
all looking good, the i2c operation is giving the expected results. On 
zaius. On ZZ, it's a different story, as indicated in the patch.


> Apart from comments below:
> 
> Acked-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
> 
>> ---
>>   core/platform.c          |  14 +++++-
>>   hw/npu2-opencapi.c       | 121 
>> ++++++++++++++++++++++++++++++++++++++---------
>>   include/platform.h       |   4 ++
>>   platforms/astbmc/zaius.c |   9 +++-
>>   platforms/ibm-fsp/zz.c   |  14 +++++-
>>   5 files changed, 137 insertions(+), 25 deletions(-)
>>
>> diff --git a/core/platform.c b/core/platform.c
>> index f09ea3c1..174235d9 100644
>> --- a/core/platform.c
>> +++ b/core/platform.c
>> @@ -34,6 +34,10 @@ DEFINE_LOG_ENTRY(OPAL_RC_ABNORMAL_REBOOT, 
>> OPAL_PLATFORM_ERR_EVT, OPAL_CEC,
>>            OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_REBOOT,
>>            OPAL_ABNORMAL_POWER_OFF);
>> +#define OCAPI_I2C_PRESENCE_ADDR    0x20
>> +#define OCAPI_I2C_PRESENCE_BOTTOM  (1 << 2)
>> +#define OCAPI_I2C_PRESENCE_TOP     (1 << 7)
>> +
> 
> I'm not convinced there's an advantage to having these as #defines 
> rather than literals in platform_ocapi like all the other stuff already 
> there.

So you're not going to like the next patch, where I align the reset 
parameters to look the same. I can get rid of the macros, I don't feel 
strongly about it.

> In any case, it should be noted that these values, like everything else 
> in generic_ocapi, are for a ZZ.

True.

I'll wait for the review of the other patches to send a respin.

   Fred



>>   /*
>>    * Various wrappers for platform functions
>>    */
>> @@ -175,7 +179,15 @@ const struct platform_ocapi generic_ocapi = {
>>       .i2c_offset    = { 0x3, 0x1, 0x1 },
>>       .i2c_odl0_data    = { 0xFD, 0xFD, 0xFF },
>>       .i2c_odl1_data    = { 0xBF, 0xBF, 0xFF },
>> -    .odl_phy_swap    = true,
>> +    .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
>> +    .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
>> +    .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
>> +    /*
>> +     * i2c presence detection is broken on ZZ planar < v4 so we
>> +     * force the presence until all our systems are upgraded
>> +     */
>> +    .force_presence    = true,
>> +    .odl_phy_swap   = true,
>>   };
>>   static struct bmc_platform generic_bmc = {
>> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
>> index 2cc776c7..b2bc0628 100644
>> --- a/hw/npu2-opencapi.c
>> +++ b/hw/npu2-opencapi.c
>> @@ -829,6 +829,44 @@ static void reset_ocapi_device(struct npu2_dev *dev)
>>       }
>>   }
>> +static bool i2c_presence_detect(struct npu2_dev *dev)
>> +{
>> +    uint8_t state, data;
>> +    int rc;
>> +
>> +    /*
>> +     * Opencapi presence detection is done through i2c
>> +     *
>> +     * Lagrange platforms (ZZ, Zaius) use the same default mechanism.
>> +     * Witherspoon will need a specific implementation, TBD.
>> +     */
>> +    rc = i2c_request_send(dev->i2c_port_id_ocapi,
>> +            platform.ocapi->i2c_presence_addr,
>> +            SMBUS_READ, 0, 1,
>> +            &state, 1, 120);
>> +    if (rc) {
>> +        prlog(PR_ERR, "OCAPI: error detecting link presence: %d\n",
>> +            rc);
>> +        return true; /* assume link exists */
>> +    }
>> +
>> +    prlog(PR_DEBUG, "OCAPI: I2C presence detect: 0x%x\n", state);
>> +
>> +    switch (dev->index) {
>> +    case 2:
>> +        data = platform.ocapi->i2c_presence_odl0;
>> +        break;
>> +    case 3:
>> +        data = platform.ocapi->i2c_presence_odl1;
>> +        break;
>> +    default:
>> +        prlog(PR_ERR, "OCAPI: presence detection on invalid link\n");
>> +        return true;
>> +    }
>> +    /* Presence detect bits are active low */
>> +    return !(state & data);
>> +}
>> +
>>   static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev 
>> *dev)
>>   {
>>       uint64_t reg, config_xscom;
>> @@ -896,6 +934,20 @@ static int odl_train(uint32_t gcid, uint32_t 
>> index, struct npu2_dev *dev)
>>       return OPAL_HARDWARE;
>>   }
>> +static int64_t npu2_opencapi_get_presence_state(struct pci_slot *slot,
>> +                        uint8_t *val)
>> +{
>> +    bool present;
>> +    struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
>> +
>> +    if (platform.ocapi->force_presence)
>> +        present = true;
>> +    else
>> +        present = i2c_presence_detect(dev);
>> +    *val = present;
>> +    return OPAL_SUCCESS;
>> +}
>> +
>>   static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, 
>> uint8_t *val)
>>   {
>>       struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
>> @@ -926,9 +978,9 @@ static struct pci_slot 
>> *npu2_opencapi_slot_create(struct phb *phb)
>>           return slot;
>>       /* TODO: Figure out other slot functions */
>> -    slot->ops.get_presence_state = NULL;
>> -    slot->ops.get_link_state = npu2_opencapi_get_link_state;
>> -    slot->ops.get_power_state = NULL;
>> +    slot->ops.get_presence_state  = npu2_opencapi_get_presence_state;
>> +    slot->ops.get_link_state      = npu2_opencapi_get_link_state;
>> +    slot->ops.get_power_state     = NULL;
>>       slot->ops.get_attention_state = NULL;
>>       slot->ops.get_latch_state     = NULL;
>>       slot->ops.set_power_state     = NULL;
>> @@ -1264,6 +1316,48 @@ static int setup_irq(struct npu2 *p)
>>   #define LINK_TRAINING_RETRIES    5
>> +static int train_link(int chip_id, struct npu2_dev *dev)
>> +{
>> +    bool train;
>> +    int rc;
>> +    int retries = LINK_TRAINING_RETRIES;
>> +
>> +    if (platform.ocapi->force_presence)
>> +        train = true;
>> +    else
>> +        train = i2c_presence_detect(dev);
>> +    if (!train) {
>> +        /*
>> +         * FIXME: if there's no card on the link, we should consider
>> +         * powering off the unused lanes to save energy
>> +         */
>> +        prlog(PR_INFO, "OCAPI: no card detected on link %d, chip %d\n",
>> +            dev->index, chip_id);
>> +        return -1;
>> +    }
>> +
>> +    do {
>> +        rc = odl_train(chip_id, dev->index, dev);
>> +    } while (rc != OPAL_SUCCESS && --retries);
>> +
>> +    if (rc != OPAL_SUCCESS && retries == 0) {
>> +        /**
>> +         * @fwts-label OCAPILinkTrainingFailed
>> +         * @fwts-advice The OpenCAPI link training procedure failed.
>> +         * This indicates a hardware or firmware bug. OpenCAPI
>> +         * functionality will not be available on this link.
>> +         */
>> +        prlog(PR_ERR,
>> +            "OCAPI: Link %d on chip %u failed to train\n",
>> +            dev->index, chip_id);
>> +        prlog(PR_ERR, "OCAPI: Final link status: %016llx\n",
>> +            get_odl_status(chip_id, dev->index));
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>   static void npu2_opencapi_setup_device(struct dt_node *dn_link, 
>> struct npu2 *n,
>>                          struct npu2_dev *dev)
>>   {
>> @@ -1272,7 +1366,6 @@ static void npu2_opencapi_setup_device(struct 
>> dt_node *dn_link, struct npu2 *n,
>>       struct pci_slot *slot;
>>       char port_name[17];
>>       uint64_t mm_win[2];
>> -    int retries = LINK_TRAINING_RETRIES;
>>       int rc;
>>       dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
>> @@ -1367,27 +1460,11 @@ static void npu2_opencapi_setup_device(struct 
>> dt_node *dn_link, struct npu2 *n,
>>           break;
>>       case NPU2_TRAIN_DEFAULT:
>> -        do {
>> -            rc = odl_train(n->chip_id, dev->index, dev);
>> -        } while (rc != OPAL_SUCCESS && --retries);
>> -
>> -        if (rc != OPAL_SUCCESS && retries == 0) {
>> -            /**
>> -             * @fwts-label OCAPILinkTrainingFailed
>> -             * @fwts-advice The OpenCAPI link training procedure failed.
>> -             * This indicates a hardware or firmware bug. OpenCAPI
>> -             * functionality will not be available on this link.
>> -             */
>> -            prlog(PR_ERR,
>> -                "OCAPI: Link %d on chip %u failed to train\n",
>> -                dev->index, n->chip_id);
>> -            prlog(PR_ERR, "OCAPI: Final link status: %016llx\n",
>> -                get_odl_status(n->chip_id, dev->index));
>> +        rc = train_link(n->chip_id, dev);
>> +        if (rc)
>>               goto failed;
>> -        }
>>           otl_enabletx(n->chip_id, n->xscom_base, dev->index);
>> -
>>           slot = npu2_opencapi_slot_create(&dev->phb_ocapi);
>>           if (!slot) {
>>               /**
>> diff --git a/include/platform.h b/include/platform.h
>> index a7776446..9a04ab37 100644
>> --- a/include/platform.h
>> +++ b/include/platform.h
>> @@ -51,6 +51,10 @@ struct platform_ocapi {
>>       uint32_t i2c_offset[3];        /* Offsets on I2C device */
>>       uint8_t i2c_odl0_data[3];    /* Data to reset ODL0 */
>>       uint8_t i2c_odl1_data[3];    /* Data to reset ODL1 */
>> +    uint8_t i2c_presence_addr;    /* I2C address for presence 
>> detection */
>> +    uint8_t i2c_presence_odl0;    /* I2C pin to read to detect ODL0 */
>> +    uint8_t i2c_presence_odl1;    /* I2C pin to read to detect ODL1 */
> 
> We use these as a mask rather than a pin index, maybe note that explicitly
> 
>> +    bool force_presence;            /* don't use i2c detection */
>>       bool odl_phy_swap;        /* Swap ODL1 to use brick 2 rather than
>>                        * brick 1 lanes */
>>   };
>> diff --git a/platforms/astbmc/zaius.c b/platforms/astbmc/zaius.c
>> index 7678ad12..279fbf8b 100644
>> --- a/platforms/astbmc/zaius.c
>> +++ b/platforms/astbmc/zaius.c
>> @@ -24,13 +24,20 @@
>>   #include "astbmc.h"
>> +#define OCAPI_I2C_PRESENCE_ADDR    0x20
>> +#define OCAPI_I2C_PRESENCE_BOTTOM  (1 << 2)
>> +#define OCAPI_I2C_PRESENCE_TOP     (1 << 7)
>> +
>>   const struct platform_ocapi zaius_ocapi = {
>>       .i2c_engine    = 1,
>>       .i2c_port    = 4,
>>       .i2c_offset    = { 0x3, 0x1, 0x1 },
>>       .i2c_odl0_data    = { 0xFD, 0xFD, 0xFF },
>>       .i2c_odl1_data    = { 0xBF, 0xBF, 0xFF },
>> -    .odl_phy_swap    = true,
>> +    .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
>> +    .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
>> +    .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
>> +    .odl_phy_swap   = true,
>>   };
>>   #define NPU_BASE 0x5011000
>> diff --git a/platforms/ibm-fsp/zz.c b/platforms/ibm-fsp/zz.c
>> index 9a849290..66f29d44 100644
>> --- a/platforms/ibm-fsp/zz.c
>> +++ b/platforms/ibm-fsp/zz.c
>> @@ -27,6 +27,10 @@
>>   #include "ibm-fsp.h"
>>   #include "lxvpd.h"
>> +#define OCAPI_I2C_PRESENCE_ADDR    0x20
>> +#define OCAPI_I2C_PRESENCE_BOTTOM  (1 << 2)
>> +#define OCAPI_I2C_PRESENCE_TOP     (1 << 7)
>> +
>>   /* We don't yet create NPU device nodes on ZZ, but these values are 
>> correct */
>>   const struct platform_ocapi zz_ocapi = {
>>       .i2c_engine    = 1,
>> @@ -34,7 +38,15 @@ const struct platform_ocapi zz_ocapi = {
>>       .i2c_offset    = { 0x3, 0x1, 0x1 },
>>       .i2c_odl0_data    = { 0xFD, 0xFD, 0xFF },
>>       .i2c_odl1_data    = { 0xBF, 0xBF, 0xFF },
>> -    .odl_phy_swap    = true,
>> +    .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
>> +    .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
>> +    .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
>> +    /*
>> +     * i2c presence detection is broken on ZZ planar < v4 so we
>> +     * force the presence until all our systems are upgraded
>> +     */
>> +    .force_presence    = true,
>> +    .odl_phy_swap   = true,
>>   };
>>   static bool zz_probe(void)
>>
> 



More information about the Skiboot mailing list