[Skiboot] [PATCH 1/5] npu2-opencapi: Use presence detection
Frederic Barrat
fbarrat at linux.ibm.com
Sat Apr 14 00:10:21 AEST 2018
Le 13/04/2018 à 09:24, Andrew Donnellan a écrit :
> On 12/04/18 22:45, Frederic Barrat wrote:
>> From: Frederic Barrat <fbarrat at linux.vnet.ibm.com>
>>
>> Presence detection is not part of the opencapi specification. So each
>> platform may choose to implement it the way it wants.
>>
>> All current platforms implement it through an i2c device where we can
>> query a pin to know if a device is connected or not. ZZ and Zaius have
>> a similar design and even use the same i2c information and pin
>> numbers.
>> However, presence detection on older ZZ planar (older than v4) doesn't
>> work, so we don't activate it for now, until our lab systems are
>> upgraded and it's better tested.
>>
>> Presence detection on witherspoon is still being worked on, but is
>> very likely to follow something similar, i.e. an i2c pin to query per
>> device.
>>
>> Signed-off-by: Frederic Barrat <fbarrat at linux.vnet.ibm.com>
>
> A few comments below. Smoketested this on one of our zaiuses, which
> let's be honest was probably one of the same machines you tested on...
:-)
I had borrowed some other zaius to test several configurations. I've
tested a cpu with only the "top" slot busy. Another cpu with only the
"bottom" slot busy. "Our" zaius has a cpu with the 2 slots busy. It was
all looking good, the i2c operation is giving the expected results. On
zaius. On ZZ, it's a different story, as indicated in the patch.
> Apart from comments below:
>
> Acked-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
>
>> ---
>> core/platform.c | 14 +++++-
>> hw/npu2-opencapi.c | 121
>> ++++++++++++++++++++++++++++++++++++++---------
>> include/platform.h | 4 ++
>> platforms/astbmc/zaius.c | 9 +++-
>> platforms/ibm-fsp/zz.c | 14 +++++-
>> 5 files changed, 137 insertions(+), 25 deletions(-)
>>
>> diff --git a/core/platform.c b/core/platform.c
>> index f09ea3c1..174235d9 100644
>> --- a/core/platform.c
>> +++ b/core/platform.c
>> @@ -34,6 +34,10 @@ DEFINE_LOG_ENTRY(OPAL_RC_ABNORMAL_REBOOT,
>> OPAL_PLATFORM_ERR_EVT, OPAL_CEC,
>> OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_REBOOT,
>> OPAL_ABNORMAL_POWER_OFF);
>> +#define OCAPI_I2C_PRESENCE_ADDR 0x20
>> +#define OCAPI_I2C_PRESENCE_BOTTOM (1 << 2)
>> +#define OCAPI_I2C_PRESENCE_TOP (1 << 7)
>> +
>
> I'm not convinced there's an advantage to having these as #defines
> rather than literals in platform_ocapi like all the other stuff already
> there.
So you're not going to like the next patch, where I align the reset
parameters to look the same. I can get rid of the macros, I don't feel
strongly about it.
> In any case, it should be noted that these values, like everything else
> in generic_ocapi, are for a ZZ.
True.
I'll wait for the review of the other patches to send a respin.
Fred
>> /*
>> * Various wrappers for platform functions
>> */
>> @@ -175,7 +179,15 @@ const struct platform_ocapi generic_ocapi = {
>> .i2c_offset = { 0x3, 0x1, 0x1 },
>> .i2c_odl0_data = { 0xFD, 0xFD, 0xFF },
>> .i2c_odl1_data = { 0xBF, 0xBF, 0xFF },
>> - .odl_phy_swap = true,
>> + .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
>> + .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
>> + .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
>> + /*
>> + * i2c presence detection is broken on ZZ planar < v4 so we
>> + * force the presence until all our systems are upgraded
>> + */
>> + .force_presence = true,
>> + .odl_phy_swap = true,
>> };
>> static struct bmc_platform generic_bmc = {
>> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
>> index 2cc776c7..b2bc0628 100644
>> --- a/hw/npu2-opencapi.c
>> +++ b/hw/npu2-opencapi.c
>> @@ -829,6 +829,44 @@ static void reset_ocapi_device(struct npu2_dev *dev)
>> }
>> }
>> +static bool i2c_presence_detect(struct npu2_dev *dev)
>> +{
>> + uint8_t state, data;
>> + int rc;
>> +
>> + /*
>> + * Opencapi presence detection is done through i2c
>> + *
>> + * Lagrange platforms (ZZ, Zaius) use the same default mechanism.
>> + * Witherspoon will need a specific implementation, TBD.
>> + */
>> + rc = i2c_request_send(dev->i2c_port_id_ocapi,
>> + platform.ocapi->i2c_presence_addr,
>> + SMBUS_READ, 0, 1,
>> + &state, 1, 120);
>> + if (rc) {
>> + prlog(PR_ERR, "OCAPI: error detecting link presence: %d\n",
>> + rc);
>> + return true; /* assume link exists */
>> + }
>> +
>> + prlog(PR_DEBUG, "OCAPI: I2C presence detect: 0x%x\n", state);
>> +
>> + switch (dev->index) {
>> + case 2:
>> + data = platform.ocapi->i2c_presence_odl0;
>> + break;
>> + case 3:
>> + data = platform.ocapi->i2c_presence_odl1;
>> + break;
>> + default:
>> + prlog(PR_ERR, "OCAPI: presence detection on invalid link\n");
>> + return true;
>> + }
>> + /* Presence detect bits are active low */
>> + return !(state & data);
>> +}
>> +
>> static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev
>> *dev)
>> {
>> uint64_t reg, config_xscom;
>> @@ -896,6 +934,20 @@ static int odl_train(uint32_t gcid, uint32_t
>> index, struct npu2_dev *dev)
>> return OPAL_HARDWARE;
>> }
>> +static int64_t npu2_opencapi_get_presence_state(struct pci_slot *slot,
>> + uint8_t *val)
>> +{
>> + bool present;
>> + struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
>> +
>> + if (platform.ocapi->force_presence)
>> + present = true;
>> + else
>> + present = i2c_presence_detect(dev);
>> + *val = present;
>> + return OPAL_SUCCESS;
>> +}
>> +
>> static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot,
>> uint8_t *val)
>> {
>> struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
>> @@ -926,9 +978,9 @@ static struct pci_slot
>> *npu2_opencapi_slot_create(struct phb *phb)
>> return slot;
>> /* TODO: Figure out other slot functions */
>> - slot->ops.get_presence_state = NULL;
>> - slot->ops.get_link_state = npu2_opencapi_get_link_state;
>> - slot->ops.get_power_state = NULL;
>> + slot->ops.get_presence_state = npu2_opencapi_get_presence_state;
>> + slot->ops.get_link_state = npu2_opencapi_get_link_state;
>> + slot->ops.get_power_state = NULL;
>> slot->ops.get_attention_state = NULL;
>> slot->ops.get_latch_state = NULL;
>> slot->ops.set_power_state = NULL;
>> @@ -1264,6 +1316,48 @@ static int setup_irq(struct npu2 *p)
>> #define LINK_TRAINING_RETRIES 5
>> +static int train_link(int chip_id, struct npu2_dev *dev)
>> +{
>> + bool train;
>> + int rc;
>> + int retries = LINK_TRAINING_RETRIES;
>> +
>> + if (platform.ocapi->force_presence)
>> + train = true;
>> + else
>> + train = i2c_presence_detect(dev);
>> + if (!train) {
>> + /*
>> + * FIXME: if there's no card on the link, we should consider
>> + * powering off the unused lanes to save energy
>> + */
>> + prlog(PR_INFO, "OCAPI: no card detected on link %d, chip %d\n",
>> + dev->index, chip_id);
>> + return -1;
>> + }
>> +
>> + do {
>> + rc = odl_train(chip_id, dev->index, dev);
>> + } while (rc != OPAL_SUCCESS && --retries);
>> +
>> + if (rc != OPAL_SUCCESS && retries == 0) {
>> + /**
>> + * @fwts-label OCAPILinkTrainingFailed
>> + * @fwts-advice The OpenCAPI link training procedure failed.
>> + * This indicates a hardware or firmware bug. OpenCAPI
>> + * functionality will not be available on this link.
>> + */
>> + prlog(PR_ERR,
>> + "OCAPI: Link %d on chip %u failed to train\n",
>> + dev->index, chip_id);
>> + prlog(PR_ERR, "OCAPI: Final link status: %016llx\n",
>> + get_odl_status(chip_id, dev->index));
>> + return -1;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> static void npu2_opencapi_setup_device(struct dt_node *dn_link,
>> struct npu2 *n,
>> struct npu2_dev *dev)
>> {
>> @@ -1272,7 +1366,6 @@ static void npu2_opencapi_setup_device(struct
>> dt_node *dn_link, struct npu2 *n,
>> struct pci_slot *slot;
>> char port_name[17];
>> uint64_t mm_win[2];
>> - int retries = LINK_TRAINING_RETRIES;
>> int rc;
>> dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
>> @@ -1367,27 +1460,11 @@ static void npu2_opencapi_setup_device(struct
>> dt_node *dn_link, struct npu2 *n,
>> break;
>> case NPU2_TRAIN_DEFAULT:
>> - do {
>> - rc = odl_train(n->chip_id, dev->index, dev);
>> - } while (rc != OPAL_SUCCESS && --retries);
>> -
>> - if (rc != OPAL_SUCCESS && retries == 0) {
>> - /**
>> - * @fwts-label OCAPILinkTrainingFailed
>> - * @fwts-advice The OpenCAPI link training procedure failed.
>> - * This indicates a hardware or firmware bug. OpenCAPI
>> - * functionality will not be available on this link.
>> - */
>> - prlog(PR_ERR,
>> - "OCAPI: Link %d on chip %u failed to train\n",
>> - dev->index, n->chip_id);
>> - prlog(PR_ERR, "OCAPI: Final link status: %016llx\n",
>> - get_odl_status(n->chip_id, dev->index));
>> + rc = train_link(n->chip_id, dev);
>> + if (rc)
>> goto failed;
>> - }
>> otl_enabletx(n->chip_id, n->xscom_base, dev->index);
>> -
>> slot = npu2_opencapi_slot_create(&dev->phb_ocapi);
>> if (!slot) {
>> /**
>> diff --git a/include/platform.h b/include/platform.h
>> index a7776446..9a04ab37 100644
>> --- a/include/platform.h
>> +++ b/include/platform.h
>> @@ -51,6 +51,10 @@ struct platform_ocapi {
>> uint32_t i2c_offset[3]; /* Offsets on I2C device */
>> uint8_t i2c_odl0_data[3]; /* Data to reset ODL0 */
>> uint8_t i2c_odl1_data[3]; /* Data to reset ODL1 */
>> + uint8_t i2c_presence_addr; /* I2C address for presence
>> detection */
>> + uint8_t i2c_presence_odl0; /* I2C pin to read to detect ODL0 */
>> + uint8_t i2c_presence_odl1; /* I2C pin to read to detect ODL1 */
>
> We use these as a mask rather than a pin index, maybe note that explicitly
>
>> + bool force_presence; /* don't use i2c detection */
>> bool odl_phy_swap; /* Swap ODL1 to use brick 2 rather than
>> * brick 1 lanes */
>> };
>> diff --git a/platforms/astbmc/zaius.c b/platforms/astbmc/zaius.c
>> index 7678ad12..279fbf8b 100644
>> --- a/platforms/astbmc/zaius.c
>> +++ b/platforms/astbmc/zaius.c
>> @@ -24,13 +24,20 @@
>> #include "astbmc.h"
>> +#define OCAPI_I2C_PRESENCE_ADDR 0x20
>> +#define OCAPI_I2C_PRESENCE_BOTTOM (1 << 2)
>> +#define OCAPI_I2C_PRESENCE_TOP (1 << 7)
>> +
>> const struct platform_ocapi zaius_ocapi = {
>> .i2c_engine = 1,
>> .i2c_port = 4,
>> .i2c_offset = { 0x3, 0x1, 0x1 },
>> .i2c_odl0_data = { 0xFD, 0xFD, 0xFF },
>> .i2c_odl1_data = { 0xBF, 0xBF, 0xFF },
>> - .odl_phy_swap = true,
>> + .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
>> + .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
>> + .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
>> + .odl_phy_swap = true,
>> };
>> #define NPU_BASE 0x5011000
>> diff --git a/platforms/ibm-fsp/zz.c b/platforms/ibm-fsp/zz.c
>> index 9a849290..66f29d44 100644
>> --- a/platforms/ibm-fsp/zz.c
>> +++ b/platforms/ibm-fsp/zz.c
>> @@ -27,6 +27,10 @@
>> #include "ibm-fsp.h"
>> #include "lxvpd.h"
>> +#define OCAPI_I2C_PRESENCE_ADDR 0x20
>> +#define OCAPI_I2C_PRESENCE_BOTTOM (1 << 2)
>> +#define OCAPI_I2C_PRESENCE_TOP (1 << 7)
>> +
>> /* We don't yet create NPU device nodes on ZZ, but these values are
>> correct */
>> const struct platform_ocapi zz_ocapi = {
>> .i2c_engine = 1,
>> @@ -34,7 +38,15 @@ const struct platform_ocapi zz_ocapi = {
>> .i2c_offset = { 0x3, 0x1, 0x1 },
>> .i2c_odl0_data = { 0xFD, 0xFD, 0xFF },
>> .i2c_odl1_data = { 0xBF, 0xBF, 0xFF },
>> - .odl_phy_swap = true,
>> + .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
>> + .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
>> + .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
>> + /*
>> + * i2c presence detection is broken on ZZ planar < v4 so we
>> + * force the presence until all our systems are upgraded
>> + */
>> + .force_presence = true,
>> + .odl_phy_swap = true,
>> };
>> static bool zz_probe(void)
>>
>
More information about the Skiboot
mailing list