[Skiboot] [PATCH 1/5] npu2-opencapi: Use presence detection
Andrew Donnellan
andrew.donnellan at au1.ibm.com
Fri Apr 13 17:24:39 AEST 2018
On 12/04/18 22:45, Frederic Barrat wrote:
> From: Frederic Barrat <fbarrat at linux.vnet.ibm.com>
>
> Presence detection is not part of the opencapi specification. So each
> platform may choose to implement it the way it wants.
>
> All current platforms implement it through an i2c device where we can
> query a pin to know if a device is connected or not. ZZ and Zaius have
> a similar design and even use the same i2c information and pin
> numbers.
> However, presence detection on older ZZ planar (older than v4) doesn't
> work, so we don't activate it for now, until our lab systems are
> upgraded and it's better tested.
>
> Presence detection on witherspoon is still being worked on, but is
> very likely to follow something similar, i.e. an i2c pin to query per
> device.
>
> Signed-off-by: Frederic Barrat <fbarrat at linux.vnet.ibm.com>
A few comments below. Smoketested this on one of our zaiuses, which
let's be honest was probably one of the same machines you tested on...
Apart from comments below:
Acked-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
> ---
> core/platform.c | 14 +++++-
> hw/npu2-opencapi.c | 121 ++++++++++++++++++++++++++++++++++++++---------
> include/platform.h | 4 ++
> platforms/astbmc/zaius.c | 9 +++-
> platforms/ibm-fsp/zz.c | 14 +++++-
> 5 files changed, 137 insertions(+), 25 deletions(-)
>
> diff --git a/core/platform.c b/core/platform.c
> index f09ea3c1..174235d9 100644
> --- a/core/platform.c
> +++ b/core/platform.c
> @@ -34,6 +34,10 @@ DEFINE_LOG_ENTRY(OPAL_RC_ABNORMAL_REBOOT, OPAL_PLATFORM_ERR_EVT, OPAL_CEC,
> OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_REBOOT,
> OPAL_ABNORMAL_POWER_OFF);
>
> +#define OCAPI_I2C_PRESENCE_ADDR 0x20
> +#define OCAPI_I2C_PRESENCE_BOTTOM (1 << 2)
> +#define OCAPI_I2C_PRESENCE_TOP (1 << 7)
> +
I'm not convinced there's an advantage to having these as #defines
rather than literals in platform_ocapi like all the other stuff already
there.
In any case, it should be noted that these values, like everything else
in generic_ocapi, are for a ZZ.
> /*
> * Various wrappers for platform functions
> */
> @@ -175,7 +179,15 @@ const struct platform_ocapi generic_ocapi = {
> .i2c_offset = { 0x3, 0x1, 0x1 },
> .i2c_odl0_data = { 0xFD, 0xFD, 0xFF },
> .i2c_odl1_data = { 0xBF, 0xBF, 0xFF },
> - .odl_phy_swap = true,
> + .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
> + .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
> + .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
> + /*
> + * i2c presence detection is broken on ZZ planar < v4 so we
> + * force the presence until all our systems are upgraded
> + */
> + .force_presence = true,
> + .odl_phy_swap = true,
> };
>
> static struct bmc_platform generic_bmc = {
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 2cc776c7..b2bc0628 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -829,6 +829,44 @@ static void reset_ocapi_device(struct npu2_dev *dev)
> }
> }
>
> +static bool i2c_presence_detect(struct npu2_dev *dev)
> +{
> + uint8_t state, data;
> + int rc;
> +
> + /*
> + * Opencapi presence detection is done through i2c
> + *
> + * Lagrange platforms (ZZ, Zaius) use the same default mechanism.
> + * Witherspoon will need a specific implementation, TBD.
> + */
> + rc = i2c_request_send(dev->i2c_port_id_ocapi,
> + platform.ocapi->i2c_presence_addr,
> + SMBUS_READ, 0, 1,
> + &state, 1, 120);
> + if (rc) {
> + prlog(PR_ERR, "OCAPI: error detecting link presence: %d\n",
> + rc);
> + return true; /* assume link exists */
> + }
> +
> + prlog(PR_DEBUG, "OCAPI: I2C presence detect: 0x%x\n", state);
> +
> + switch (dev->index) {
> + case 2:
> + data = platform.ocapi->i2c_presence_odl0;
> + break;
> + case 3:
> + data = platform.ocapi->i2c_presence_odl1;
> + break;
> + default:
> + prlog(PR_ERR, "OCAPI: presence detection on invalid link\n");
> + return true;
> + }
> + /* Presence detect bits are active low */
> + return !(state & data);
> +}
> +
> static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev *dev)
> {
> uint64_t reg, config_xscom;
> @@ -896,6 +934,20 @@ static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev *dev)
> return OPAL_HARDWARE;
> }
>
> +static int64_t npu2_opencapi_get_presence_state(struct pci_slot *slot,
> + uint8_t *val)
> +{
> + bool present;
> + struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
> +
> + if (platform.ocapi->force_presence)
> + present = true;
> + else
> + present = i2c_presence_detect(dev);
> + *val = present;
> + return OPAL_SUCCESS;
> +}
> +
> static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val)
> {
> struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
> @@ -926,9 +978,9 @@ static struct pci_slot *npu2_opencapi_slot_create(struct phb *phb)
> return slot;
>
> /* TODO: Figure out other slot functions */
> - slot->ops.get_presence_state = NULL;
> - slot->ops.get_link_state = npu2_opencapi_get_link_state;
> - slot->ops.get_power_state = NULL;
> + slot->ops.get_presence_state = npu2_opencapi_get_presence_state;
> + slot->ops.get_link_state = npu2_opencapi_get_link_state;
> + slot->ops.get_power_state = NULL;
> slot->ops.get_attention_state = NULL;
> slot->ops.get_latch_state = NULL;
> slot->ops.set_power_state = NULL;
> @@ -1264,6 +1316,48 @@ static int setup_irq(struct npu2 *p)
>
> #define LINK_TRAINING_RETRIES 5
>
> +static int train_link(int chip_id, struct npu2_dev *dev)
> +{
> + bool train;
> + int rc;
> + int retries = LINK_TRAINING_RETRIES;
> +
> + if (platform.ocapi->force_presence)
> + train = true;
> + else
> + train = i2c_presence_detect(dev);
> + if (!train) {
> + /*
> + * FIXME: if there's no card on the link, we should consider
> + * powering off the unused lanes to save energy
> + */
> + prlog(PR_INFO, "OCAPI: no card detected on link %d, chip %d\n",
> + dev->index, chip_id);
> + return -1;
> + }
> +
> + do {
> + rc = odl_train(chip_id, dev->index, dev);
> + } while (rc != OPAL_SUCCESS && --retries);
> +
> + if (rc != OPAL_SUCCESS && retries == 0) {
> + /**
> + * @fwts-label OCAPILinkTrainingFailed
> + * @fwts-advice The OpenCAPI link training procedure failed.
> + * This indicates a hardware or firmware bug. OpenCAPI
> + * functionality will not be available on this link.
> + */
> + prlog(PR_ERR,
> + "OCAPI: Link %d on chip %u failed to train\n",
> + dev->index, chip_id);
> + prlog(PR_ERR, "OCAPI: Final link status: %016llx\n",
> + get_odl_status(chip_id, dev->index));
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
> struct npu2_dev *dev)
> {
> @@ -1272,7 +1366,6 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
> struct pci_slot *slot;
> char port_name[17];
> uint64_t mm_win[2];
> - int retries = LINK_TRAINING_RETRIES;
> int rc;
>
> dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
> @@ -1367,27 +1460,11 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
> break;
>
> case NPU2_TRAIN_DEFAULT:
> - do {
> - rc = odl_train(n->chip_id, dev->index, dev);
> - } while (rc != OPAL_SUCCESS && --retries);
> -
> - if (rc != OPAL_SUCCESS && retries == 0) {
> - /**
> - * @fwts-label OCAPILinkTrainingFailed
> - * @fwts-advice The OpenCAPI link training procedure failed.
> - * This indicates a hardware or firmware bug. OpenCAPI
> - * functionality will not be available on this link.
> - */
> - prlog(PR_ERR,
> - "OCAPI: Link %d on chip %u failed to train\n",
> - dev->index, n->chip_id);
> - prlog(PR_ERR, "OCAPI: Final link status: %016llx\n",
> - get_odl_status(n->chip_id, dev->index));
> + rc = train_link(n->chip_id, dev);
> + if (rc)
> goto failed;
> - }
>
> otl_enabletx(n->chip_id, n->xscom_base, dev->index);
> -
> slot = npu2_opencapi_slot_create(&dev->phb_ocapi);
> if (!slot) {
> /**
> diff --git a/include/platform.h b/include/platform.h
> index a7776446..9a04ab37 100644
> --- a/include/platform.h
> +++ b/include/platform.h
> @@ -51,6 +51,10 @@ struct platform_ocapi {
> uint32_t i2c_offset[3]; /* Offsets on I2C device */
> uint8_t i2c_odl0_data[3]; /* Data to reset ODL0 */
> uint8_t i2c_odl1_data[3]; /* Data to reset ODL1 */
> + uint8_t i2c_presence_addr; /* I2C address for presence detection */
> + uint8_t i2c_presence_odl0; /* I2C pin to read to detect ODL0 */
> + uint8_t i2c_presence_odl1; /* I2C pin to read to detect ODL1 */
We use these as a mask rather than a pin index, maybe note that explicitly
> + bool force_presence; /* don't use i2c detection */
> bool odl_phy_swap; /* Swap ODL1 to use brick 2 rather than
> * brick 1 lanes */
> };
> diff --git a/platforms/astbmc/zaius.c b/platforms/astbmc/zaius.c
> index 7678ad12..279fbf8b 100644
> --- a/platforms/astbmc/zaius.c
> +++ b/platforms/astbmc/zaius.c
> @@ -24,13 +24,20 @@
>
> #include "astbmc.h"
>
> +#define OCAPI_I2C_PRESENCE_ADDR 0x20
> +#define OCAPI_I2C_PRESENCE_BOTTOM (1 << 2)
> +#define OCAPI_I2C_PRESENCE_TOP (1 << 7)
> +
> const struct platform_ocapi zaius_ocapi = {
> .i2c_engine = 1,
> .i2c_port = 4,
> .i2c_offset = { 0x3, 0x1, 0x1 },
> .i2c_odl0_data = { 0xFD, 0xFD, 0xFF },
> .i2c_odl1_data = { 0xBF, 0xBF, 0xFF },
> - .odl_phy_swap = true,
> + .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
> + .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
> + .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
> + .odl_phy_swap = true,
> };
>
> #define NPU_BASE 0x5011000
> diff --git a/platforms/ibm-fsp/zz.c b/platforms/ibm-fsp/zz.c
> index 9a849290..66f29d44 100644
> --- a/platforms/ibm-fsp/zz.c
> +++ b/platforms/ibm-fsp/zz.c
> @@ -27,6 +27,10 @@
> #include "ibm-fsp.h"
> #include "lxvpd.h"
>
> +#define OCAPI_I2C_PRESENCE_ADDR 0x20
> +#define OCAPI_I2C_PRESENCE_BOTTOM (1 << 2)
> +#define OCAPI_I2C_PRESENCE_TOP (1 << 7)
> +
> /* We don't yet create NPU device nodes on ZZ, but these values are correct */
> const struct platform_ocapi zz_ocapi = {
> .i2c_engine = 1,
> @@ -34,7 +38,15 @@ const struct platform_ocapi zz_ocapi = {
> .i2c_offset = { 0x3, 0x1, 0x1 },
> .i2c_odl0_data = { 0xFD, 0xFD, 0xFF },
> .i2c_odl1_data = { 0xBF, 0xBF, 0xFF },
> - .odl_phy_swap = true,
> + .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
> + .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
> + .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
> + /*
> + * i2c presence detection is broken on ZZ planar < v4 so we
> + * force the presence until all our systems are upgraded
> + */
> + .force_presence = true,
> + .odl_phy_swap = true,
> };
>
> static bool zz_probe(void)
>
--
Andrew Donnellan OzLabs, ADL Canberra
andrew.donnellan at au1.ibm.com IBM Australia Limited
More information about the Skiboot
mailing list