[Skiboot] [PATCH 1/5] npu2-opencapi: Use presence detection

Andrew Donnellan andrew.donnellan at au1.ibm.com
Fri Apr 13 17:24:39 AEST 2018


On 12/04/18 22:45, Frederic Barrat wrote:
> From: Frederic Barrat <fbarrat at linux.vnet.ibm.com>
> 
> Presence detection is not part of the opencapi specification. So each
> platform may choose to implement it the way it wants.
> 
> All current platforms implement it through an i2c device where we can
> query a pin to know if a device is connected or not. ZZ and Zaius have
> a similar design and even use the same i2c information and pin
> numbers.
> However, presence detection on older ZZ planar (older than v4) doesn't
> work, so we don't activate it for now, until our lab systems are
> upgraded and it's better tested.
> 
> Presence detection on witherspoon is still being worked on, but is
> very likely to follow something similar, i.e. an i2c pin to query per
> device.
> 
> Signed-off-by: Frederic Barrat <fbarrat at linux.vnet.ibm.com>

A few comments below. Smoketested this on one of our zaiuses, which 
let's be honest was probably one of the same machines you tested on...

Apart from comments below:

Acked-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>

> ---
>   core/platform.c          |  14 +++++-
>   hw/npu2-opencapi.c       | 121 ++++++++++++++++++++++++++++++++++++++---------
>   include/platform.h       |   4 ++
>   platforms/astbmc/zaius.c |   9 +++-
>   platforms/ibm-fsp/zz.c   |  14 +++++-
>   5 files changed, 137 insertions(+), 25 deletions(-)
> 
> diff --git a/core/platform.c b/core/platform.c
> index f09ea3c1..174235d9 100644
> --- a/core/platform.c
> +++ b/core/platform.c
> @@ -34,6 +34,10 @@ DEFINE_LOG_ENTRY(OPAL_RC_ABNORMAL_REBOOT, OPAL_PLATFORM_ERR_EVT, OPAL_CEC,
>   		 OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_REBOOT,
>   		 OPAL_ABNORMAL_POWER_OFF);
>   
> +#define OCAPI_I2C_PRESENCE_ADDR    0x20
> +#define OCAPI_I2C_PRESENCE_BOTTOM  (1 << 2)
> +#define OCAPI_I2C_PRESENCE_TOP     (1 << 7)
> +

I'm not convinced there's an advantage to having these as #defines 
rather than literals in platform_ocapi like all the other stuff already 
there.

In any case, it should be noted that these values, like everything else 
in generic_ocapi, are for a ZZ.

>   /*
>    * Various wrappers for platform functions
>    */
> @@ -175,7 +179,15 @@ const struct platform_ocapi generic_ocapi = {
>   	.i2c_offset	= { 0x3, 0x1, 0x1 },
>   	.i2c_odl0_data	= { 0xFD, 0xFD, 0xFF },
>   	.i2c_odl1_data	= { 0xBF, 0xBF, 0xFF },
> -	.odl_phy_swap	= true,
> +	.i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
> +	.i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
> +	.i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
> +	/*
> +	 * i2c presence detection is broken on ZZ planar < v4 so we
> +	 * force the presence until all our systems are upgraded
> +	 */
> +	.force_presence    = true,
> +	.odl_phy_swap   = true,
>   };
>   
>   static struct bmc_platform generic_bmc = {
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 2cc776c7..b2bc0628 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -829,6 +829,44 @@ static void reset_ocapi_device(struct npu2_dev *dev)
>   	}
>   }
>   
> +static bool i2c_presence_detect(struct npu2_dev *dev)
> +{
> +	uint8_t state, data;
> +	int rc;
> +
> +	/*
> +	 * Opencapi presence detection is done through i2c
> +	 *
> +	 * Lagrange platforms (ZZ, Zaius) use the same default mechanism.
> +	 * Witherspoon will need a specific implementation, TBD.
> +	 */
> +	rc = i2c_request_send(dev->i2c_port_id_ocapi,
> +			platform.ocapi->i2c_presence_addr,
> +			SMBUS_READ, 0, 1,
> +			&state, 1, 120);
> +	if (rc) {
> +		prlog(PR_ERR, "OCAPI: error detecting link presence: %d\n",
> +			rc);
> +		return true; /* assume link exists */
> +	}
> +
> +	prlog(PR_DEBUG, "OCAPI: I2C presence detect: 0x%x\n", state);
> +
> +	switch (dev->index) {
> +	case 2:
> +		data = platform.ocapi->i2c_presence_odl0;
> +		break;
> +	case 3:
> +		data = platform.ocapi->i2c_presence_odl1;
> +		break;
> +	default:
> +		prlog(PR_ERR, "OCAPI: presence detection on invalid link\n");
> +		return true;
> +	}
> +	/* Presence detect bits are active low */
> +	return !(state & data);
> +}
> +
>   static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev *dev)
>   {
>   	uint64_t reg, config_xscom;
> @@ -896,6 +934,20 @@ static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev *dev)
>   	return OPAL_HARDWARE;
>   }
>   
> +static int64_t npu2_opencapi_get_presence_state(struct pci_slot *slot,
> +						uint8_t *val)
> +{
> +	bool present;
> +	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
> +
> +	if (platform.ocapi->force_presence)
> +		present = true;
> +	else
> +		present = i2c_presence_detect(dev);
> +	*val = present;
> +	return OPAL_SUCCESS;
> +}
> +
>   static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val)
>   {
>   	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
> @@ -926,9 +978,9 @@ static struct pci_slot *npu2_opencapi_slot_create(struct phb *phb)
>   		return slot;
>   
>   	/* TODO: Figure out other slot functions */
> -	slot->ops.get_presence_state = NULL;
> -	slot->ops.get_link_state = npu2_opencapi_get_link_state;
> -	slot->ops.get_power_state = NULL;
> +	slot->ops.get_presence_state  = npu2_opencapi_get_presence_state;
> +	slot->ops.get_link_state      = npu2_opencapi_get_link_state;
> +	slot->ops.get_power_state     = NULL;
>   	slot->ops.get_attention_state = NULL;
>   	slot->ops.get_latch_state     = NULL;
>   	slot->ops.set_power_state     = NULL;
> @@ -1264,6 +1316,48 @@ static int setup_irq(struct npu2 *p)
>   
>   #define LINK_TRAINING_RETRIES	5
>   
> +static int train_link(int chip_id, struct npu2_dev *dev)
> +{
> +	bool train;
> +	int rc;
> +	int retries = LINK_TRAINING_RETRIES;
> +
> +	if (platform.ocapi->force_presence)
> +		train = true;
> +	else
> +		train = i2c_presence_detect(dev);
> +	if (!train) {
> +		/*
> +		 * FIXME: if there's no card on the link, we should consider
> +		 * powering off the unused lanes to save energy
> +		 */
> +		prlog(PR_INFO, "OCAPI: no card detected on link %d, chip %d\n",
> +			dev->index, chip_id);
> +		return -1;
> +	}
> +
> +	do {
> +		rc = odl_train(chip_id, dev->index, dev);
> +	} while (rc != OPAL_SUCCESS && --retries);
> +
> +	if (rc != OPAL_SUCCESS && retries == 0) {
> +		/**
> +		 * @fwts-label OCAPILinkTrainingFailed
> +		 * @fwts-advice The OpenCAPI link training procedure failed.
> +		 * This indicates a hardware or firmware bug. OpenCAPI
> +		 * functionality will not be available on this link.
> +		 */
> +		prlog(PR_ERR,
> +			"OCAPI: Link %d on chip %u failed to train\n",
> +			dev->index, chip_id);
> +		prlog(PR_ERR, "OCAPI: Final link status: %016llx\n",
> +			get_odl_status(chip_id, dev->index));
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
>   static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
>   				       struct npu2_dev *dev)
>   {
> @@ -1272,7 +1366,6 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
>   	struct pci_slot *slot;
>   	char port_name[17];
>   	uint64_t mm_win[2];
> -	int retries = LINK_TRAINING_RETRIES;
>   	int rc;
>   
>   	dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
> @@ -1367,27 +1460,11 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
>   		break;
>   
>   	case NPU2_TRAIN_DEFAULT:
> -		do {
> -			rc = odl_train(n->chip_id, dev->index, dev);
> -		} while (rc != OPAL_SUCCESS && --retries);
> -
> -		if (rc != OPAL_SUCCESS && retries == 0) {
> -			/**
> -			 * @fwts-label OCAPILinkTrainingFailed
> -			 * @fwts-advice The OpenCAPI link training procedure failed.
> -			 * This indicates a hardware or firmware bug. OpenCAPI
> -			 * functionality will not be available on this link.
> -			 */
> -			prlog(PR_ERR,
> -				"OCAPI: Link %d on chip %u failed to train\n",
> -				dev->index, n->chip_id);
> -			prlog(PR_ERR, "OCAPI: Final link status: %016llx\n",
> -				get_odl_status(n->chip_id, dev->index));
> +		rc = train_link(n->chip_id, dev);
> +		if (rc)
>   			goto failed;
> -		}
>   
>   		otl_enabletx(n->chip_id, n->xscom_base, dev->index);
> -
>   		slot = npu2_opencapi_slot_create(&dev->phb_ocapi);
>   		if (!slot) {
>   			/**
> diff --git a/include/platform.h b/include/platform.h
> index a7776446..9a04ab37 100644
> --- a/include/platform.h
> +++ b/include/platform.h
> @@ -51,6 +51,10 @@ struct platform_ocapi {
>   	uint32_t i2c_offset[3];		/* Offsets on I2C device */
>   	uint8_t i2c_odl0_data[3];	/* Data to reset ODL0 */
>   	uint8_t i2c_odl1_data[3];	/* Data to reset ODL1 */
> +	uint8_t i2c_presence_addr;	/* I2C address for presence detection */
> +	uint8_t i2c_presence_odl0;	/* I2C pin to read to detect ODL0 */
> +	uint8_t i2c_presence_odl1;	/* I2C pin to read to detect ODL1 */

We use these as a mask rather than a pin index, maybe note that explicitly

> +	bool force_presence;            /* don't use i2c detection */
>   	bool odl_phy_swap;		/* Swap ODL1 to use brick 2 rather than
>   					 * brick 1 lanes */
>   };
> diff --git a/platforms/astbmc/zaius.c b/platforms/astbmc/zaius.c
> index 7678ad12..279fbf8b 100644
> --- a/platforms/astbmc/zaius.c
> +++ b/platforms/astbmc/zaius.c
> @@ -24,13 +24,20 @@
>   
>   #include "astbmc.h"
>   
> +#define OCAPI_I2C_PRESENCE_ADDR    0x20
> +#define OCAPI_I2C_PRESENCE_BOTTOM  (1 << 2)
> +#define OCAPI_I2C_PRESENCE_TOP     (1 << 7)
> +
>   const struct platform_ocapi zaius_ocapi = {
>   	.i2c_engine	= 1,
>   	.i2c_port	= 4,
>   	.i2c_offset	= { 0x3, 0x1, 0x1 },
>   	.i2c_odl0_data	= { 0xFD, 0xFD, 0xFF },
>   	.i2c_odl1_data	= { 0xBF, 0xBF, 0xFF },
> -	.odl_phy_swap	= true,
> +	.i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
> +	.i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
> +	.i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
> +	.odl_phy_swap   = true,
>   };
>   
>   #define NPU_BASE 0x5011000
> diff --git a/platforms/ibm-fsp/zz.c b/platforms/ibm-fsp/zz.c
> index 9a849290..66f29d44 100644
> --- a/platforms/ibm-fsp/zz.c
> +++ b/platforms/ibm-fsp/zz.c
> @@ -27,6 +27,10 @@
>   #include "ibm-fsp.h"
>   #include "lxvpd.h"
>   
> +#define OCAPI_I2C_PRESENCE_ADDR    0x20
> +#define OCAPI_I2C_PRESENCE_BOTTOM  (1 << 2)
> +#define OCAPI_I2C_PRESENCE_TOP     (1 << 7)
> +
>   /* We don't yet create NPU device nodes on ZZ, but these values are correct */
>   const struct platform_ocapi zz_ocapi = {
>   	.i2c_engine	= 1,
> @@ -34,7 +38,15 @@ const struct platform_ocapi zz_ocapi = {
>   	.i2c_offset	= { 0x3, 0x1, 0x1 },
>   	.i2c_odl0_data	= { 0xFD, 0xFD, 0xFF },
>   	.i2c_odl1_data	= { 0xBF, 0xBF, 0xFF },
> -	.odl_phy_swap	= true,
> +	.i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
> +	.i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
> +	.i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
> +	/*
> +	 * i2c presence detection is broken on ZZ planar < v4 so we
> +	 * force the presence until all our systems are upgraded
> +	 */
> +	.force_presence    = true,
> +	.odl_phy_swap   = true,
>   };
>   
>   static bool zz_probe(void)
> 

-- 
Andrew Donnellan              OzLabs, ADL Canberra
andrew.donnellan at au1.ibm.com  IBM Australia Limited



More information about the Skiboot mailing list