[Skiboot] [PATCH 7/7] npu2-opencapi: Fix adapter reset when using 2 adapters

Andrew Donnellan andrew.donnellan at au1.ibm.com
Wed Mar 6 13:43:42 AEDT 2019


On 2/3/19 12:52 am, Frederic Barrat wrote:
> If two opencapi adapters are on the same obus, we may try to train the
> two links in parallel at boot time, when all the PCI links are being
> trained. Both links use the same i2c controller to handle the reset
> signal, so some care is needed to make sure resetting one doesn't
> interfere with the reset of the other. We need to keep track of the
> current state of the i2c controller (and use locking).
> 
> This went mostly unnoticed as you need to have 2 opencapi cards on the
> same socket and links tended to train anyway because of the retries.
> 
> Signed-off-by: Frederic Barrat <fbarrat at linux.ibm.com>

Is this fix important enough that it should go to stable?

Minor comment below

Reviewed-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>

> ---
>   hw/npu2-common.c   |  3 +++
>   hw/npu2-opencapi.c | 34 +++++++++++++++++++++++++++-------
>   include/npu2.h     |  4 ++++
>   3 files changed, 34 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/npu2-common.c b/hw/npu2-common.c
> index ec69953f..3d0b6366 100644
> --- a/hw/npu2-common.c
> +++ b/hw/npu2-common.c
> @@ -182,6 +182,9 @@ static struct npu2 *setup_npu(struct dt_node *dn)
>   	npu->xscom_base = dt_get_address(dn, 0, NULL);
>   	npu->phb_index = dt_prop_get_u32(dn, "ibm,phb-index");
>   
> +	init_lock(&npu->i2c_lock);
> +	npu->i2c_pin_mode = ~0; // input mode by default
> +	npu->i2c_pin_wr_state = ~0; // reset is active low
>   	if (platform.ocapi) {
>   		/* Find I2C port for handling device presence/reset */
>   		snprintf(port_name, sizeof(port_name), "p8_%08x_e%dp%d",
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 786fb655..ccd3cd4b 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -848,10 +848,9 @@ static void otl_enabletx(uint32_t gcid, uint32_t scom_base,
>   	/* TODO: Abort if credits are zero */
>   }
>   
> -static void assert_adapter_reset(struct npu2_dev *dev)
> +static uint8_t get_reset_pin(struct npu2_dev *dev)
>   {
> -	uint8_t pin, data;
> -	int rc;
> +	uint8_t pin;
>   
>   	switch (dev->brick_index) {
>   	case 2:
> @@ -869,14 +868,25 @@ static void assert_adapter_reset(struct npu2_dev *dev)
>   	default:
>   		assert(false);
>   	}
> +	return pin;
> +}
> +
> +static void assert_adapter_reset(struct npu2_dev *dev)
> +{
> +	uint8_t pin, data;

Any reason to keep data around instead of just referencing dev->npu->* 
directly?

> +	int rc;
>   
> +	pin = get_reset_pin(dev);
>   	/*
>   	 * set the i2c reset pin in output mode
>   	 *
>   	 * On the 9554 device, register 3 is the configuration
>   	 * register and a pin is in output mode if its value is 0
>   	 */
> -	data = ~pin;
> +	lock(&dev->npu->i2c_lock);
> +	dev->npu->i2c_pin_mode &= ~pin;
> +	data = dev->npu->i2c_pin_mode;
> +
>   	rc = i2c_request_send(dev->npu->i2c_port_id_ocapi,
>   			platform.ocapi->i2c_reset_addr, SMBUS_WRITE,
>   			0x3, 1,
> @@ -885,16 +895,20 @@ static void assert_adapter_reset(struct npu2_dev *dev)
>   		goto err;
>   
>   	/* register 1 controls the signal, reset is active low */
> -	data = ~pin;
> +	dev->npu->i2c_pin_wr_state &= ~pin;
> +	data = dev->npu->i2c_pin_wr_state;
> +
>   	rc = i2c_request_send(dev->npu->i2c_port_id_ocapi,
>   			platform.ocapi->i2c_reset_addr, SMBUS_WRITE,
>   			0x1, 1,
>   			&data, sizeof(data), 120);
>   	if (rc)
>   		goto err;
> +	unlock(&dev->npu->i2c_lock);
>   	return;
>   
>   err:
> +	unlock(&dev->npu->i2c_lock);
>   	/**
>   	 * @fwts-label OCAPIDeviceResetFailed
>   	 * @fwts-advice There was an error attempting to send
> @@ -905,14 +919,20 @@ err:
>   
>   static void deassert_adapter_reset(struct npu2_dev *dev)
>   {
> -	uint8_t data;
> +	uint8_t pin, data;
>   	int rc;
>   
> -	data = 0xFF;
> +	pin = get_reset_pin(dev);
> +
> +	lock(&dev->npu->i2c_lock);
> +	dev->npu->i2c_pin_wr_state |= pin;
> +	data = dev->npu->i2c_pin_wr_state;
> +
>   	rc = i2c_request_send(dev->npu->i2c_port_id_ocapi,
>   			platform.ocapi->i2c_reset_addr, SMBUS_WRITE,
>   			0x1, 1,
>   			&data, sizeof(data), 120);
> +	unlock(&dev->npu->i2c_lock);
>   	if (rc) {
>   		/**
>   		 * @fwts-label OCAPIDeviceResetFailed
> diff --git a/include/npu2.h b/include/npu2.h
> index cfa3b1f8..b62d1748 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -173,7 +173,11 @@ struct npu2 {
>   	struct phb	phb_nvlink;
>   	uint32_t	phb_index;
>   
> +	/* OCAPI */
>   	uint64_t	i2c_port_id_ocapi;
> +	struct lock	i2c_lock;
> +	uint8_t		i2c_pin_mode;
> +	uint8_t		i2c_pin_wr_state;
>   };
>   
>   static inline struct npu2 *phb_to_npu2_nvlink(struct phb *phb)
> 

-- 
Andrew Donnellan              OzLabs, ADL Canberra
andrew.donnellan at au1.ibm.com  IBM Australia Limited



More information about the Skiboot mailing list