[Skiboot] [PATCH v2 4/6] hw/npu2, platform: Add NPU2 platform device detection callback

Frederic Barrat fbarrat at linux.ibm.com
Thu Aug 30 02:46:12 AEST 2018



Le 27/08/2018 à 10:55, Andrew Donnellan a écrit :
> There is no standardised way to determine the presence and type of devices
> connected to an NPU on POWER9.
> 
> Currently, we hardcode device types based on platform type (as no platform
> currently supports both OpenCAPI and NVLink), and for OpenCAPI platforms
> we use I2C to detect presence.
> 
> Witherspoon (and potentially other platforms later on) supports both
> NVLink and OpenCAPI, and additionally uses SXM2 connectors which can carry
> more than one link, rather than the SlimSAS connectors used for OpenCAPI on
> Zaius and ZZ. This necessitates some special handling.
> 
> Add a platform callback for NPU device detection. In a later patch, we
> will use this to implement Witherspoon-specific device detection. For now,
> add a Witherspoon stub that sets all links to NVLink (i.e. current
> behaviour).
> 
> Move the existing I2C-based presence detection for OpenCAPI devices on
> Zaius/ZZ into common code, which we use by default for platforms which do
> not define a callback. Clean up the use of the ibm,npu-link-type property,
> which will now be exposed solely for debugging and not consumed internally.
> 
> Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
> 
> ---
> v1->v2:
> - turn the "generic" i2c detection code into a callback (Alistair)
> ---
>   core/platform.c                |   2 +-
>   hdata/spira.c                  |   1 +-
>   hw/npu2-common.c               | 103 +++++++++++++++++++++++++++++-----
>   hw/npu2-opencapi.c             |  85 +---------------------------
>   include/npu2.h                 |  14 ++++-
>   include/platform.h             |   4 +-
>   platforms/astbmc/witherspoon.c |  10 +++-
>   platforms/astbmc/zaius.c       |   3 +-
>   8 files changed, 125 insertions(+), 97 deletions(-)
> 
> diff --git a/core/platform.c b/core/platform.c
> index b32cbf5ce9d1..4b3eaa4bd1d1 100644
> --- a/core/platform.c
> +++ b/core/platform.c
> @@ -25,6 +25,7 @@
>   #include <errorlog.h>
>   #include <bt.h>
>   #include <nvram.h>
> +#include <npu2.h>
>   #include <platforms/astbmc/astbmc.h>
> 
>   bool manufacturing_mode = false;
> @@ -204,6 +205,7 @@ static struct platform generic_platform = {
>   	.start_preload_resource	= generic_start_preload_resource,
>   	.resource_loaded	= generic_resource_loaded,
>   	.ocapi		= &generic_ocapi,
> +	.npu2_device_detect = npu2_i2c_presence_detect, /* Assumes ZZ */
>   };
> 
>   const struct bmc_platform *bmc_platform = &generic_bmc;
> diff --git a/hdata/spira.c b/hdata/spira.c
> index c820c4daf26e..189584d5ddb6 100644
> --- a/hdata/spira.c
> +++ b/hdata/spira.c
> @@ -1490,7 +1490,6 @@ static void add_npu(struct dt_node *xscom, const struct HDIF_array_hdr *links,
>   		}
> 
>   		dt_add_property_string(node, "compatible", "ibm,npu-link");
> -		dt_add_property_string(node, "ibm,npu-link-type", "nvlink");
>   		dt_add_property_cells(node, "reg", link_count);
>   		dt_add_property_cells(node, "ibm,npu-link-index", link_count);
>   		dt_add_property_cells(node, "ibm,workbook-link-id", link_id);
> diff --git a/hw/npu2-common.c b/hw/npu2-common.c
> index 7346e704fc93..b87d43469d5b 100644
> --- a/hw/npu2-common.c
> +++ b/hw/npu2-common.c
> @@ -21,19 +21,7 @@
>   #include <npu2-regs.h>
>   #include <bitutils.h>
>   #include <nvram.h>
> -
> -enum npu2_dev_type npu2_dt_link_dev_type(struct dt_node *link)
> -{
> -	const char *link_type = dt_prop_get(link, "ibm,npu-link-type") ?:
> -		"unknown";
> -	if (streq(link_type, "nvlink")) {
> -		return NPU2_DEV_TYPE_NVLINK;
> -	} else if (streq(link_type, "opencapi")) {
> -		return NPU2_DEV_TYPE_OPENCAPI;
> -	} else {
> -		return NPU2_DEV_TYPE_UNKNOWN;
> -	}
> -}
> +#include <i2c.h>
> 
>   /*
>    * We use the indirect method because it uses the same addresses as
> @@ -109,12 +97,62 @@ void npu2_write_mask_4b(struct npu2 *p, uint64_t reg, uint32_t val, uint32_t mas
>   			(uint64_t)new_val << 32);
>   }
> 
> +static bool _i2c_presence_detect(struct npu2_dev *dev)
> +{
> +	uint8_t state, data;
> +	int rc;
> +
> +	rc = i2c_request_send(dev->npu->i2c_port_id_ocapi,
> +			platform.ocapi->i2c_presence_addr,
> +			SMBUS_READ, 0, 1,
> +			&state, 1, 120);
> +	if (rc) {
> +		OCAPIERR(dev, "error detecting link presence: %d\n", rc);
> +		return true; /* assume link exists */
> +	}
> +
> +	OCAPIDBG(dev, "I2C presence detect: 0x%x\n", state);
> +
> +	switch (dev->link_index) {
> +	case 2:
> +		data = platform.ocapi->i2c_presence_odl0;
> +		break;
> +	case 3:
> +		data = platform.ocapi->i2c_presence_odl1;
> +		break;
> +	default:
> +		OCAPIERR(dev, "presence detection on invalid link\n");
> +		return true;
> +	}
> +	/* Presence detect bits are active low */
> +	return !(state & data);
> +}
> +
> +/*
> + * A default presence detection implementation for platforms like ZZ and Zaius
> + * that don't implement their own. Assumes all devices found will be OpenCAPI.
> + */
> +void npu2_i2c_presence_detect(struct npu2 *npu)
> +{
> +	struct npu2_dev *dev;
> +	assert(platform.ocapi);
> +	for (int i = 0; i < npu->total_devices; i++) {
> +		dev = &npu->devices[i];
> +		if (platform.ocapi->force_presence ||
> +		    _i2c_presence_detect(dev))
> +			dev->type = NPU2_DEV_TYPE_OPENCAPI;
> +		else
> +			dev->type = NPU2_DEV_TYPE_UNKNOWN;
> +	}
> +}
> +
>   static struct npu2 *setup_npu(struct dt_node *dn)
>   {
>   	struct npu2 *npu;
>   	struct npu2_dev *dev;
>   	struct dt_node *np;
>   	uint32_t num_links;
> +	char port_name[17];
>   	void *npumem;
>   	char *path;
>   	int gcid;
> @@ -138,6 +176,27 @@ static struct npu2 *setup_npu(struct dt_node *dn)
>   	npu->chip_id = gcid;
>   	npu->xscom_base = dt_get_address(dn, 0, NULL);
>   	npu->phb_index = dt_prop_get_u32(dn, "ibm,phb-index");
> +
> +	if (platform.ocapi) {
> +		/* Find I2C port for handling device presence/reset */
> +		snprintf(port_name, sizeof(port_name), "p8_%08x_e%dp%d",
> +			 gcid, platform.ocapi->i2c_engine,
> +			 platform.ocapi->i2c_port);
> +		prlog(PR_DEBUG, "NPU: Looking for I2C port %s\n", port_name);
> +
> +		dt_for_each_compatible(dt_root, np, "ibm,power9-i2c-port") {
> +			if (streq(port_name, dt_prop_get(np, "ibm,port-name"))) {
> +				npu->i2c_port_id_ocapi = dt_prop_get_u32(np, "ibm,opal-id");
> +				break;
> +			}
> +		}
> +
> +		if (!npu->i2c_port_id_ocapi) {
> +			prlog(PR_ERR, "NPU: Couldn't find I2C port %s\n",
> +			      port_name);
> +		}

If we cannot find the i2c port, the error is ignored. Later on, when 
doing presence detect, we'll use a 0 port number and I wouldn't expect 
anything good out of it.
I think it should be a hard error. If there's no I2C for reset, we are 
doomed.


> +	}
> +
>   	npu->devices = npumem + sizeof(struct npu2);
> 
>   	dt_for_each_compatible(dn, np, "ibm,npu-link") {
> @@ -145,7 +204,8 @@ static struct npu2 *setup_npu(struct dt_node *dn)
>   		dev->link_index = dt_prop_get_u32(np, "ibm,npu-link-index");
>   		/* May be overridden by platform presence detection */
>   		dev->brick_index = dev->link_index;
> -		dev->type = npu2_dt_link_dev_type(np);
> +		/* Will be overridden by presence detection */
> +		dev->type = NPU2_DEV_TYPE_UNKNOWN;
>   		dev->npu = npu;
>   		dev->dt_node = np;
>   		dev->pl_xscom_base = dt_prop_get_u64(np, "ibm,npu-phy");
> @@ -176,13 +236,22 @@ static void setup_devices(struct npu2 *npu)
>   		switch (dev->type) {
>   		case NPU2_DEV_TYPE_NVLINK:
>   			nvlink_detected = true;
> +			dt_add_property_strings(dev->dt_node,
> +						"ibm,npu-link-type",
> +						"nvlink");
>   			break;
>   		case NPU2_DEV_TYPE_OPENCAPI:
>   			ocapi_detected = true;
> +			dt_add_property_strings(dev->dt_node,
> +						"ibm,npu-link-type",
> +						"opencapi");
>   			break;
>   		default:
>   			prlog(PR_INFO, "NPU: Link %d device not present\n",
>   			      npu->devices[i].link_index);
> +			dt_add_property_strings(dev->dt_node,
> +						"ibm,npu-link-type",
> +						"unknown");
>   		}
>   	}
> 
> @@ -220,8 +289,14 @@ void probe_npu2(void)
>   		prlog(PR_WARNING, "NPU2: Using ZCAL impedance override = %d\n", nv_zcal_nominal);
>   	}
> 
> +	if (!platform.npu2_device_detect) {
> +		prlog(PR_INFO, "NPU: Platform does not support NPU\n");
> +		return;
> +	}
> +


This series doesn't define npu2_device_detect callback for ZZ 
(hostboot). Is that intentional, since some more work is needed anyway?



>   	dt_for_each_compatible(dt_root, np, "ibm,power9-npu") {
>   	        npu = setup_npu(np);
> +		platform.npu2_device_detect(npu);
>   		setup_devices(npu);
>   	}
>   }
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index ead8ec011341..fa28633bb150 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -52,14 +52,6 @@
>   #include <i2c.h>
>   #include <nvram.h>
> 
> -#define OCAPIDBG(dev, fmt, a...)    prlog(PR_DEBUG, "OCAPI[%d:%d]: " fmt, \
> -					  dev->npu->chip_id, dev->brick_index, ## a)
> -#define OCAPIINF(dev, fmt, a...)    prlog(PR_INFO, "OCAPI[%d:%d]: " fmt, \
> -					  dev->npu->chip_id, dev->brick_index, ## a)
> -#define OCAPIERR(dev, fmt, a...)    prlog(PR_ERR, "OCAPI[%d:%d]: " fmt, \
> -					  dev->npu->chip_id, dev->brick_index, ## a)
> -
> -
>   #define NPU_IRQ_LEVELS		35
>   #define NPU_IRQ_LEVELS_XSL	23
>   #define MAX_PE_HANDLE		((1 << 15) - 1)
> @@ -842,7 +834,7 @@ static void assert_reset(struct npu2_dev *dev)
>   	 * register and a pin is in output mode if its value is 0
>   	 */
>   	data = ~pin;
> -	rc = i2c_request_send(dev->i2c_port_id_ocapi,
> +	rc = i2c_request_send(dev->npu->i2c_port_id_ocapi,
>   			platform.ocapi->i2c_reset_addr, SMBUS_WRITE,
>   			0x3, 1,
>   			&data, sizeof(data), 120);
> @@ -851,7 +843,7 @@ static void assert_reset(struct npu2_dev *dev)
> 
>   	/* register 1 controls the signal, reset is active low */
>   	data = ~pin;
> -	rc = i2c_request_send(dev->i2c_port_id_ocapi,
> +	rc = i2c_request_send(dev->npu->i2c_port_id_ocapi,
>   			platform.ocapi->i2c_reset_addr, SMBUS_WRITE,
>   			0x1, 1,
>   			&data, sizeof(data), 120);
> @@ -874,7 +866,7 @@ static void deassert_reset(struct npu2_dev *dev)
>   	int rc;
> 
>   	data = 0xFF;
> -	rc = i2c_request_send(dev->i2c_port_id_ocapi,
> +	rc = i2c_request_send(dev->npu->i2c_port_id_ocapi,
>   			platform.ocapi->i2c_reset_addr, SMBUS_WRITE,
>   			0x1, 1,
>   			&data, sizeof(data), 120);
> @@ -888,43 +880,6 @@ static void deassert_reset(struct npu2_dev *dev)
>   	}
>   }
> 
> -static bool i2c_presence_detect(struct npu2_dev *dev)
> -{
> -	uint8_t state, data;
> -	int rc;
> -
> -	/*
> -	 * Opencapi presence detection is done through i2c
> -	 *
> -	 * Lagrange platforms (ZZ, Zaius) use the same default mechanism.
> -	 * Witherspoon will need a specific implementation, TBD.
> -	 */
> -	rc = i2c_request_send(dev->i2c_port_id_ocapi,
> -			platform.ocapi->i2c_presence_addr,
> -			SMBUS_READ, 0, 1,
> -			&state, 1, 120);
> -	if (rc) {
> -		OCAPIERR(dev, "error detecting link presence: %d\n", rc);
> -		return true; /* assume link exists */
> -	}
> -
> -	OCAPIDBG(dev, "I2C presence detect: 0x%x\n", state);
> -
> -	switch (dev->brick_index) { // TODO(ajd): Link or brick index?
> -	case 2:
> -		data = platform.ocapi->i2c_presence_odl0;
> -		break;
> -	case 3:
> -		data = platform.ocapi->i2c_presence_odl1;
> -		break;
> -	default:
> -		OCAPIERR(dev, "presence detection on invalid link\n");
> -		return true;
> -	}
> -	/* Presence detect bits are active low */
> -	return !(state & data);
> -}
> -
>   static void reset_odl(uint32_t gcid, struct npu2_dev *dev)
>   {
>   	uint64_t reg, config_xscom;
> @@ -1015,17 +970,10 @@ static void start_training(uint32_t gcid, struct npu2_dev *dev)
>   	xscom_write(gcid, config_xscom, reg);
>   }
> 
> -static int64_t npu2_opencapi_get_presence_state(struct pci_slot *slot,
> +static int64_t npu2_opencapi_get_presence_state(struct pci_slot __unused *slot,
>   						uint8_t *val)
>   {
> -	bool present;
> -	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
> -
> -	if (platform.ocapi->force_presence)
> -		present = true;
> -	else
> -		present = i2c_presence_detect(dev);
> -	*val = present;
> +	*val = true;
>   	return OPAL_SUCCESS;
>   }


Long pause here... Are we unconditionally returning true because if 
there's no card, the device will have been typed as 
NPU2_DEV_TYPE_UNKNOWN earlier, and therefore no slot created for it? If 
so, it's probably worth a comment.

   Fred



> @@ -1581,9 +1529,8 @@ static void setup_debug_training_state(struct npu2_dev *dev)
> 
>   static void setup_device(struct npu2_dev *dev)
>   {
> -	struct dt_node *dn_phb, *dn;
> +	struct dt_node *dn_phb;
>   	struct pci_slot *slot;
> -	char port_name[17];
>   	uint64_t mm_win[2];
> 
>   	/* Populate PHB device node */
> @@ -1629,23 +1576,6 @@ static void setup_device(struct npu2_dev *dev)
>   	dev->bdfn = 0;
>   	dev->train_need_fence = false;
>   	dev->train_fenced = false;
> -	/* Find I2C port for handling device reset */
> -	snprintf(port_name, sizeof(port_name), "p8_%08x_e%dp%d",
> -		 dev->npu->chip_id, platform.ocapi->i2c_engine,
> -		 platform.ocapi->i2c_port);
> -	prlog(PR_DEBUG, "OCAPI: Looking for I2C port %s\n", port_name);
> -
> -	dt_for_each_compatible(dt_root, dn, "ibm,power9-i2c-port") {
> -		if (streq(port_name, dt_prop_get(dn, "ibm,port-name"))) {
> -			dev->i2c_port_id_ocapi = dt_prop_get_u32(dn, "ibm,opal-id");
> -			break;
> -		}
> -	}
> -
> -	if (!dev->i2c_port_id_ocapi) {
> -		prlog(PR_ERR, "OCAPI: Couldn't find I2C port %s\n", port_name);
> -		goto failed;
> -	}
> 
>   	/* TODO: Procedure 13.1.3.7 - AFU Memory Range BARs */
>   	/* Procedure 13.1.3.8 - AFU MMIO Range BARs */
> @@ -1670,9 +1600,6 @@ static void setup_device(struct npu2_dev *dev)
>   	}
>   	pci_register_phb(&dev->phb_ocapi, OPAL_DYNAMIC_PHB_ID);
>   	return;
> -failed:
> -	dt_add_property_string(dn_phb, "status", "error");
> -	return;
>   }
> 
>   static void read_nvram_training_state(void)
> diff --git a/include/npu2.h b/include/npu2.h
> index 9fdc438bb4c1..6d28fd3e58f4 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -17,6 +17,7 @@
>   #ifndef __NPU2_H
>   #define __NPU2_H
> 
> +#include <pci.h>
>   #include <phys-map.h>
> 
>   /* Debugging options */
> @@ -36,6 +37,14 @@
>   #define NPU2DEVINF(p, fmt, a...)	NPU2DEVLOG(PR_INFO, p, fmt, ##a)
>   #define NPU2DEVERR(p, fmt, a...)	NPU2DEVLOG(PR_ERR, p, fmt, ##a)
> 
> +#define OCAPIDBG(dev, fmt, a...)    prlog(PR_DEBUG, "OCAPI[%d:%d]: " fmt, \
> +					  dev->npu->chip_id, dev->brick_index, ## a)
> +#define OCAPIINF(dev, fmt, a...)    prlog(PR_INFO, "OCAPI[%d:%d]: " fmt, \
> +					  dev->npu->chip_id, dev->brick_index, ## a)
> +#define OCAPIERR(dev, fmt, a...)    prlog(PR_ERR, "OCAPI[%d:%d]: " fmt, \
> +					  dev->npu->chip_id, dev->brick_index, ## a)
> +
> +
>   /* Number of PEs supported */
>   #define NPU2_MAX_PE_NUM		16
>   #define NPU2_RESERVED_PE_NUM	15
> @@ -136,7 +145,6 @@ struct npu2_dev {
> 
>   	/* OpenCAPI */
>   	struct phb		phb_ocapi;
> -	uint64_t		i2c_port_id_ocapi;
>   	bool			train_need_fence;
>   	bool			train_fenced;
>   };
> @@ -169,6 +177,8 @@ struct npu2 {
>   	/* NVLink */
>   	struct phb	phb_nvlink;
>   	uint32_t	phb_index;
> +
> +	uint64_t	i2c_port_id_ocapi;
>   };
> 
>   static inline struct npu2 *phb_to_npu2_nvlink(struct phb *phb)
> @@ -195,11 +205,11 @@ static inline struct phb *npu2_dev_to_phb(struct npu2_dev *ndev)
>   	}
>   }
> 
> +void npu2_i2c_presence_detect(struct npu2 *npu);
>   int npu2_opencapi_init_npu(struct npu2 *npu);
>   int npu2_nvlink_init_npu(struct npu2 *npu);
>   void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn);
> 
> -enum npu2_dev_type npu2_dt_link_dev_type(struct dt_node *link);
>   void npu2_write_4b(struct npu2 *p, uint64_t reg, uint32_t val);
>   uint32_t npu2_read_4b(struct npu2 *p, uint64_t reg);
>   void npu2_write(struct npu2 *p, uint64_t reg, uint64_t val);
> diff --git a/include/platform.h b/include/platform.h
> index 1a35a86a6f8b..efafbd2239b9 100644
> --- a/include/platform.h
> +++ b/include/platform.h
> @@ -22,6 +22,7 @@ struct phb;
>   struct pci_device;
>   struct pci_slot;
>   struct errorlog;
> +struct npu2;
> 
>   enum resource_id {
>   	RESOURCE_ID_KERNEL,
> @@ -76,6 +77,9 @@ struct platform {
>   	/* OpenCAPI platform-specific I2C information */
>   	const struct platform_ocapi *ocapi;
> 
> +	/* NPU2 device detection */
> +	void		(*npu2_device_detect)(struct npu2 *npu);
> +
>   	/*
>   	 * Probe platform, return true on a match, called before
>   	 * any allocation has been performed outside of the heap
> diff --git a/platforms/astbmc/witherspoon.c b/platforms/astbmc/witherspoon.c
> index d663709f8766..ce83ff9701d3 100644
> --- a/platforms/astbmc/witherspoon.c
> +++ b/platforms/astbmc/witherspoon.c
> @@ -27,6 +27,7 @@
>   #include <pci.h>
>   #include <pci-slot.h>
>   #include <phb4.h>
> +#include <npu2.h>
> 
>   #include "astbmc.h"
>   #include "ast.h"
> @@ -153,6 +154,14 @@ static void witherspoon_pre_pci_fixup(void)
>   	phb4_pre_pci_fixup_witherspoon();
>   }
> 
> +static void witherspoon_npu2_device_detect(struct npu2 *npu)
> +{
> +        /* Stub until we implement real device detection */
> +	for (int i = 0; i < npu->total_devices; i++) {
> +		npu->devices[i].type = NPU2_DEV_TYPE_NVLINK;
> +	}
> +}
> +
>   /* The only difference between these is the PCI slot handling */
> 
>   DECLARE_PLATFORM(witherspoon) = {
> @@ -170,4 +179,5 @@ DECLARE_PLATFORM(witherspoon) = {
>   	.terminate		= ipmi_terminate,
> 
>   	.pci_get_slot_info	= dt_slot_get_slot_info,
> +	.npu2_device_detect	= witherspoon_npu2_device_detect,
>   };
> diff --git a/platforms/astbmc/zaius.c b/platforms/astbmc/zaius.c
> index 2f296c3679ac..267e6578bbe7 100644
> --- a/platforms/astbmc/zaius.c
> +++ b/platforms/astbmc/zaius.c
> @@ -21,6 +21,7 @@
>   #include <ipmi.h>
>   #include <psi.h>
>   #include <npu-regs.h>
> +#include <npu2.h>
> 
>   #include "astbmc.h"
> 
> @@ -51,7 +52,6 @@ static void create_link(struct dt_node *npu, int group, int index)
>   	link = dt_new(npu, namebuf);
> 
>   	dt_add_property_string(link, "compatible", "ibm,npu-link");
> -	dt_add_property_string(link, "ibm,npu-link-type", "opencapi");
>   	dt_add_property_cells(link, "ibm,npu-link-index", index);
> 
>   	switch (index) {
> @@ -153,4 +153,5 @@ DECLARE_PLATFORM(zaius) = {
>   	.exit			= ipmi_wdt_final_reset,
>   	.terminate		= ipmi_terminate,
>   	.ocapi			= &zaius_ocapi,
> +	.npu2_device_detect	= npu2_i2c_presence_detect,
>   };
> 



More information about the Skiboot mailing list