[Skiboot] [PATCH v2 3/6] hw/npu2: Common NPU2 init routine between NVLink and OpenCAPI

Frederic Barrat fbarrat at linux.ibm.com
Thu Aug 30 02:26:08 AEST 2018



Le 27/08/2018 à 10:55, Andrew Donnellan a écrit :
> Replace probe_npu2() and probe_npu2_opencapi() with a new shared
> probe_npu2(). Refactor some of the common NPU setup code into shared code.
> 
> No functional change. This patch does not implement support for using both
> types of devices simultaneously on the same NPU - we expect to add this
> sometime in the future.
> 
> Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
> 
> ---
> 
> v1->v2:
> - cleaned up unneeded null check (Alistair)
> ---
>   core/init.c        |   2 +-
>   hw/npu2-common.c   | 118 +++++++++++++++++++++++-
>   hw/npu2-opencapi.c | 169 +++++++++++----------------------
>   hw/npu2.c          | 236 ++++++++++++++--------------------------------
>   include/npu2.h     |   6 +-
>   include/skiboot.h  |   1 +-
>   6 files changed, 257 insertions(+), 275 deletions(-)
> 
> diff --git a/core/init.c b/core/init.c
> index ca6c468c3941..9095981e67e0 100644
> --- a/core/init.c
> +++ b/core/init.c
> @@ -1126,8 +1126,6 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
>   	/* Probe NPUs */
>   	probe_npu();
>   	probe_npu2();
> -	/* TODO: Eventually, we'll do NVLink and OpenCAPI together */
> -	probe_npu2_opencapi();
> 
>   	/* Initialize PCI */
>   	pci_init_slots();
> diff --git a/hw/npu2-common.c b/hw/npu2-common.c
> index d076b4906fcc..7346e704fc93 100644
> --- a/hw/npu2-common.c
> +++ b/hw/npu2-common.c
> @@ -20,6 +20,7 @@
>   #include <npu2.h>
>   #include <npu2-regs.h>
>   #include <bitutils.h>
> +#include <nvram.h>
> 
>   enum npu2_dev_type npu2_dt_link_dev_type(struct dt_node *link)
>   {
> @@ -107,3 +108,120 @@ void npu2_write_mask_4b(struct npu2 *p, uint64_t reg, uint32_t val, uint32_t mas
>   	npu2_scom_write(p->chip_id, p->xscom_base, reg, NPU2_MISC_DA_LEN_4B,
>   			(uint64_t)new_val << 32);
>   }
> +
> +static struct npu2 *setup_npu(struct dt_node *dn)
> +{
> +	struct npu2 *npu;
> +	struct npu2_dev *dev;
> +	struct dt_node *np;
> +	uint32_t num_links;
> +	void *npumem;
> +	char *path;
> +	int gcid;
> +	struct proc_chip *chip;
> +	int i = 0;
> +
> +	/* Retrieve chip ID */
> +	path = dt_get_path(dn);
> +	gcid = dt_get_chip_id(dn);
> +	chip = get_chip(gcid);
> +	assert(chip);
> +
> +	num_links = dt_prop_get_u32(dn, "ibm,npu-links");
> +	npumem = zalloc(sizeof(struct npu2) + num_links *
> +			sizeof(struct npu2_dev));
> +	assert(npumem);
> +	npu = npumem;
> +
> +	npu->dt_node = dn;
> +	npu->index = dt_prop_get_u32(dn, "ibm,npu-index");
> +	npu->chip_id = gcid;
> +	npu->xscom_base = dt_get_address(dn, 0, NULL);
> +	npu->phb_index = dt_prop_get_u32(dn, "ibm,phb-index");
> +	npu->devices = npumem + sizeof(struct npu2);
> +
> +	dt_for_each_compatible(dn, np, "ibm,npu-link") {
> +		dev = &npu->devices[i];
> +		dev->link_index = dt_prop_get_u32(np, "ibm,npu-link-index");
> +		/* May be overridden by platform presence detection */
> +		dev->brick_index = dev->link_index;
> +		dev->type = npu2_dt_link_dev_type(np);
> +		dev->npu = npu;
> +		dev->dt_node = np;
> +		dev->pl_xscom_base = dt_prop_get_u64(np, "ibm,npu-phy");
> +		dev->lane_mask = dt_prop_get_u32(np, "ibm,npu-lane-mask");
> +		dev->link_speed = dt_prop_get_u64(np, "ibm,link-speed");
> +		i++;

I think we should add a check to make sure we're not overflowing our 
array of devices if somehow the device tree data is not consistent and 
we have more "ibm,npu-link" than advertised in "ibm,npu-links".



> +	};
> +	npu->total_devices = i;
> +
> +	prlog(PR_INFO, "NPU: Chip %d Found NPU2#%d (%d links) at %s\n",
> +	      npu->chip_id, npu->index, npu->total_devices, path);
> +	prlog(PR_INFO, "   SCOM Base:  %08llx\n", npu->xscom_base);
> +	free(path);
> +	return npu;
> +}
> +
> +static void setup_devices(struct npu2 *npu)
> +{
> +	bool nvlink_detected = false, ocapi_detected = false;
> +	struct npu2_dev *dev;
> +
> +	/*
> +	 * TODO: In future, we'll do brick configuration here to support mixed
> +	 * setups.
> +	 */
> +	for (int i = 0; i < npu->total_devices; i++) {
> +		dev = &npu->devices[i];
> +		switch (dev->type) {
> +		case NPU2_DEV_TYPE_NVLINK:
> +			nvlink_detected = true;
> +			break;
> +		case NPU2_DEV_TYPE_OPENCAPI:
> +			ocapi_detected = true;
> +			break;
> +		default:
> +			prlog(PR_INFO, "NPU: Link %d device not present\n",
> +			      npu->devices[i].link_index);
> +		}
> +	}
> +
> +	if (nvlink_detected && ocapi_detected) {
> +		prlog(PR_ERR, "NPU: NVLink and OpenCAPI devices on same chip not supported, aborting NPU init\n");
> +		return;
> +	}
> +
> +	if (nvlink_detected)
> +		npu2_nvlink_init_npu(npu);
> +	else if (ocapi_detected)
> +		npu2_opencapi_init_npu(npu);
> +}
> +
> +void probe_npu2(void)
> +{
> +	struct proc_chip *chip = next_chip(NULL);
> +	struct npu2 *npu;
> +	struct dt_node *np;
> +	const char *zcal;
> +
> +	/* Abort if we're running on DD1 */
> +	if (chip &&
> +	    (chip->type == PROC_CHIP_P9_NIMBUS ||
> +	     chip->type == PROC_CHIP_P9_CUMULUS) &&
> +	    (chip->ec_level & 0xf0) == 0x10) {
> +		prlog(PR_INFO, "NPU2: DD1 not supported\n");
> +		return;
> +	}
> +
> +	/* Check for a zcal override */
> +	zcal = nvram_query("nv_zcal_override");
> +	if (zcal) {
> +		nv_zcal_nominal = atoi(zcal);
> +		prlog(PR_WARNING, "NPU2: Using ZCAL impedance override = %d\n", nv_zcal_nominal);
> +	}
> +
> +	dt_for_each_compatible(dt_root, np, "ibm,power9-npu") {
> +	        npu = setup_npu(np);
> +		setup_devices(npu);
> +	}
> +}
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 68ae93a2f836..ead8ec011341 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -1446,11 +1446,14 @@ static int npu2_add_mmio_regs(struct phb *phb, struct pci_device *pd,
>   	 * Add the addresses of the registers needed by the OS to handle
>   	 * faults. The OS accesses them by mmio.
>   	 */
> -	dsisr  = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_DSISR(stacku, block);
> -	dar    = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_DAR(stacku, block);
> -	tfc    = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_TFC(stacku, block);
> -	handle = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_PEHANDLE(stacku,
> -								block);
> +	dsisr  = (uint64_t) dev->npu->ocapi_global_mmio_base +
> +		NPU2_OTL_OSL_DSISR(stacku, block);
> +	dar    = (uint64_t) dev->npu->ocapi_global_mmio_base +
> +		NPU2_OTL_OSL_DAR(stacku, block);
> +	tfc    = (uint64_t) dev->npu->ocapi_global_mmio_base +
> +		NPU2_OTL_OSL_TFC(stacku, block);
> +	handle = (uint64_t) dev->npu->ocapi_global_mmio_base +
> +		NPU2_OTL_OSL_PEHANDLE(stacku, block);
>   	dt_add_property_cells(pd->dn, "ibm,opal-xsl-irq", irq);
>   	dt_add_property_cells(pd->dn, "ibm,opal-xsl-mmio",
>   			hi32(dsisr), lo32(dsisr),
> @@ -1576,20 +1579,15 @@ static void setup_debug_training_state(struct npu2_dev *dev)
>   	}
>   }
> 
> -static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
> -				       struct npu2_dev *dev)
> +static void setup_device(struct npu2_dev *dev)
>   {
> -	uint32_t dev_index, npu_index;
>   	struct dt_node *dn_phb, *dn;
>   	struct pci_slot *slot;
>   	char port_name[17];
>   	uint64_t mm_win[2];
> 
> -	dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
> -	npu_index = dt_prop_get_u32(n->dt_node, "ibm,npu-index");
> -
>   	/* Populate PHB device node */
> -	phys_map_get(n->chip_id, NPU_OCAPI_MMIO, dev_index, &mm_win[0],
> +	phys_map_get(dev->npu->chip_id, NPU_OCAPI_MMIO, dev->brick_index, &mm_win[0],
>   		     &mm_win[1]);
>   	prlog(PR_DEBUG, "OCAPI: Setting MMIO window to %016llx + %016llx\n",
>   	      mm_win[0], mm_win[1]);
> @@ -1609,40 +1607,28 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
> 
>   	dt_add_property_strings(dn_phb, "device_type", "pciex");
>   	dt_add_property(dn_phb, "reg", mm_win, sizeof(mm_win));
> -	dt_add_property_cells(dn_phb, "ibm,npu-index", npu_index);
> -	dt_add_property_cells(dn_phb, "ibm,chip-id", n->chip_id);
> -	dt_add_property_cells(dn_phb, "ibm,xscom-base", n->xscom_base);
> -	dt_add_property_cells(dn_phb, "ibm,npcq", n->dt_node->phandle);
> +	dt_add_property_cells(dn_phb, "ibm,npu-index", dev->npu->index);
> +	dt_add_property_cells(dn_phb, "ibm,chip-id", dev->npu->chip_id);
> +	dt_add_property_cells(dn_phb, "ibm,xscom-base", dev->npu->xscom_base);
> +	dt_add_property_cells(dn_phb, "ibm,npcq", dev->npu->dt_node->phandle);
>   	dt_add_property_cells(dn_phb, "ibm,links", 1);
>   	dt_add_property(dn_phb, "ibm,mmio-window", mm_win, sizeof(mm_win));
>   	dt_add_property_cells(dn_phb, "ibm,phb-diag-data-size", 0);
>   	dt_add_property_cells(dn_phb, "ibm,opal-num-pes", NPU2_MAX_PE_NUM);
> 
> -	n->mm_base = mm_win[0];
> -	n->mm_size = mm_win[1];
> -
>   	dt_add_property_cells(dn_phb, "ranges", 0x02000000,
> -			      hi32(n->mm_base), lo32(n->mm_base),
> -			      hi32(n->mm_base), lo32(n->mm_base),
> -			      hi32(n->mm_size), lo32(n->mm_size));
> +			      hi32(mm_win[0]), lo32(mm_win[0]),
> +			      hi32(mm_win[0]), lo32(mm_win[0]),
> +			      hi32(mm_win[1]), lo32(mm_win[1]));
> 
> -	dev->type = NPU2_DEV_TYPE_OPENCAPI;
> -	dev->npu = n;
> -	dev->dt_node = dn_link;
>   	dev->phb_ocapi.dt_node = dn_phb;
>   	dev->phb_ocapi.ops = &npu2_opencapi_ops;
>   	dev->phb_ocapi.phb_type = phb_type_npu_v2_opencapi;
>   	dev->phb_ocapi.scan_map = 0;
> -	dev->link_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
> -	dev->brick_index = dev->link_index;
> -	dev->pl_xscom_base = dt_prop_get_u64(dn_link, "ibm,npu-phy");
> -	dev->lane_mask = dt_prop_get_u32(dn_link, "ibm,npu-lane-mask");
> -	dev->link_speed = dt_prop_get_u64(dn_link, "ibm,link-speed");
> +
>   	dev->bdfn = 0;
>   	dev->train_need_fence = false;
>   	dev->train_fenced = false;
> -	n->total_devices++;
> -
>   	/* Find I2C port for handling device reset */
>   	snprintf(port_name, sizeof(port_name), "p8_%08x_e%dp%d",
>   		 dev->npu->chip_id, platform.ocapi->i2c_engine,
> @@ -1663,11 +1649,11 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
> 
>   	/* TODO: Procedure 13.1.3.7 - AFU Memory Range BARs */
>   	/* Procedure 13.1.3.8 - AFU MMIO Range BARs */
> -	setup_afu_mmio_bars(n->chip_id, n->xscom_base, dev);
> +	setup_afu_mmio_bars(dev->npu->chip_id, dev->npu->xscom_base, dev);
>   	/* Procedure 13.1.3.9 - AFU Config BARs */
> -	setup_afu_config_bars(n->chip_id, n->xscom_base, dev);
> +	setup_afu_config_bars(dev->npu->chip_id, dev->npu->xscom_base, dev);
> 
> -	set_fence_control(n->chip_id, n->xscom_base, dev->brick_index, 0b00);
> +	set_fence_control(dev->npu->chip_id, dev->npu->xscom_base, dev->brick_index, 0b00);
> 
>   	if (npu2_ocapi_training_state != NPU2_TRAIN_DEFAULT) {
>   		setup_debug_training_state(dev);
> @@ -1689,107 +1675,72 @@ failed:
>   	return;
>   }
> 
> -static void npu2_opencapi_probe(struct dt_node *dn)
> +static void read_nvram_training_state(void)
>   {
> -	struct dt_node *link;
> -	char *path;
> -	uint32_t gcid, index, links, scom_base;
> -	uint64_t reg[2];
> -	uint64_t dev_index;
> -	struct npu2 *n;
> -	int rc, i = 0;
> -
> -	gcid = dt_get_chip_id(dn);
> -	index = dt_prop_get_u32(dn, "ibm,npu-index");
> -	links = dt_prop_get_u32(dn, "ibm,npu-links");
> -
> -	/* Don't try to init when we have an NVLink link */
> -	dt_for_each_compatible(dn, link, "ibm,npu-link") {
> -		if (npu2_dt_link_dev_type(link) != NPU2_DEV_TYPE_OPENCAPI) {
> -			prlog(PR_DEBUG,
> -			      "OCAPI: NPU%d: Non-OpenCAPI link found, skipping OpenCAPI init\n",
> -			      index);
> -			return;
> -		}
> +	const char *state;
> +
> +	state = nvram_query("opencapi-link-training");
> +	if (state) {
> +		if (!strcmp(state, "prbs31"))
> +			npu2_ocapi_training_state = NPU2_TRAIN_PRBS31;
> +		else if (!strcmp(state, "none"))
> +			npu2_ocapi_training_state = NPU2_TRAIN_NONE;
> +		else
> +			prlog(PR_WARNING,
> +			      "OCAPI: invalid training state in NVRAM: %s\n",
> +			      state);
>   	}
> +}
> 
> -	path = dt_get_path(dn);
> -	prlog(PR_INFO, "OCAPI: Chip %d Found OpenCAPI NPU%d (%d links) at %s\n",
> -	      gcid, index, links, path);
> -	free(path);
> +int npu2_opencapi_init_npu(struct npu2 *npu)
> +{
> +	struct npu2_dev *dev;
> +	uint64_t reg[2];
> +	int rc;
> 
>   	assert(platform.ocapi);
> +	read_nvram_training_state();
> 
>   	/* TODO: Test OpenCAPI with fast reboot and make it work */
>   	disable_fast_reboot("OpenCAPI device enabled");
> 
> -	scom_base = dt_get_address(dn, 0, NULL);
> -	prlog(PR_INFO, "OCAPI:	 SCOM Base:  %08x\n", scom_base);
> +	setup_global_mmio_bar(npu->chip_id, npu->xscom_base, reg);
> 
> -	setup_global_mmio_bar(gcid, scom_base, reg);
> +	npu->ocapi_global_mmio_base = (void *)reg[0];

How is this different from npu->regs that npu2.c is setting up? Do we 
have 2 different variables for the same thing, nvlink using one and 
opencapi using the other? I'm probably missing something...

More generally, we have fields in the struct npu2 which are either used 
for nvlink or opencapi. And it's getting confusing to know which ones 
are defined. At the very minimum, we should probably make it clear 
through comments or ordering which is defined where. A lot of them 
already were, I think we've just adding a few, maybe less obvious ones: 
regs, mm_base, mm_size. I could have missed some.
A per-type substructure would work, but it's more cumbersome to use, so 
I don't have strong feelings there.


> 
> -	n = zalloc(sizeof(struct npu2) + links * sizeof(struct npu2_dev));
> -	n->devices = (struct npu2_dev *)(n + 1);
> -	n->chip_id = gcid;
> -	n->xscom_base = scom_base;
> -	n->regs = (void *)reg[0];
> -	n->dt_node = dn;
> +	for (int i = 0; i < npu->total_devices; i++) {
> +		dev = &npu->devices[i];
> +		if (dev->type != NPU2_DEV_TYPE_OPENCAPI)
> +			continue;
> 
> -	dt_for_each_compatible(dn, link, "ibm,npu-link") {
> -		dev_index = dt_prop_get_u32(link, "ibm,npu-link-index");
> -		prlog(PR_INFO, "OCAPI: Configuring link index %lld\n",
> -		      dev_index);
> +		prlog(PR_INFO, "OCAPI: Configuring link index %d, brick %d\n",
> +		      dev->link_index, dev->brick_index);
> 
>   		/* Procedure 13.1.3.1 - Select OCAPI vs NVLink */
> -		brick_config(gcid, scom_base, dev_index);
> +		brick_config(npu->chip_id, npu->xscom_base, dev->brick_index);
> 
>   		/* Procedure 13.1.3.5 - Transaction Layer Configuration */
> -		tl_config(gcid, scom_base, dev_index);
> +		tl_config(npu->chip_id, npu->xscom_base, dev->brick_index);
> 
>   		/* Procedure 13.1.3.6 - Address Translation Configuration */
> -		address_translation_config(gcid, scom_base, dev_index);
> +		address_translation_config(npu->chip_id, npu->xscom_base, dev->brick_index);
>   	}
> 
>   	/* Procedure 13.1.3.10 - Interrupt Configuration */
> -	rc = setup_irq(n);
> +	rc = setup_irq(npu);
>   	if (rc)
>   		goto failed;
> 
> -	dt_for_each_compatible(dn, link, "ibm,npu-link") {
> -		npu2_opencapi_setup_device(link, n, &n->devices[i]);
> -		i++;
> +	for (int i = 0; i < npu->total_devices; i++) {


Just to make sure we are on the same page: the only reason why we broke 
that loop into 2 parts, with setup_irq() in the middle, is because we do 
all the hw setup first, and we only want to call the 2nd part, which 
modifies the device tree and will give visibility of the hw to the OS, 
when we are sure that all the hw setup was successful for all the links. 
Is that right?

   Fred


> +		dev = &npu->devices[i];
> +		if (dev->type != NPU2_DEV_TYPE_OPENCAPI)
> +			continue;
> +		setup_device(dev);
>   	}
> 
> -	return;
> +	return 0;
>   failed:
> -	free(n);
> -}
> -
> -static void read_nvram_training_state(void)
> -{
> -	const char *state;
> -
> -	state = nvram_query("opencapi-link-training");
> -	if (state) {
> -		if (!strcmp(state, "prbs31"))
> -			npu2_ocapi_training_state = NPU2_TRAIN_PRBS31;
> -		else if (!strcmp(state, "none"))
> -			npu2_ocapi_training_state = NPU2_TRAIN_NONE;
> -		else
> -			prlog(PR_WARNING,
> -			      "OCAPI: invalid training state in NVRAM: %s\n",
> -			      state);
> -	}
> -}
> -
> -void probe_npu2_opencapi(void)
> -{
> -	struct dt_node *np_npu;
> -
> -	read_nvram_training_state();
> -
> -	dt_for_each_compatible(dt_root, np_npu, "ibm,power9-npu")
> -		npu2_opencapi_probe(np_npu);
> +	return -1;
>   }
> 
>   static const struct phb_ops npu2_opencapi_ops = {
> diff --git a/hw/npu2.c b/hw/npu2.c
> index be1c304420fc..419dd37a24b1 100644
> --- a/hw/npu2.c
> +++ b/hw/npu2.c
> @@ -1357,44 +1357,13 @@ static void assign_mmio_bars(uint64_t gcid, uint32_t scom, uint64_t reg[2], uint
>   }
> 
>   /*
> - * Probe NPU2 device node and create PCI root device node
> - * accordingly. The NPU2 device node should specify number
> - * of links and xscom base address to access links.
> + * Set up NPU for NVLink and create PCI root device node
> + * accordingly.
>    */
> -static void npu2_probe_phb(struct dt_node *dn)
> +int npu2_nvlink_init_npu(struct npu2 *npu)
>   {
> -	struct proc_chip *proc_chip;
> -	struct dt_node *np, *link;
> -	bool ocapi_detected = false, nvlink_detected = false;
> -	uint32_t gcid, scom, index, phb_index, links;
> +	struct dt_node *np;
>   	uint64_t reg[2], mm_win[2], val;
> -	char *path;
> -
> -	/* Abort if any OpenCAPI links detected */
> -	dt_for_each_compatible(dn, link, "ibm,npu-link") {
> -		if (npu2_dt_link_dev_type(link) == NPU2_DEV_TYPE_OPENCAPI)
> -			ocapi_detected = true;
> -		else
> -			nvlink_detected = true;
> -	}
> -
> -	if (ocapi_detected && nvlink_detected) {
> -		prlog(PR_ERR, "NPU: NVLink and OpenCAPI devices on same chip not supported\n");
> -	        return;
> -	} else if (ocapi_detected) {
> -		prlog(PR_INFO, "NPU: OpenCAPI link configuration detected, not initialising NVLink\n");
> -		return;
> -	}
> -
> -	/* Retrieve chip id */
> -	path = dt_get_path(dn);
> -	gcid = dt_get_chip_id(dn);
> -	proc_chip = get_chip(gcid);
> -	assert(proc_chip);
> -	if ((proc_chip->ec_level & 0xf0) > 0x20) {
> -		prerror("NPU: unsupported ec level on Chip 0x%x!\n", gcid);
> -		return;
> -	}
> 
>   	/* TODO: Clean this up with register names, etc. when we get
>   	 * time. This just turns NVLink mode on in each brick and should
> @@ -1403,64 +1372,56 @@ static void npu2_probe_phb(struct dt_node *dn)
>   	 *
>   	 * Obviously if the year is now 2020 that didn't happen and you
>   	 * should fix this :-) */
> -	xscom_write_mask(gcid, 0x5011000, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011030, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011060, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011090, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011200, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011230, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011260, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011290, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011400, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011430, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011460, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011490, PPC_BIT(58), PPC_BIT(58));
> -
> -	xscom_write_mask(gcid, 0x50110c0, PPC_BIT(53), PPC_BIT(53));
> -	xscom_write_mask(gcid, 0x50112c0, PPC_BIT(53), PPC_BIT(53));
> -	xscom_write_mask(gcid, 0x50114c0, PPC_BIT(53), PPC_BIT(53));
> -	xscom_write_mask(gcid, 0x50110f1, PPC_BIT(41), PPC_BIT(41));
> -	xscom_write_mask(gcid, 0x50112f1, PPC_BIT(41), PPC_BIT(41));
> -	xscom_write_mask(gcid, 0x50114f1, PPC_BIT(41), PPC_BIT(41));
> +	xscom_write_mask(npu->chip_id, 0x5011000, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011030, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011060, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011090, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011200, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011230, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011260, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011290, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011400, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011430, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011460, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011490, PPC_BIT(58), PPC_BIT(58));
> +
> +	xscom_write_mask(npu->chip_id, 0x50110c0, PPC_BIT(53), PPC_BIT(53));
> +	xscom_write_mask(npu->chip_id, 0x50112c0, PPC_BIT(53), PPC_BIT(53));
> +	xscom_write_mask(npu->chip_id, 0x50114c0, PPC_BIT(53), PPC_BIT(53));
> +	xscom_write_mask(npu->chip_id, 0x50110f1, PPC_BIT(41), PPC_BIT(41));
> +	xscom_write_mask(npu->chip_id, 0x50112f1, PPC_BIT(41), PPC_BIT(41));
> +	xscom_write_mask(npu->chip_id, 0x50114f1, PPC_BIT(41), PPC_BIT(41));
> 
>   	val = NPU2_NTL_MISC_CFG2_BRICK_ENABLE |
>   	      NPU2_NTL_MISC_CFG2_NDL_TX_PARITY_ENA |
>   	      NPU2_NTL_MISC_CFG2_NDL_PRI_PARITY_ENA |
>   	      NPU2_NTL_MISC_CFG2_RCV_CREDIT_OVERFLOW_ENA;
> -	xscom_write_mask(gcid, 0x5011110, val, val);
> -	xscom_write_mask(gcid, 0x5011130, val, val);
> -	xscom_write_mask(gcid, 0x5011310, val, val);
> -	xscom_write_mask(gcid, 0x5011330, val, val);
> -	xscom_write_mask(gcid, 0x5011510, val, val);
> -	xscom_write_mask(gcid, 0x5011530, val, val);
> +	xscom_write_mask(npu->chip_id, 0x5011110, val, val);
> +	xscom_write_mask(npu->chip_id, 0x5011130, val, val);
> +	xscom_write_mask(npu->chip_id, 0x5011310, val, val);
> +	xscom_write_mask(npu->chip_id, 0x5011330, val, val);
> +	xscom_write_mask(npu->chip_id, 0x5011510, val, val);
> +	xscom_write_mask(npu->chip_id, 0x5011530, val, val);
> 
>   	val = PPC_BIT(6) | PPC_BIT(7) | PPC_BIT(11);
> -	xscom_write_mask(gcid, 0x5011009, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011039, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011069, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011099, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011209, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011239, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011269, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011299, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011409, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011439, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011469, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011499, val, PPC_BITMASK(6,11));
> -
> -	index = dt_prop_get_u32(dn, "ibm,npu-index");
> -	phb_index = dt_prop_get_u32(dn, "ibm,phb-index");
> -	links = dt_prop_get_u32(dn, "ibm,npu-links");
> -	prlog(PR_INFO, "NPU: Chip %d Found NPU2#%d (%d links) at %s\n",
> -	      gcid, index, links, path);
> -	free(path);
> -
> -	/* Retrieve scom base address */
> -	scom = dt_get_address(dn, 0, NULL);
> -	prlog(PR_INFO, "   SCOM Base:  %08x\n", scom);
> +	xscom_write_mask(npu->chip_id, 0x5011009, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011039, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011069, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011099, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011209, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011239, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011269, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011299, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011409, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011439, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011469, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011499, val, PPC_BITMASK(6,11));
> 
>   	/* Reassign the BARs */
> -	assign_mmio_bars(gcid, scom, reg, mm_win);
> +	assign_mmio_bars(npu->chip_id, npu->xscom_base, reg, mm_win);
> +	npu->regs = (void *)reg[0];
> +	npu->mm_base = mm_win[0];
> +	npu->mm_size = mm_win[1];
> 
>   	if (reg[0] && reg[1])
>   		prlog(PR_INFO, "   Global MMIO BAR:  %016llx (%lldMB)\n",
> @@ -1477,17 +1438,21 @@ static void npu2_probe_phb(struct dt_node *dn)
>   				"ibm,ioda2-npu2-phb");
>   	dt_add_property_strings(np, "device_type", "pciex");
>   	dt_add_property(np, "reg", reg, sizeof(reg));
> -	dt_add_property_cells(np, "ibm,phb-index", phb_index);
> -	dt_add_property_cells(np, "ibm,npu-index", index);
> -	dt_add_property_cells(np, "ibm,chip-id", gcid);
> -	dt_add_property_cells(np, "ibm,xscom-base", scom);
> -	dt_add_property_cells(np, "ibm,npcq", dn->phandle);
> -	dt_add_property_cells(np, "ibm,links", links);
> +	dt_add_property_cells(np, "ibm,phb-index", npu->phb_index);
> +	dt_add_property_cells(np, "ibm,npu-index", npu->index);
> +	dt_add_property_cells(np, "ibm,chip-id", npu->chip_id);
> +	dt_add_property_cells(np, "ibm,xscom-base", npu->xscom_base);
> +	dt_add_property_cells(np, "ibm,npcq", npu->dt_node->phandle);
> +	dt_add_property_cells(np, "ibm,links", npu->total_devices);
>   	dt_add_property(np, "ibm,mmio-window", mm_win, sizeof(mm_win));
>   	dt_add_property_cells(np, "ibm,phb-diag-data-size", 0);
> 
>   	/* Disable fast reboot - not currently supported */
>   	disable_fast_reboot("NVLink device enabled");
> +
> +	npu2_nvlink_create_phb(npu, np);
> +
> +	return 0;
>   }
> 
>   static uint32_t npu2_populate_pcie_cap(struct npu2_dev *dev,
> @@ -1949,49 +1914,25 @@ static void npu2_setup_irqs(struct npu2 *p)
>   	npu2_write(p, reg, val);
>   }
> 
> -static void npu2_create_phb(struct dt_node *dn)
> +void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn)
>   {
> -	const struct dt_property *prop;
> -	struct npu2 *p;
>   	struct pci_slot *slot;
> -	uint32_t links;
> -	void *pmem;
> -
> -	/* Retrieve number of devices */
> -	links = dt_prop_get_u32(dn, "ibm,links");
> -	pmem = zalloc(sizeof(struct npu2) + links * sizeof(struct npu2_dev));
> -	assert(pmem);
> -
> -	/* Populate PHB */
> -	p = pmem;
> -	p->index = dt_prop_get_u32(dn, "ibm,phb-index");
> -	p->chip_id = dt_prop_get_u32(dn, "ibm,chip-id");
> -	p->xscom_base = dt_prop_get_u32(dn, "ibm,xscom-base");
> -	p->total_devices = links;
> -	p->regs = (void *)dt_get_address(dn, 0, NULL);
> -
> -	prop = dt_require_property(dn, "ibm,mmio-window", -1);
> -	assert(prop->len >= (2 * sizeof(uint64_t)));
> -	p->mm_base = ((const uint64_t *)prop->prop)[0];
> -	p->mm_size = ((const uint64_t *)prop->prop)[1];
> -
> -	p->devices = pmem + sizeof(struct npu2);
> 
>   	/* Generic PHB */
> -	p->phb_nvlink.dt_node = dn;
> -	p->phb_nvlink.ops = &npu_ops;
> -	p->phb_nvlink.phb_type = phb_type_npu_v2;
> -	init_lock(&p->lock);
> -	init_lock(&p->phb_nvlink.lock);
> -	list_head_init(&p->phb_nvlink.devices);
> -	list_head_init(&p->phb_nvlink.virt_devices);
> -
> -	npu2_setup_irqs(p);
> -	npu2_populate_devices(p, dn);
> -	npu2_add_interrupt_map(p, dn);
> -	npu2_add_phb_properties(p);
> -
> -	slot = npu2_slot_create(&p->phb_nvlink);
> +	npu->phb_nvlink.dt_node = dn;
> +	npu->phb_nvlink.ops = &npu_ops;
> +	npu->phb_nvlink.phb_type = phb_type_npu_v2;
> +	init_lock(&npu->lock);
> +	init_lock(&npu->phb_nvlink.lock);
> +	list_head_init(&npu->phb_nvlink.devices);
> +	list_head_init(&npu->phb_nvlink.virt_devices);
> +
> +	npu2_setup_irqs(npu);
> +	npu2_populate_devices(npu, dn);
> +	npu2_add_interrupt_map(npu, dn);
> +	npu2_add_phb_properties(npu);
> +
> +	slot = npu2_slot_create(&npu->phb_nvlink);
>   	if (!slot)
>   	{
>   		/**
> @@ -2002,41 +1943,10 @@ static void npu2_create_phb(struct dt_node *dn)
>   		prlog(PR_ERR, "NPU: Cannot create PHB slot\n");
>   	}
> 
> -	pci_register_phb(&p->phb_nvlink, OPAL_DYNAMIC_PHB_ID);
> -
> -	npu2_init_ioda_cache(p);
> -	npu2_hw_init(p);
> -}
> -
> -void probe_npu2(void)
> -{
> -	struct proc_chip *chip = next_chip(NULL);
> -	struct dt_node *np;
> -	const char *zcal;
> -
> -	/* Abort if we're running on DD1 */
> -	if (chip &&
> -	    (chip->type == PROC_CHIP_P9_NIMBUS ||
> -	     chip->type == PROC_CHIP_P9_CUMULUS) &&
> -	    (chip->ec_level & 0xf0) == 0x10) {
> -		prlog(PR_INFO, "NPU: DD1 not supported\n");
> -		return;
> -	}
> -
> -	/* Check for a zcal override */
> -	zcal = nvram_query("nv_zcal_override");
> -	if (zcal) {
> -		nv_zcal_nominal = atoi(zcal);
> -		prlog(PR_WARNING, "NPU: Using ZCAL impedance override = %d\n", nv_zcal_nominal);
> -	}
> -
> -	/* Scan NPU2 XSCOM nodes */
> -	dt_for_each_compatible(dt_root, np, "ibm,power9-npu")
> -		npu2_probe_phb(np);
> +	pci_register_phb(&npu->phb_nvlink, OPAL_DYNAMIC_PHB_ID);
> 
> -	/* Scan newly created PHB nodes */
> -	dt_for_each_compatible(dt_root, np, "ibm,power9-npu-pciex")
> -		npu2_create_phb(np);
> +	npu2_init_ioda_cache(npu);
> +	npu2_hw_init(npu);
>   }
> 
>   /*
> diff --git a/include/npu2.h b/include/npu2.h
> index 10742031ec0f..9fdc438bb4c1 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -147,6 +147,7 @@ struct npu2 {
>   	uint32_t	chip_id;
>   	uint64_t	xscom_base;
>   	void		*regs;
> +	void		*ocapi_global_mmio_base;
>   	uint64_t	mm_base;
>   	uint64_t	mm_size;
>   	uint32_t	base_lsi;
> @@ -167,6 +168,7 @@ struct npu2 {
> 
>   	/* NVLink */
>   	struct phb	phb_nvlink;
> +	uint32_t	phb_index;
>   };
> 
>   static inline struct npu2 *phb_to_npu2_nvlink(struct phb *phb)
> @@ -193,6 +195,10 @@ static inline struct phb *npu2_dev_to_phb(struct npu2_dev *ndev)
>   	}
>   }
> 
> +int npu2_opencapi_init_npu(struct npu2 *npu);
> +int npu2_nvlink_init_npu(struct npu2 *npu);
> +void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn);
> +
>   enum npu2_dev_type npu2_dt_link_dev_type(struct dt_node *link);
>   void npu2_write_4b(struct npu2 *p, uint64_t reg, uint32_t val);
>   uint32_t npu2_read_4b(struct npu2 *p, uint64_t reg);
> diff --git a/include/skiboot.h b/include/skiboot.h
> index bba76c12c39e..f2818b1a2326 100644
> --- a/include/skiboot.h
> +++ b/include/skiboot.h
> @@ -201,7 +201,6 @@ extern int preload_capp_ucode(void);
>   extern void preload_io_vpd(void);
>   extern void probe_npu(void);
>   extern void probe_npu2(void);
> -extern void probe_npu2_opencapi(void);
>   extern void uart_init(void);
>   extern void mbox_init(void);
>   extern void early_uart_init(void);
> 



More information about the Skiboot mailing list