[Skiboot] [PATCH v3 3/6] hw/npu2: Common NPU2 init routine between NVLink and OpenCAPI

Frederic Barrat fbarrat at linux.ibm.com
Fri Aug 31 23:59:32 AEST 2018



Le 31/08/2018 à 06:16, Andrew Donnellan a écrit :
> Replace probe_npu2() and probe_npu2_opencapi() with a new shared
> probe_npu2(). Refactor some of the common NPU setup code into shared code.
> 
> No functional change. This patch does not implement support for using both
> types of devices simultaneously on the same NPU - we expect to add this
> sometime in the future.
> 
> Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
> Acked-by: Reza Arbab <arbab at linux.ibm.com>
> Reviewed-by: Alistair Popple <alistair at popple.id.au>
> 
> ---
> 
> v1->v2:
> - cleaned up unneeded null check (Alistair)
> 
> v2->v3:
> - add check for npu->devices overflow (Fred)
> - get rid of ocapi_global_mmio_base, which I'd separated out for... some
> presumably very good reason that I can't remember (Fred)
> ---


Reviewed-by: Frederic Barrat <fbarrat at linux.ibm.com>


>   core/init.c        |   2 +-
>   hw/npu2-common.c   | 119 +++++++++++++++++++++++-
>   hw/npu2-opencapi.c | 158 ++++++++++---------------------
>   hw/npu2.c          | 236 ++++++++++++++--------------------------------
>   include/npu2.h     |   5 +-
>   include/skiboot.h  |   1 +-
>   6 files changed, 250 insertions(+), 271 deletions(-)
> 
> diff --git a/core/init.c b/core/init.c
> index ca6c468c3941..9095981e67e0 100644
> --- a/core/init.c
> +++ b/core/init.c
> @@ -1126,8 +1126,6 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
>   	/* Probe NPUs */
>   	probe_npu();
>   	probe_npu2();
> -	/* TODO: Eventually, we'll do NVLink and OpenCAPI together */
> -	probe_npu2_opencapi();
> 
>   	/* Initialize PCI */
>   	pci_init_slots();
> diff --git a/hw/npu2-common.c b/hw/npu2-common.c
> index d076b4906fcc..476fc0683d56 100644
> --- a/hw/npu2-common.c
> +++ b/hw/npu2-common.c
> @@ -20,6 +20,7 @@
>   #include <npu2.h>
>   #include <npu2-regs.h>
>   #include <bitutils.h>
> +#include <nvram.h>
> 
>   enum npu2_dev_type npu2_dt_link_dev_type(struct dt_node *link)
>   {
> @@ -107,3 +108,121 @@ void npu2_write_mask_4b(struct npu2 *p, uint64_t reg, uint32_t val, uint32_t mas
>   	npu2_scom_write(p->chip_id, p->xscom_base, reg, NPU2_MISC_DA_LEN_4B,
>   			(uint64_t)new_val << 32);
>   }
> +
> +static struct npu2 *setup_npu(struct dt_node *dn)
> +{
> +	struct npu2 *npu;
> +	struct npu2_dev *dev;
> +	struct dt_node *np;
> +	uint32_t num_links;
> +	void *npumem;
> +	char *path;
> +	int gcid;
> +	struct proc_chip *chip;
> +	int i = 0;
> +
> +	/* Retrieve chip ID */
> +	path = dt_get_path(dn);
> +	gcid = dt_get_chip_id(dn);
> +	chip = get_chip(gcid);
> +	assert(chip);
> +
> +	num_links = dt_prop_get_u32(dn, "ibm,npu-links");
> +	npumem = zalloc(sizeof(struct npu2) + num_links *
> +			sizeof(struct npu2_dev));
> +	assert(npumem);
> +	npu = npumem;
> +
> +	npu->dt_node = dn;
> +	npu->index = dt_prop_get_u32(dn, "ibm,npu-index");
> +	npu->chip_id = gcid;
> +	npu->xscom_base = dt_get_address(dn, 0, NULL);
> +	npu->phb_index = dt_prop_get_u32(dn, "ibm,phb-index");
> +	npu->devices = npumem + sizeof(struct npu2);
> +
> +	dt_for_each_compatible(dn, np, "ibm,npu-link") {
> +		assert(i < num_links);
> +		dev = &npu->devices[i];
> +		dev->link_index = dt_prop_get_u32(np, "ibm,npu-link-index");
> +		/* May be overridden by platform presence detection */
> +		dev->brick_index = dev->link_index;
> +		dev->type = npu2_dt_link_dev_type(np);
> +		dev->npu = npu;
> +		dev->dt_node = np;
> +		dev->pl_xscom_base = dt_prop_get_u64(np, "ibm,npu-phy");
> +		dev->lane_mask = dt_prop_get_u32(np, "ibm,npu-lane-mask");
> +		dev->link_speed = dt_prop_get_u64(np, "ibm,link-speed");
> +		i++;
> +	};
> +	npu->total_devices = i;
> +
> +	prlog(PR_INFO, "NPU: Chip %d Found NPU2#%d (%d links) at %s\n",
> +	      npu->chip_id, npu->index, npu->total_devices, path);
> +	prlog(PR_INFO, "   SCOM Base:  %08llx\n", npu->xscom_base);
> +	free(path);
> +	return npu;
> +}
> +
> +static void setup_devices(struct npu2 *npu)
> +{
> +	bool nvlink_detected = false, ocapi_detected = false;
> +	struct npu2_dev *dev;
> +
> +	/*
> +	 * TODO: In future, we'll do brick configuration here to support mixed
> +	 * setups.
> +	 */
> +	for (int i = 0; i < npu->total_devices; i++) {
> +		dev = &npu->devices[i];
> +		switch (dev->type) {
> +		case NPU2_DEV_TYPE_NVLINK:
> +			nvlink_detected = true;
> +			break;
> +		case NPU2_DEV_TYPE_OPENCAPI:
> +			ocapi_detected = true;
> +			break;
> +		default:
> +			prlog(PR_INFO, "NPU: Link %d device not present\n",
> +			      npu->devices[i].link_index);
> +		}
> +	}
> +
> +	if (nvlink_detected && ocapi_detected) {
> +		prlog(PR_ERR, "NPU: NVLink and OpenCAPI devices on same chip not supported, aborting NPU init\n");
> +		return;
> +	}
> +
> +	if (nvlink_detected)
> +		npu2_nvlink_init_npu(npu);
> +	else if (ocapi_detected)
> +		npu2_opencapi_init_npu(npu);
> +}
> +
> +void probe_npu2(void)
> +{
> +	struct proc_chip *chip = next_chip(NULL);
> +	struct npu2 *npu;
> +	struct dt_node *np;
> +	const char *zcal;
> +
> +	/* Abort if we're running on DD1 */
> +	if (chip &&
> +	    (chip->type == PROC_CHIP_P9_NIMBUS ||
> +	     chip->type == PROC_CHIP_P9_CUMULUS) &&
> +	    (chip->ec_level & 0xf0) == 0x10) {
> +		prlog(PR_INFO, "NPU2: DD1 not supported\n");
> +		return;
> +	}
> +
> +	/* Check for a zcal override */
> +	zcal = nvram_query("nv_zcal_override");
> +	if (zcal) {
> +		nv_zcal_nominal = atoi(zcal);
> +		prlog(PR_WARNING, "NPU2: Using ZCAL impedance override = %d\n", nv_zcal_nominal);
> +	}
> +
> +	dt_for_each_compatible(dt_root, np, "ibm,power9-npu") {
> +	        npu = setup_npu(np);
> +		setup_devices(npu);
> +	}
> +}
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 68ae93a2f836..270876f78aa3 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -1450,7 +1450,7 @@ static int npu2_add_mmio_regs(struct phb *phb, struct pci_device *pd,
>   	dar    = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_DAR(stacku, block);
>   	tfc    = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_TFC(stacku, block);
>   	handle = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_PEHANDLE(stacku,
> -								block);
> +								   block);
>   	dt_add_property_cells(pd->dn, "ibm,opal-xsl-irq", irq);
>   	dt_add_property_cells(pd->dn, "ibm,opal-xsl-mmio",
>   			hi32(dsisr), lo32(dsisr),
> @@ -1576,20 +1576,15 @@ static void setup_debug_training_state(struct npu2_dev *dev)
>   	}
>   }
> 
> -static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
> -				       struct npu2_dev *dev)
> +static void setup_device(struct npu2_dev *dev)
>   {
> -	uint32_t dev_index, npu_index;
>   	struct dt_node *dn_phb, *dn;
>   	struct pci_slot *slot;
>   	char port_name[17];
>   	uint64_t mm_win[2];
> 
> -	dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
> -	npu_index = dt_prop_get_u32(n->dt_node, "ibm,npu-index");
> -
>   	/* Populate PHB device node */
> -	phys_map_get(n->chip_id, NPU_OCAPI_MMIO, dev_index, &mm_win[0],
> +	phys_map_get(dev->npu->chip_id, NPU_OCAPI_MMIO, dev->brick_index, &mm_win[0],
>   		     &mm_win[1]);
>   	prlog(PR_DEBUG, "OCAPI: Setting MMIO window to %016llx + %016llx\n",
>   	      mm_win[0], mm_win[1]);
> @@ -1609,40 +1604,28 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
> 
>   	dt_add_property_strings(dn_phb, "device_type", "pciex");
>   	dt_add_property(dn_phb, "reg", mm_win, sizeof(mm_win));
> -	dt_add_property_cells(dn_phb, "ibm,npu-index", npu_index);
> -	dt_add_property_cells(dn_phb, "ibm,chip-id", n->chip_id);
> -	dt_add_property_cells(dn_phb, "ibm,xscom-base", n->xscom_base);
> -	dt_add_property_cells(dn_phb, "ibm,npcq", n->dt_node->phandle);
> +	dt_add_property_cells(dn_phb, "ibm,npu-index", dev->npu->index);
> +	dt_add_property_cells(dn_phb, "ibm,chip-id", dev->npu->chip_id);
> +	dt_add_property_cells(dn_phb, "ibm,xscom-base", dev->npu->xscom_base);
> +	dt_add_property_cells(dn_phb, "ibm,npcq", dev->npu->dt_node->phandle);
>   	dt_add_property_cells(dn_phb, "ibm,links", 1);
>   	dt_add_property(dn_phb, "ibm,mmio-window", mm_win, sizeof(mm_win));
>   	dt_add_property_cells(dn_phb, "ibm,phb-diag-data-size", 0);
>   	dt_add_property_cells(dn_phb, "ibm,opal-num-pes", NPU2_MAX_PE_NUM);
> 
> -	n->mm_base = mm_win[0];
> -	n->mm_size = mm_win[1];
> -
>   	dt_add_property_cells(dn_phb, "ranges", 0x02000000,
> -			      hi32(n->mm_base), lo32(n->mm_base),
> -			      hi32(n->mm_base), lo32(n->mm_base),
> -			      hi32(n->mm_size), lo32(n->mm_size));
> +			      hi32(mm_win[0]), lo32(mm_win[0]),
> +			      hi32(mm_win[0]), lo32(mm_win[0]),
> +			      hi32(mm_win[1]), lo32(mm_win[1]));
> 
> -	dev->type = NPU2_DEV_TYPE_OPENCAPI;
> -	dev->npu = n;
> -	dev->dt_node = dn_link;
>   	dev->phb_ocapi.dt_node = dn_phb;
>   	dev->phb_ocapi.ops = &npu2_opencapi_ops;
>   	dev->phb_ocapi.phb_type = phb_type_npu_v2_opencapi;
>   	dev->phb_ocapi.scan_map = 0;
> -	dev->link_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
> -	dev->brick_index = dev->link_index;
> -	dev->pl_xscom_base = dt_prop_get_u64(dn_link, "ibm,npu-phy");
> -	dev->lane_mask = dt_prop_get_u32(dn_link, "ibm,npu-lane-mask");
> -	dev->link_speed = dt_prop_get_u64(dn_link, "ibm,link-speed");
> +
>   	dev->bdfn = 0;
>   	dev->train_need_fence = false;
>   	dev->train_fenced = false;
> -	n->total_devices++;
> -
>   	/* Find I2C port for handling device reset */
>   	snprintf(port_name, sizeof(port_name), "p8_%08x_e%dp%d",
>   		 dev->npu->chip_id, platform.ocapi->i2c_engine,
> @@ -1663,11 +1646,11 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
> 
>   	/* TODO: Procedure 13.1.3.7 - AFU Memory Range BARs */
>   	/* Procedure 13.1.3.8 - AFU MMIO Range BARs */
> -	setup_afu_mmio_bars(n->chip_id, n->xscom_base, dev);
> +	setup_afu_mmio_bars(dev->npu->chip_id, dev->npu->xscom_base, dev);
>   	/* Procedure 13.1.3.9 - AFU Config BARs */
> -	setup_afu_config_bars(n->chip_id, n->xscom_base, dev);
> +	setup_afu_config_bars(dev->npu->chip_id, dev->npu->xscom_base, dev);
> 
> -	set_fence_control(n->chip_id, n->xscom_base, dev->brick_index, 0b00);
> +	set_fence_control(dev->npu->chip_id, dev->npu->xscom_base, dev->brick_index, 0b00);
> 
>   	if (npu2_ocapi_training_state != NPU2_TRAIN_DEFAULT) {
>   		setup_debug_training_state(dev);
> @@ -1689,107 +1672,72 @@ failed:
>   	return;
>   }
> 
> -static void npu2_opencapi_probe(struct dt_node *dn)
> +static void read_nvram_training_state(void)
>   {
> -	struct dt_node *link;
> -	char *path;
> -	uint32_t gcid, index, links, scom_base;
> -	uint64_t reg[2];
> -	uint64_t dev_index;
> -	struct npu2 *n;
> -	int rc, i = 0;
> -
> -	gcid = dt_get_chip_id(dn);
> -	index = dt_prop_get_u32(dn, "ibm,npu-index");
> -	links = dt_prop_get_u32(dn, "ibm,npu-links");
> -
> -	/* Don't try to init when we have an NVLink link */
> -	dt_for_each_compatible(dn, link, "ibm,npu-link") {
> -		if (npu2_dt_link_dev_type(link) != NPU2_DEV_TYPE_OPENCAPI) {
> -			prlog(PR_DEBUG,
> -			      "OCAPI: NPU%d: Non-OpenCAPI link found, skipping OpenCAPI init\n",
> -			      index);
> -			return;
> -		}
> +	const char *state;
> +
> +	state = nvram_query("opencapi-link-training");
> +	if (state) {
> +		if (!strcmp(state, "prbs31"))
> +			npu2_ocapi_training_state = NPU2_TRAIN_PRBS31;
> +		else if (!strcmp(state, "none"))
> +			npu2_ocapi_training_state = NPU2_TRAIN_NONE;
> +		else
> +			prlog(PR_WARNING,
> +			      "OCAPI: invalid training state in NVRAM: %s\n",
> +			      state);
>   	}
> +}
> 
> -	path = dt_get_path(dn);
> -	prlog(PR_INFO, "OCAPI: Chip %d Found OpenCAPI NPU%d (%d links) at %s\n",
> -	      gcid, index, links, path);
> -	free(path);
> +int npu2_opencapi_init_npu(struct npu2 *npu)
> +{
> +	struct npu2_dev *dev;
> +	uint64_t reg[2];
> +	int rc;
> 
>   	assert(platform.ocapi);
> +	read_nvram_training_state();
> 
>   	/* TODO: Test OpenCAPI with fast reboot and make it work */
>   	disable_fast_reboot("OpenCAPI device enabled");
> 
> -	scom_base = dt_get_address(dn, 0, NULL);
> -	prlog(PR_INFO, "OCAPI:	 SCOM Base:  %08x\n", scom_base);
> +	setup_global_mmio_bar(npu->chip_id, npu->xscom_base, reg);
> 
> -	setup_global_mmio_bar(gcid, scom_base, reg);
> +	npu->regs = (void *)reg[0];
> 
> -	n = zalloc(sizeof(struct npu2) + links * sizeof(struct npu2_dev));
> -	n->devices = (struct npu2_dev *)(n + 1);
> -	n->chip_id = gcid;
> -	n->xscom_base = scom_base;
> -	n->regs = (void *)reg[0];
> -	n->dt_node = dn;
> +	for (int i = 0; i < npu->total_devices; i++) {
> +		dev = &npu->devices[i];
> +		if (dev->type != NPU2_DEV_TYPE_OPENCAPI)
> +			continue;
> 
> -	dt_for_each_compatible(dn, link, "ibm,npu-link") {
> -		dev_index = dt_prop_get_u32(link, "ibm,npu-link-index");
> -		prlog(PR_INFO, "OCAPI: Configuring link index %lld\n",
> -		      dev_index);
> +		prlog(PR_INFO, "OCAPI: Configuring link index %d, brick %d\n",
> +		      dev->link_index, dev->brick_index);
> 
>   		/* Procedure 13.1.3.1 - Select OCAPI vs NVLink */
> -		brick_config(gcid, scom_base, dev_index);
> +		brick_config(npu->chip_id, npu->xscom_base, dev->brick_index);
> 
>   		/* Procedure 13.1.3.5 - Transaction Layer Configuration */
> -		tl_config(gcid, scom_base, dev_index);
> +		tl_config(npu->chip_id, npu->xscom_base, dev->brick_index);
> 
>   		/* Procedure 13.1.3.6 - Address Translation Configuration */
> -		address_translation_config(gcid, scom_base, dev_index);
> +		address_translation_config(npu->chip_id, npu->xscom_base, dev->brick_index);
>   	}
> 
>   	/* Procedure 13.1.3.10 - Interrupt Configuration */
> -	rc = setup_irq(n);
> +	rc = setup_irq(npu);
>   	if (rc)
>   		goto failed;
> 
> -	dt_for_each_compatible(dn, link, "ibm,npu-link") {
> -		npu2_opencapi_setup_device(link, n, &n->devices[i]);
> -		i++;
> +	for (int i = 0; i < npu->total_devices; i++) {
> +		dev = &npu->devices[i];
> +		if (dev->type != NPU2_DEV_TYPE_OPENCAPI)
> +			continue;
> +		setup_device(dev);
>   	}
> 
> -	return;
> +	return 0;
>   failed:
> -	free(n);
> -}
> -
> -static void read_nvram_training_state(void)
> -{
> -	const char *state;
> -
> -	state = nvram_query("opencapi-link-training");
> -	if (state) {
> -		if (!strcmp(state, "prbs31"))
> -			npu2_ocapi_training_state = NPU2_TRAIN_PRBS31;
> -		else if (!strcmp(state, "none"))
> -			npu2_ocapi_training_state = NPU2_TRAIN_NONE;
> -		else
> -			prlog(PR_WARNING,
> -			      "OCAPI: invalid training state in NVRAM: %s\n",
> -			      state);
> -	}
> -}
> -
> -void probe_npu2_opencapi(void)
> -{
> -	struct dt_node *np_npu;
> -
> -	read_nvram_training_state();
> -
> -	dt_for_each_compatible(dt_root, np_npu, "ibm,power9-npu")
> -		npu2_opencapi_probe(np_npu);
> +	return -1;
>   }
> 
>   static const struct phb_ops npu2_opencapi_ops = {
> diff --git a/hw/npu2.c b/hw/npu2.c
> index be1c304420fc..419dd37a24b1 100644
> --- a/hw/npu2.c
> +++ b/hw/npu2.c
> @@ -1357,44 +1357,13 @@ static void assign_mmio_bars(uint64_t gcid, uint32_t scom, uint64_t reg[2], uint
>   }
> 
>   /*
> - * Probe NPU2 device node and create PCI root device node
> - * accordingly. The NPU2 device node should specify number
> - * of links and xscom base address to access links.
> + * Set up NPU for NVLink and create PCI root device node
> + * accordingly.
>    */
> -static void npu2_probe_phb(struct dt_node *dn)
> +int npu2_nvlink_init_npu(struct npu2 *npu)
>   {
> -	struct proc_chip *proc_chip;
> -	struct dt_node *np, *link;
> -	bool ocapi_detected = false, nvlink_detected = false;
> -	uint32_t gcid, scom, index, phb_index, links;
> +	struct dt_node *np;
>   	uint64_t reg[2], mm_win[2], val;
> -	char *path;
> -
> -	/* Abort if any OpenCAPI links detected */
> -	dt_for_each_compatible(dn, link, "ibm,npu-link") {
> -		if (npu2_dt_link_dev_type(link) == NPU2_DEV_TYPE_OPENCAPI)
> -			ocapi_detected = true;
> -		else
> -			nvlink_detected = true;
> -	}
> -
> -	if (ocapi_detected && nvlink_detected) {
> -		prlog(PR_ERR, "NPU: NVLink and OpenCAPI devices on same chip not supported\n");
> -	        return;
> -	} else if (ocapi_detected) {
> -		prlog(PR_INFO, "NPU: OpenCAPI link configuration detected, not initialising NVLink\n");
> -		return;
> -	}
> -
> -	/* Retrieve chip id */
> -	path = dt_get_path(dn);
> -	gcid = dt_get_chip_id(dn);
> -	proc_chip = get_chip(gcid);
> -	assert(proc_chip);
> -	if ((proc_chip->ec_level & 0xf0) > 0x20) {
> -		prerror("NPU: unsupported ec level on Chip 0x%x!\n", gcid);
> -		return;
> -	}
> 
>   	/* TODO: Clean this up with register names, etc. when we get
>   	 * time. This just turns NVLink mode on in each brick and should
> @@ -1403,64 +1372,56 @@ static void npu2_probe_phb(struct dt_node *dn)
>   	 *
>   	 * Obviously if the year is now 2020 that didn't happen and you
>   	 * should fix this :-) */
> -	xscom_write_mask(gcid, 0x5011000, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011030, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011060, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011090, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011200, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011230, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011260, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011290, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011400, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011430, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011460, PPC_BIT(58), PPC_BIT(58));
> -	xscom_write_mask(gcid, 0x5011490, PPC_BIT(58), PPC_BIT(58));
> -
> -	xscom_write_mask(gcid, 0x50110c0, PPC_BIT(53), PPC_BIT(53));
> -	xscom_write_mask(gcid, 0x50112c0, PPC_BIT(53), PPC_BIT(53));
> -	xscom_write_mask(gcid, 0x50114c0, PPC_BIT(53), PPC_BIT(53));
> -	xscom_write_mask(gcid, 0x50110f1, PPC_BIT(41), PPC_BIT(41));
> -	xscom_write_mask(gcid, 0x50112f1, PPC_BIT(41), PPC_BIT(41));
> -	xscom_write_mask(gcid, 0x50114f1, PPC_BIT(41), PPC_BIT(41));
> +	xscom_write_mask(npu->chip_id, 0x5011000, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011030, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011060, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011090, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011200, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011230, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011260, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011290, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011400, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011430, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011460, PPC_BIT(58), PPC_BIT(58));
> +	xscom_write_mask(npu->chip_id, 0x5011490, PPC_BIT(58), PPC_BIT(58));
> +
> +	xscom_write_mask(npu->chip_id, 0x50110c0, PPC_BIT(53), PPC_BIT(53));
> +	xscom_write_mask(npu->chip_id, 0x50112c0, PPC_BIT(53), PPC_BIT(53));
> +	xscom_write_mask(npu->chip_id, 0x50114c0, PPC_BIT(53), PPC_BIT(53));
> +	xscom_write_mask(npu->chip_id, 0x50110f1, PPC_BIT(41), PPC_BIT(41));
> +	xscom_write_mask(npu->chip_id, 0x50112f1, PPC_BIT(41), PPC_BIT(41));
> +	xscom_write_mask(npu->chip_id, 0x50114f1, PPC_BIT(41), PPC_BIT(41));
> 
>   	val = NPU2_NTL_MISC_CFG2_BRICK_ENABLE |
>   	      NPU2_NTL_MISC_CFG2_NDL_TX_PARITY_ENA |
>   	      NPU2_NTL_MISC_CFG2_NDL_PRI_PARITY_ENA |
>   	      NPU2_NTL_MISC_CFG2_RCV_CREDIT_OVERFLOW_ENA;
> -	xscom_write_mask(gcid, 0x5011110, val, val);
> -	xscom_write_mask(gcid, 0x5011130, val, val);
> -	xscom_write_mask(gcid, 0x5011310, val, val);
> -	xscom_write_mask(gcid, 0x5011330, val, val);
> -	xscom_write_mask(gcid, 0x5011510, val, val);
> -	xscom_write_mask(gcid, 0x5011530, val, val);
> +	xscom_write_mask(npu->chip_id, 0x5011110, val, val);
> +	xscom_write_mask(npu->chip_id, 0x5011130, val, val);
> +	xscom_write_mask(npu->chip_id, 0x5011310, val, val);
> +	xscom_write_mask(npu->chip_id, 0x5011330, val, val);
> +	xscom_write_mask(npu->chip_id, 0x5011510, val, val);
> +	xscom_write_mask(npu->chip_id, 0x5011530, val, val);
> 
>   	val = PPC_BIT(6) | PPC_BIT(7) | PPC_BIT(11);
> -	xscom_write_mask(gcid, 0x5011009, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011039, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011069, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011099, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011209, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011239, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011269, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011299, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011409, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011439, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011469, val, PPC_BITMASK(6,11));
> -	xscom_write_mask(gcid, 0x5011499, val, PPC_BITMASK(6,11));
> -
> -	index = dt_prop_get_u32(dn, "ibm,npu-index");
> -	phb_index = dt_prop_get_u32(dn, "ibm,phb-index");
> -	links = dt_prop_get_u32(dn, "ibm,npu-links");
> -	prlog(PR_INFO, "NPU: Chip %d Found NPU2#%d (%d links) at %s\n",
> -	      gcid, index, links, path);
> -	free(path);
> -
> -	/* Retrieve scom base address */
> -	scom = dt_get_address(dn, 0, NULL);
> -	prlog(PR_INFO, "   SCOM Base:  %08x\n", scom);
> +	xscom_write_mask(npu->chip_id, 0x5011009, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011039, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011069, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011099, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011209, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011239, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011269, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011299, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011409, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011439, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011469, val, PPC_BITMASK(6,11));
> +	xscom_write_mask(npu->chip_id, 0x5011499, val, PPC_BITMASK(6,11));
> 
>   	/* Reassign the BARs */
> -	assign_mmio_bars(gcid, scom, reg, mm_win);
> +	assign_mmio_bars(npu->chip_id, npu->xscom_base, reg, mm_win);
> +	npu->regs = (void *)reg[0];
> +	npu->mm_base = mm_win[0];
> +	npu->mm_size = mm_win[1];
> 
>   	if (reg[0] && reg[1])
>   		prlog(PR_INFO, "   Global MMIO BAR:  %016llx (%lldMB)\n",
> @@ -1477,17 +1438,21 @@ static void npu2_probe_phb(struct dt_node *dn)
>   				"ibm,ioda2-npu2-phb");
>   	dt_add_property_strings(np, "device_type", "pciex");
>   	dt_add_property(np, "reg", reg, sizeof(reg));
> -	dt_add_property_cells(np, "ibm,phb-index", phb_index);
> -	dt_add_property_cells(np, "ibm,npu-index", index);
> -	dt_add_property_cells(np, "ibm,chip-id", gcid);
> -	dt_add_property_cells(np, "ibm,xscom-base", scom);
> -	dt_add_property_cells(np, "ibm,npcq", dn->phandle);
> -	dt_add_property_cells(np, "ibm,links", links);
> +	dt_add_property_cells(np, "ibm,phb-index", npu->phb_index);
> +	dt_add_property_cells(np, "ibm,npu-index", npu->index);
> +	dt_add_property_cells(np, "ibm,chip-id", npu->chip_id);
> +	dt_add_property_cells(np, "ibm,xscom-base", npu->xscom_base);
> +	dt_add_property_cells(np, "ibm,npcq", npu->dt_node->phandle);
> +	dt_add_property_cells(np, "ibm,links", npu->total_devices);
>   	dt_add_property(np, "ibm,mmio-window", mm_win, sizeof(mm_win));
>   	dt_add_property_cells(np, "ibm,phb-diag-data-size", 0);
> 
>   	/* Disable fast reboot - not currently supported */
>   	disable_fast_reboot("NVLink device enabled");
> +
> +	npu2_nvlink_create_phb(npu, np);
> +
> +	return 0;
>   }
> 
>   static uint32_t npu2_populate_pcie_cap(struct npu2_dev *dev,
> @@ -1949,49 +1914,25 @@ static void npu2_setup_irqs(struct npu2 *p)
>   	npu2_write(p, reg, val);
>   }
> 
> -static void npu2_create_phb(struct dt_node *dn)
> +void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn)
>   {
> -	const struct dt_property *prop;
> -	struct npu2 *p;
>   	struct pci_slot *slot;
> -	uint32_t links;
> -	void *pmem;
> -
> -	/* Retrieve number of devices */
> -	links = dt_prop_get_u32(dn, "ibm,links");
> -	pmem = zalloc(sizeof(struct npu2) + links * sizeof(struct npu2_dev));
> -	assert(pmem);
> -
> -	/* Populate PHB */
> -	p = pmem;
> -	p->index = dt_prop_get_u32(dn, "ibm,phb-index");
> -	p->chip_id = dt_prop_get_u32(dn, "ibm,chip-id");
> -	p->xscom_base = dt_prop_get_u32(dn, "ibm,xscom-base");
> -	p->total_devices = links;
> -	p->regs = (void *)dt_get_address(dn, 0, NULL);
> -
> -	prop = dt_require_property(dn, "ibm,mmio-window", -1);
> -	assert(prop->len >= (2 * sizeof(uint64_t)));
> -	p->mm_base = ((const uint64_t *)prop->prop)[0];
> -	p->mm_size = ((const uint64_t *)prop->prop)[1];
> -
> -	p->devices = pmem + sizeof(struct npu2);
> 
>   	/* Generic PHB */
> -	p->phb_nvlink.dt_node = dn;
> -	p->phb_nvlink.ops = &npu_ops;
> -	p->phb_nvlink.phb_type = phb_type_npu_v2;
> -	init_lock(&p->lock);
> -	init_lock(&p->phb_nvlink.lock);
> -	list_head_init(&p->phb_nvlink.devices);
> -	list_head_init(&p->phb_nvlink.virt_devices);
> -
> -	npu2_setup_irqs(p);
> -	npu2_populate_devices(p, dn);
> -	npu2_add_interrupt_map(p, dn);
> -	npu2_add_phb_properties(p);
> -
> -	slot = npu2_slot_create(&p->phb_nvlink);
> +	npu->phb_nvlink.dt_node = dn;
> +	npu->phb_nvlink.ops = &npu_ops;
> +	npu->phb_nvlink.phb_type = phb_type_npu_v2;
> +	init_lock(&npu->lock);
> +	init_lock(&npu->phb_nvlink.lock);
> +	list_head_init(&npu->phb_nvlink.devices);
> +	list_head_init(&npu->phb_nvlink.virt_devices);
> +
> +	npu2_setup_irqs(npu);
> +	npu2_populate_devices(npu, dn);
> +	npu2_add_interrupt_map(npu, dn);
> +	npu2_add_phb_properties(npu);
> +
> +	slot = npu2_slot_create(&npu->phb_nvlink);
>   	if (!slot)
>   	{
>   		/**
> @@ -2002,41 +1943,10 @@ static void npu2_create_phb(struct dt_node *dn)
>   		prlog(PR_ERR, "NPU: Cannot create PHB slot\n");
>   	}
> 
> -	pci_register_phb(&p->phb_nvlink, OPAL_DYNAMIC_PHB_ID);
> -
> -	npu2_init_ioda_cache(p);
> -	npu2_hw_init(p);
> -}
> -
> -void probe_npu2(void)
> -{
> -	struct proc_chip *chip = next_chip(NULL);
> -	struct dt_node *np;
> -	const char *zcal;
> -
> -	/* Abort if we're running on DD1 */
> -	if (chip &&
> -	    (chip->type == PROC_CHIP_P9_NIMBUS ||
> -	     chip->type == PROC_CHIP_P9_CUMULUS) &&
> -	    (chip->ec_level & 0xf0) == 0x10) {
> -		prlog(PR_INFO, "NPU: DD1 not supported\n");
> -		return;
> -	}
> -
> -	/* Check for a zcal override */
> -	zcal = nvram_query("nv_zcal_override");
> -	if (zcal) {
> -		nv_zcal_nominal = atoi(zcal);
> -		prlog(PR_WARNING, "NPU: Using ZCAL impedance override = %d\n", nv_zcal_nominal);
> -	}
> -
> -	/* Scan NPU2 XSCOM nodes */
> -	dt_for_each_compatible(dt_root, np, "ibm,power9-npu")
> -		npu2_probe_phb(np);
> +	pci_register_phb(&npu->phb_nvlink, OPAL_DYNAMIC_PHB_ID);
> 
> -	/* Scan newly created PHB nodes */
> -	dt_for_each_compatible(dt_root, np, "ibm,power9-npu-pciex")
> -		npu2_create_phb(np);
> +	npu2_init_ioda_cache(npu);
> +	npu2_hw_init(npu);
>   }
> 
>   /*
> diff --git a/include/npu2.h b/include/npu2.h
> index 10742031ec0f..0e07d77b62c5 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -167,6 +167,7 @@ struct npu2 {
> 
>   	/* NVLink */
>   	struct phb	phb_nvlink;
> +	uint32_t	phb_index;
>   };
> 
>   static inline struct npu2 *phb_to_npu2_nvlink(struct phb *phb)
> @@ -193,6 +194,10 @@ static inline struct phb *npu2_dev_to_phb(struct npu2_dev *ndev)
>   	}
>   }
> 
> +int npu2_opencapi_init_npu(struct npu2 *npu);
> +int npu2_nvlink_init_npu(struct npu2 *npu);
> +void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn);
> +
>   enum npu2_dev_type npu2_dt_link_dev_type(struct dt_node *link);
>   void npu2_write_4b(struct npu2 *p, uint64_t reg, uint32_t val);
>   uint32_t npu2_read_4b(struct npu2 *p, uint64_t reg);
> diff --git a/include/skiboot.h b/include/skiboot.h
> index bba76c12c39e..f2818b1a2326 100644
> --- a/include/skiboot.h
> +++ b/include/skiboot.h
> @@ -201,7 +201,6 @@ extern int preload_capp_ucode(void);
>   extern void preload_io_vpd(void);
>   extern void probe_npu(void);
>   extern void probe_npu2(void);
> -extern void probe_npu2_opencapi(void);
>   extern void uart_init(void);
>   extern void mbox_init(void);
>   extern void early_uart_init(void);
> 



More information about the Skiboot mailing list