[Skiboot] [PATCH v3 3/6] hw/npu2: Common NPU2 init routine between NVLink and OpenCAPI
Frederic Barrat
fbarrat at linux.ibm.com
Fri Aug 31 23:59:32 AEST 2018
Le 31/08/2018 à 06:16, Andrew Donnellan a écrit :
> Replace probe_npu2() and probe_npu2_opencapi() with a new shared
> probe_npu2(). Refactor some of the common NPU setup code into shared code.
>
> No functional change. This patch does not implement support for using both
> types of devices simultaneously on the same NPU - we expect to add this
> sometime in the future.
>
> Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
> Acked-by: Reza Arbab <arbab at linux.ibm.com>
> Reviewed-by: Alistair Popple <alistair at popple.id.au>
>
> ---
>
> v1->v2:
> - cleaned up unneeded null check (Alistair)
>
> v2->v3:
> - add check for npu->devices overflow (Fred)
> - get rid of ocapi_global_mmio_base, which I'd separated out for... some
> presumably very good reason that I can't remember (Fred)
> ---
Reviewed-by: Frederic Barrat <fbarrat at linux.ibm.com>
> core/init.c | 2 +-
> hw/npu2-common.c | 119 +++++++++++++++++++++++-
> hw/npu2-opencapi.c | 158 ++++++++++---------------------
> hw/npu2.c | 236 ++++++++++++++--------------------------------
> include/npu2.h | 5 +-
> include/skiboot.h | 1 +-
> 6 files changed, 250 insertions(+), 271 deletions(-)
>
> diff --git a/core/init.c b/core/init.c
> index ca6c468c3941..9095981e67e0 100644
> --- a/core/init.c
> +++ b/core/init.c
> @@ -1126,8 +1126,6 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
> /* Probe NPUs */
> probe_npu();
> probe_npu2();
> - /* TODO: Eventually, we'll do NVLink and OpenCAPI together */
> - probe_npu2_opencapi();
>
> /* Initialize PCI */
> pci_init_slots();
> diff --git a/hw/npu2-common.c b/hw/npu2-common.c
> index d076b4906fcc..476fc0683d56 100644
> --- a/hw/npu2-common.c
> +++ b/hw/npu2-common.c
> @@ -20,6 +20,7 @@
> #include <npu2.h>
> #include <npu2-regs.h>
> #include <bitutils.h>
> +#include <nvram.h>
>
> enum npu2_dev_type npu2_dt_link_dev_type(struct dt_node *link)
> {
> @@ -107,3 +108,121 @@ void npu2_write_mask_4b(struct npu2 *p, uint64_t reg, uint32_t val, uint32_t mas
> npu2_scom_write(p->chip_id, p->xscom_base, reg, NPU2_MISC_DA_LEN_4B,
> (uint64_t)new_val << 32);
> }
> +
> +static struct npu2 *setup_npu(struct dt_node *dn)
> +{
> + struct npu2 *npu;
> + struct npu2_dev *dev;
> + struct dt_node *np;
> + uint32_t num_links;
> + void *npumem;
> + char *path;
> + int gcid;
> + struct proc_chip *chip;
> + int i = 0;
> +
> + /* Retrieve chip ID */
> + path = dt_get_path(dn);
> + gcid = dt_get_chip_id(dn);
> + chip = get_chip(gcid);
> + assert(chip);
> +
> + num_links = dt_prop_get_u32(dn, "ibm,npu-links");
> + npumem = zalloc(sizeof(struct npu2) + num_links *
> + sizeof(struct npu2_dev));
> + assert(npumem);
> + npu = npumem;
> +
> + npu->dt_node = dn;
> + npu->index = dt_prop_get_u32(dn, "ibm,npu-index");
> + npu->chip_id = gcid;
> + npu->xscom_base = dt_get_address(dn, 0, NULL);
> + npu->phb_index = dt_prop_get_u32(dn, "ibm,phb-index");
> + npu->devices = npumem + sizeof(struct npu2);
> +
> + dt_for_each_compatible(dn, np, "ibm,npu-link") {
> + assert(i < num_links);
> + dev = &npu->devices[i];
> + dev->link_index = dt_prop_get_u32(np, "ibm,npu-link-index");
> + /* May be overridden by platform presence detection */
> + dev->brick_index = dev->link_index;
> + dev->type = npu2_dt_link_dev_type(np);
> + dev->npu = npu;
> + dev->dt_node = np;
> + dev->pl_xscom_base = dt_prop_get_u64(np, "ibm,npu-phy");
> + dev->lane_mask = dt_prop_get_u32(np, "ibm,npu-lane-mask");
> + dev->link_speed = dt_prop_get_u64(np, "ibm,link-speed");
> + i++;
> + };
> + npu->total_devices = i;
> +
> + prlog(PR_INFO, "NPU: Chip %d Found NPU2#%d (%d links) at %s\n",
> + npu->chip_id, npu->index, npu->total_devices, path);
> + prlog(PR_INFO, " SCOM Base: %08llx\n", npu->xscom_base);
> + free(path);
> + return npu;
> +}
> +
> +static void setup_devices(struct npu2 *npu)
> +{
> + bool nvlink_detected = false, ocapi_detected = false;
> + struct npu2_dev *dev;
> +
> + /*
> + * TODO: In future, we'll do brick configuration here to support mixed
> + * setups.
> + */
> + for (int i = 0; i < npu->total_devices; i++) {
> + dev = &npu->devices[i];
> + switch (dev->type) {
> + case NPU2_DEV_TYPE_NVLINK:
> + nvlink_detected = true;
> + break;
> + case NPU2_DEV_TYPE_OPENCAPI:
> + ocapi_detected = true;
> + break;
> + default:
> + prlog(PR_INFO, "NPU: Link %d device not present\n",
> + npu->devices[i].link_index);
> + }
> + }
> +
> + if (nvlink_detected && ocapi_detected) {
> + prlog(PR_ERR, "NPU: NVLink and OpenCAPI devices on same chip not supported, aborting NPU init\n");
> + return;
> + }
> +
> + if (nvlink_detected)
> + npu2_nvlink_init_npu(npu);
> + else if (ocapi_detected)
> + npu2_opencapi_init_npu(npu);
> +}
> +
> +void probe_npu2(void)
> +{
> + struct proc_chip *chip = next_chip(NULL);
> + struct npu2 *npu;
> + struct dt_node *np;
> + const char *zcal;
> +
> + /* Abort if we're running on DD1 */
> + if (chip &&
> + (chip->type == PROC_CHIP_P9_NIMBUS ||
> + chip->type == PROC_CHIP_P9_CUMULUS) &&
> + (chip->ec_level & 0xf0) == 0x10) {
> + prlog(PR_INFO, "NPU2: DD1 not supported\n");
> + return;
> + }
> +
> + /* Check for a zcal override */
> + zcal = nvram_query("nv_zcal_override");
> + if (zcal) {
> + nv_zcal_nominal = atoi(zcal);
> + prlog(PR_WARNING, "NPU2: Using ZCAL impedance override = %d\n", nv_zcal_nominal);
> + }
> +
> + dt_for_each_compatible(dt_root, np, "ibm,power9-npu") {
> + npu = setup_npu(np);
> + setup_devices(npu);
> + }
> +}
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 68ae93a2f836..270876f78aa3 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -1450,7 +1450,7 @@ static int npu2_add_mmio_regs(struct phb *phb, struct pci_device *pd,
> dar = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_DAR(stacku, block);
> tfc = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_TFC(stacku, block);
> handle = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_PEHANDLE(stacku,
> - block);
> + block);
> dt_add_property_cells(pd->dn, "ibm,opal-xsl-irq", irq);
> dt_add_property_cells(pd->dn, "ibm,opal-xsl-mmio",
> hi32(dsisr), lo32(dsisr),
> @@ -1576,20 +1576,15 @@ static void setup_debug_training_state(struct npu2_dev *dev)
> }
> }
>
> -static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
> - struct npu2_dev *dev)
> +static void setup_device(struct npu2_dev *dev)
> {
> - uint32_t dev_index, npu_index;
> struct dt_node *dn_phb, *dn;
> struct pci_slot *slot;
> char port_name[17];
> uint64_t mm_win[2];
>
> - dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
> - npu_index = dt_prop_get_u32(n->dt_node, "ibm,npu-index");
> -
> /* Populate PHB device node */
> - phys_map_get(n->chip_id, NPU_OCAPI_MMIO, dev_index, &mm_win[0],
> + phys_map_get(dev->npu->chip_id, NPU_OCAPI_MMIO, dev->brick_index, &mm_win[0],
> &mm_win[1]);
> prlog(PR_DEBUG, "OCAPI: Setting MMIO window to %016llx + %016llx\n",
> mm_win[0], mm_win[1]);
> @@ -1609,40 +1604,28 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
>
> dt_add_property_strings(dn_phb, "device_type", "pciex");
> dt_add_property(dn_phb, "reg", mm_win, sizeof(mm_win));
> - dt_add_property_cells(dn_phb, "ibm,npu-index", npu_index);
> - dt_add_property_cells(dn_phb, "ibm,chip-id", n->chip_id);
> - dt_add_property_cells(dn_phb, "ibm,xscom-base", n->xscom_base);
> - dt_add_property_cells(dn_phb, "ibm,npcq", n->dt_node->phandle);
> + dt_add_property_cells(dn_phb, "ibm,npu-index", dev->npu->index);
> + dt_add_property_cells(dn_phb, "ibm,chip-id", dev->npu->chip_id);
> + dt_add_property_cells(dn_phb, "ibm,xscom-base", dev->npu->xscom_base);
> + dt_add_property_cells(dn_phb, "ibm,npcq", dev->npu->dt_node->phandle);
> dt_add_property_cells(dn_phb, "ibm,links", 1);
> dt_add_property(dn_phb, "ibm,mmio-window", mm_win, sizeof(mm_win));
> dt_add_property_cells(dn_phb, "ibm,phb-diag-data-size", 0);
> dt_add_property_cells(dn_phb, "ibm,opal-num-pes", NPU2_MAX_PE_NUM);
>
> - n->mm_base = mm_win[0];
> - n->mm_size = mm_win[1];
> -
> dt_add_property_cells(dn_phb, "ranges", 0x02000000,
> - hi32(n->mm_base), lo32(n->mm_base),
> - hi32(n->mm_base), lo32(n->mm_base),
> - hi32(n->mm_size), lo32(n->mm_size));
> + hi32(mm_win[0]), lo32(mm_win[0]),
> + hi32(mm_win[0]), lo32(mm_win[0]),
> + hi32(mm_win[1]), lo32(mm_win[1]));
>
> - dev->type = NPU2_DEV_TYPE_OPENCAPI;
> - dev->npu = n;
> - dev->dt_node = dn_link;
> dev->phb_ocapi.dt_node = dn_phb;
> dev->phb_ocapi.ops = &npu2_opencapi_ops;
> dev->phb_ocapi.phb_type = phb_type_npu_v2_opencapi;
> dev->phb_ocapi.scan_map = 0;
> - dev->link_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
> - dev->brick_index = dev->link_index;
> - dev->pl_xscom_base = dt_prop_get_u64(dn_link, "ibm,npu-phy");
> - dev->lane_mask = dt_prop_get_u32(dn_link, "ibm,npu-lane-mask");
> - dev->link_speed = dt_prop_get_u64(dn_link, "ibm,link-speed");
> +
> dev->bdfn = 0;
> dev->train_need_fence = false;
> dev->train_fenced = false;
> - n->total_devices++;
> -
> /* Find I2C port for handling device reset */
> snprintf(port_name, sizeof(port_name), "p8_%08x_e%dp%d",
> dev->npu->chip_id, platform.ocapi->i2c_engine,
> @@ -1663,11 +1646,11 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
>
> /* TODO: Procedure 13.1.3.7 - AFU Memory Range BARs */
> /* Procedure 13.1.3.8 - AFU MMIO Range BARs */
> - setup_afu_mmio_bars(n->chip_id, n->xscom_base, dev);
> + setup_afu_mmio_bars(dev->npu->chip_id, dev->npu->xscom_base, dev);
> /* Procedure 13.1.3.9 - AFU Config BARs */
> - setup_afu_config_bars(n->chip_id, n->xscom_base, dev);
> + setup_afu_config_bars(dev->npu->chip_id, dev->npu->xscom_base, dev);
>
> - set_fence_control(n->chip_id, n->xscom_base, dev->brick_index, 0b00);
> + set_fence_control(dev->npu->chip_id, dev->npu->xscom_base, dev->brick_index, 0b00);
>
> if (npu2_ocapi_training_state != NPU2_TRAIN_DEFAULT) {
> setup_debug_training_state(dev);
> @@ -1689,107 +1672,72 @@ failed:
> return;
> }
>
> -static void npu2_opencapi_probe(struct dt_node *dn)
> +static void read_nvram_training_state(void)
> {
> - struct dt_node *link;
> - char *path;
> - uint32_t gcid, index, links, scom_base;
> - uint64_t reg[2];
> - uint64_t dev_index;
> - struct npu2 *n;
> - int rc, i = 0;
> -
> - gcid = dt_get_chip_id(dn);
> - index = dt_prop_get_u32(dn, "ibm,npu-index");
> - links = dt_prop_get_u32(dn, "ibm,npu-links");
> -
> - /* Don't try to init when we have an NVLink link */
> - dt_for_each_compatible(dn, link, "ibm,npu-link") {
> - if (npu2_dt_link_dev_type(link) != NPU2_DEV_TYPE_OPENCAPI) {
> - prlog(PR_DEBUG,
> - "OCAPI: NPU%d: Non-OpenCAPI link found, skipping OpenCAPI init\n",
> - index);
> - return;
> - }
> + const char *state;
> +
> + state = nvram_query("opencapi-link-training");
> + if (state) {
> + if (!strcmp(state, "prbs31"))
> + npu2_ocapi_training_state = NPU2_TRAIN_PRBS31;
> + else if (!strcmp(state, "none"))
> + npu2_ocapi_training_state = NPU2_TRAIN_NONE;
> + else
> + prlog(PR_WARNING,
> + "OCAPI: invalid training state in NVRAM: %s\n",
> + state);
> }
> +}
>
> - path = dt_get_path(dn);
> - prlog(PR_INFO, "OCAPI: Chip %d Found OpenCAPI NPU%d (%d links) at %s\n",
> - gcid, index, links, path);
> - free(path);
> +int npu2_opencapi_init_npu(struct npu2 *npu)
> +{
> + struct npu2_dev *dev;
> + uint64_t reg[2];
> + int rc;
>
> assert(platform.ocapi);
> + read_nvram_training_state();
>
> /* TODO: Test OpenCAPI with fast reboot and make it work */
> disable_fast_reboot("OpenCAPI device enabled");
>
> - scom_base = dt_get_address(dn, 0, NULL);
> - prlog(PR_INFO, "OCAPI: SCOM Base: %08x\n", scom_base);
> + setup_global_mmio_bar(npu->chip_id, npu->xscom_base, reg);
>
> - setup_global_mmio_bar(gcid, scom_base, reg);
> + npu->regs = (void *)reg[0];
>
> - n = zalloc(sizeof(struct npu2) + links * sizeof(struct npu2_dev));
> - n->devices = (struct npu2_dev *)(n + 1);
> - n->chip_id = gcid;
> - n->xscom_base = scom_base;
> - n->regs = (void *)reg[0];
> - n->dt_node = dn;
> + for (int i = 0; i < npu->total_devices; i++) {
> + dev = &npu->devices[i];
> + if (dev->type != NPU2_DEV_TYPE_OPENCAPI)
> + continue;
>
> - dt_for_each_compatible(dn, link, "ibm,npu-link") {
> - dev_index = dt_prop_get_u32(link, "ibm,npu-link-index");
> - prlog(PR_INFO, "OCAPI: Configuring link index %lld\n",
> - dev_index);
> + prlog(PR_INFO, "OCAPI: Configuring link index %d, brick %d\n",
> + dev->link_index, dev->brick_index);
>
> /* Procedure 13.1.3.1 - Select OCAPI vs NVLink */
> - brick_config(gcid, scom_base, dev_index);
> + brick_config(npu->chip_id, npu->xscom_base, dev->brick_index);
>
> /* Procedure 13.1.3.5 - Transaction Layer Configuration */
> - tl_config(gcid, scom_base, dev_index);
> + tl_config(npu->chip_id, npu->xscom_base, dev->brick_index);
>
> /* Procedure 13.1.3.6 - Address Translation Configuration */
> - address_translation_config(gcid, scom_base, dev_index);
> + address_translation_config(npu->chip_id, npu->xscom_base, dev->brick_index);
> }
>
> /* Procedure 13.1.3.10 - Interrupt Configuration */
> - rc = setup_irq(n);
> + rc = setup_irq(npu);
> if (rc)
> goto failed;
>
> - dt_for_each_compatible(dn, link, "ibm,npu-link") {
> - npu2_opencapi_setup_device(link, n, &n->devices[i]);
> - i++;
> + for (int i = 0; i < npu->total_devices; i++) {
> + dev = &npu->devices[i];
> + if (dev->type != NPU2_DEV_TYPE_OPENCAPI)
> + continue;
> + setup_device(dev);
> }
>
> - return;
> + return 0;
> failed:
> - free(n);
> -}
> -
> -static void read_nvram_training_state(void)
> -{
> - const char *state;
> -
> - state = nvram_query("opencapi-link-training");
> - if (state) {
> - if (!strcmp(state, "prbs31"))
> - npu2_ocapi_training_state = NPU2_TRAIN_PRBS31;
> - else if (!strcmp(state, "none"))
> - npu2_ocapi_training_state = NPU2_TRAIN_NONE;
> - else
> - prlog(PR_WARNING,
> - "OCAPI: invalid training state in NVRAM: %s\n",
> - state);
> - }
> -}
> -
> -void probe_npu2_opencapi(void)
> -{
> - struct dt_node *np_npu;
> -
> - read_nvram_training_state();
> -
> - dt_for_each_compatible(dt_root, np_npu, "ibm,power9-npu")
> - npu2_opencapi_probe(np_npu);
> + return -1;
> }
>
> static const struct phb_ops npu2_opencapi_ops = {
> diff --git a/hw/npu2.c b/hw/npu2.c
> index be1c304420fc..419dd37a24b1 100644
> --- a/hw/npu2.c
> +++ b/hw/npu2.c
> @@ -1357,44 +1357,13 @@ static void assign_mmio_bars(uint64_t gcid, uint32_t scom, uint64_t reg[2], uint
> }
>
> /*
> - * Probe NPU2 device node and create PCI root device node
> - * accordingly. The NPU2 device node should specify number
> - * of links and xscom base address to access links.
> + * Set up NPU for NVLink and create PCI root device node
> + * accordingly.
> */
> -static void npu2_probe_phb(struct dt_node *dn)
> +int npu2_nvlink_init_npu(struct npu2 *npu)
> {
> - struct proc_chip *proc_chip;
> - struct dt_node *np, *link;
> - bool ocapi_detected = false, nvlink_detected = false;
> - uint32_t gcid, scom, index, phb_index, links;
> + struct dt_node *np;
> uint64_t reg[2], mm_win[2], val;
> - char *path;
> -
> - /* Abort if any OpenCAPI links detected */
> - dt_for_each_compatible(dn, link, "ibm,npu-link") {
> - if (npu2_dt_link_dev_type(link) == NPU2_DEV_TYPE_OPENCAPI)
> - ocapi_detected = true;
> - else
> - nvlink_detected = true;
> - }
> -
> - if (ocapi_detected && nvlink_detected) {
> - prlog(PR_ERR, "NPU: NVLink and OpenCAPI devices on same chip not supported\n");
> - return;
> - } else if (ocapi_detected) {
> - prlog(PR_INFO, "NPU: OpenCAPI link configuration detected, not initialising NVLink\n");
> - return;
> - }
> -
> - /* Retrieve chip id */
> - path = dt_get_path(dn);
> - gcid = dt_get_chip_id(dn);
> - proc_chip = get_chip(gcid);
> - assert(proc_chip);
> - if ((proc_chip->ec_level & 0xf0) > 0x20) {
> - prerror("NPU: unsupported ec level on Chip 0x%x!\n", gcid);
> - return;
> - }
>
> /* TODO: Clean this up with register names, etc. when we get
> * time. This just turns NVLink mode on in each brick and should
> @@ -1403,64 +1372,56 @@ static void npu2_probe_phb(struct dt_node *dn)
> *
> * Obviously if the year is now 2020 that didn't happen and you
> * should fix this :-) */
> - xscom_write_mask(gcid, 0x5011000, PPC_BIT(58), PPC_BIT(58));
> - xscom_write_mask(gcid, 0x5011030, PPC_BIT(58), PPC_BIT(58));
> - xscom_write_mask(gcid, 0x5011060, PPC_BIT(58), PPC_BIT(58));
> - xscom_write_mask(gcid, 0x5011090, PPC_BIT(58), PPC_BIT(58));
> - xscom_write_mask(gcid, 0x5011200, PPC_BIT(58), PPC_BIT(58));
> - xscom_write_mask(gcid, 0x5011230, PPC_BIT(58), PPC_BIT(58));
> - xscom_write_mask(gcid, 0x5011260, PPC_BIT(58), PPC_BIT(58));
> - xscom_write_mask(gcid, 0x5011290, PPC_BIT(58), PPC_BIT(58));
> - xscom_write_mask(gcid, 0x5011400, PPC_BIT(58), PPC_BIT(58));
> - xscom_write_mask(gcid, 0x5011430, PPC_BIT(58), PPC_BIT(58));
> - xscom_write_mask(gcid, 0x5011460, PPC_BIT(58), PPC_BIT(58));
> - xscom_write_mask(gcid, 0x5011490, PPC_BIT(58), PPC_BIT(58));
> -
> - xscom_write_mask(gcid, 0x50110c0, PPC_BIT(53), PPC_BIT(53));
> - xscom_write_mask(gcid, 0x50112c0, PPC_BIT(53), PPC_BIT(53));
> - xscom_write_mask(gcid, 0x50114c0, PPC_BIT(53), PPC_BIT(53));
> - xscom_write_mask(gcid, 0x50110f1, PPC_BIT(41), PPC_BIT(41));
> - xscom_write_mask(gcid, 0x50112f1, PPC_BIT(41), PPC_BIT(41));
> - xscom_write_mask(gcid, 0x50114f1, PPC_BIT(41), PPC_BIT(41));
> + xscom_write_mask(npu->chip_id, 0x5011000, PPC_BIT(58), PPC_BIT(58));
> + xscom_write_mask(npu->chip_id, 0x5011030, PPC_BIT(58), PPC_BIT(58));
> + xscom_write_mask(npu->chip_id, 0x5011060, PPC_BIT(58), PPC_BIT(58));
> + xscom_write_mask(npu->chip_id, 0x5011090, PPC_BIT(58), PPC_BIT(58));
> + xscom_write_mask(npu->chip_id, 0x5011200, PPC_BIT(58), PPC_BIT(58));
> + xscom_write_mask(npu->chip_id, 0x5011230, PPC_BIT(58), PPC_BIT(58));
> + xscom_write_mask(npu->chip_id, 0x5011260, PPC_BIT(58), PPC_BIT(58));
> + xscom_write_mask(npu->chip_id, 0x5011290, PPC_BIT(58), PPC_BIT(58));
> + xscom_write_mask(npu->chip_id, 0x5011400, PPC_BIT(58), PPC_BIT(58));
> + xscom_write_mask(npu->chip_id, 0x5011430, PPC_BIT(58), PPC_BIT(58));
> + xscom_write_mask(npu->chip_id, 0x5011460, PPC_BIT(58), PPC_BIT(58));
> + xscom_write_mask(npu->chip_id, 0x5011490, PPC_BIT(58), PPC_BIT(58));
> +
> + xscom_write_mask(npu->chip_id, 0x50110c0, PPC_BIT(53), PPC_BIT(53));
> + xscom_write_mask(npu->chip_id, 0x50112c0, PPC_BIT(53), PPC_BIT(53));
> + xscom_write_mask(npu->chip_id, 0x50114c0, PPC_BIT(53), PPC_BIT(53));
> + xscom_write_mask(npu->chip_id, 0x50110f1, PPC_BIT(41), PPC_BIT(41));
> + xscom_write_mask(npu->chip_id, 0x50112f1, PPC_BIT(41), PPC_BIT(41));
> + xscom_write_mask(npu->chip_id, 0x50114f1, PPC_BIT(41), PPC_BIT(41));
>
> val = NPU2_NTL_MISC_CFG2_BRICK_ENABLE |
> NPU2_NTL_MISC_CFG2_NDL_TX_PARITY_ENA |
> NPU2_NTL_MISC_CFG2_NDL_PRI_PARITY_ENA |
> NPU2_NTL_MISC_CFG2_RCV_CREDIT_OVERFLOW_ENA;
> - xscom_write_mask(gcid, 0x5011110, val, val);
> - xscom_write_mask(gcid, 0x5011130, val, val);
> - xscom_write_mask(gcid, 0x5011310, val, val);
> - xscom_write_mask(gcid, 0x5011330, val, val);
> - xscom_write_mask(gcid, 0x5011510, val, val);
> - xscom_write_mask(gcid, 0x5011530, val, val);
> + xscom_write_mask(npu->chip_id, 0x5011110, val, val);
> + xscom_write_mask(npu->chip_id, 0x5011130, val, val);
> + xscom_write_mask(npu->chip_id, 0x5011310, val, val);
> + xscom_write_mask(npu->chip_id, 0x5011330, val, val);
> + xscom_write_mask(npu->chip_id, 0x5011510, val, val);
> + xscom_write_mask(npu->chip_id, 0x5011530, val, val);
>
> val = PPC_BIT(6) | PPC_BIT(7) | PPC_BIT(11);
> - xscom_write_mask(gcid, 0x5011009, val, PPC_BITMASK(6,11));
> - xscom_write_mask(gcid, 0x5011039, val, PPC_BITMASK(6,11));
> - xscom_write_mask(gcid, 0x5011069, val, PPC_BITMASK(6,11));
> - xscom_write_mask(gcid, 0x5011099, val, PPC_BITMASK(6,11));
> - xscom_write_mask(gcid, 0x5011209, val, PPC_BITMASK(6,11));
> - xscom_write_mask(gcid, 0x5011239, val, PPC_BITMASK(6,11));
> - xscom_write_mask(gcid, 0x5011269, val, PPC_BITMASK(6,11));
> - xscom_write_mask(gcid, 0x5011299, val, PPC_BITMASK(6,11));
> - xscom_write_mask(gcid, 0x5011409, val, PPC_BITMASK(6,11));
> - xscom_write_mask(gcid, 0x5011439, val, PPC_BITMASK(6,11));
> - xscom_write_mask(gcid, 0x5011469, val, PPC_BITMASK(6,11));
> - xscom_write_mask(gcid, 0x5011499, val, PPC_BITMASK(6,11));
> -
> - index = dt_prop_get_u32(dn, "ibm,npu-index");
> - phb_index = dt_prop_get_u32(dn, "ibm,phb-index");
> - links = dt_prop_get_u32(dn, "ibm,npu-links");
> - prlog(PR_INFO, "NPU: Chip %d Found NPU2#%d (%d links) at %s\n",
> - gcid, index, links, path);
> - free(path);
> -
> - /* Retrieve scom base address */
> - scom = dt_get_address(dn, 0, NULL);
> - prlog(PR_INFO, " SCOM Base: %08x\n", scom);
> + xscom_write_mask(npu->chip_id, 0x5011009, val, PPC_BITMASK(6,11));
> + xscom_write_mask(npu->chip_id, 0x5011039, val, PPC_BITMASK(6,11));
> + xscom_write_mask(npu->chip_id, 0x5011069, val, PPC_BITMASK(6,11));
> + xscom_write_mask(npu->chip_id, 0x5011099, val, PPC_BITMASK(6,11));
> + xscom_write_mask(npu->chip_id, 0x5011209, val, PPC_BITMASK(6,11));
> + xscom_write_mask(npu->chip_id, 0x5011239, val, PPC_BITMASK(6,11));
> + xscom_write_mask(npu->chip_id, 0x5011269, val, PPC_BITMASK(6,11));
> + xscom_write_mask(npu->chip_id, 0x5011299, val, PPC_BITMASK(6,11));
> + xscom_write_mask(npu->chip_id, 0x5011409, val, PPC_BITMASK(6,11));
> + xscom_write_mask(npu->chip_id, 0x5011439, val, PPC_BITMASK(6,11));
> + xscom_write_mask(npu->chip_id, 0x5011469, val, PPC_BITMASK(6,11));
> + xscom_write_mask(npu->chip_id, 0x5011499, val, PPC_BITMASK(6,11));
>
> /* Reassign the BARs */
> - assign_mmio_bars(gcid, scom, reg, mm_win);
> + assign_mmio_bars(npu->chip_id, npu->xscom_base, reg, mm_win);
> + npu->regs = (void *)reg[0];
> + npu->mm_base = mm_win[0];
> + npu->mm_size = mm_win[1];
>
> if (reg[0] && reg[1])
> prlog(PR_INFO, " Global MMIO BAR: %016llx (%lldMB)\n",
> @@ -1477,17 +1438,21 @@ static void npu2_probe_phb(struct dt_node *dn)
> "ibm,ioda2-npu2-phb");
> dt_add_property_strings(np, "device_type", "pciex");
> dt_add_property(np, "reg", reg, sizeof(reg));
> - dt_add_property_cells(np, "ibm,phb-index", phb_index);
> - dt_add_property_cells(np, "ibm,npu-index", index);
> - dt_add_property_cells(np, "ibm,chip-id", gcid);
> - dt_add_property_cells(np, "ibm,xscom-base", scom);
> - dt_add_property_cells(np, "ibm,npcq", dn->phandle);
> - dt_add_property_cells(np, "ibm,links", links);
> + dt_add_property_cells(np, "ibm,phb-index", npu->phb_index);
> + dt_add_property_cells(np, "ibm,npu-index", npu->index);
> + dt_add_property_cells(np, "ibm,chip-id", npu->chip_id);
> + dt_add_property_cells(np, "ibm,xscom-base", npu->xscom_base);
> + dt_add_property_cells(np, "ibm,npcq", npu->dt_node->phandle);
> + dt_add_property_cells(np, "ibm,links", npu->total_devices);
> dt_add_property(np, "ibm,mmio-window", mm_win, sizeof(mm_win));
> dt_add_property_cells(np, "ibm,phb-diag-data-size", 0);
>
> /* Disable fast reboot - not currently supported */
> disable_fast_reboot("NVLink device enabled");
> +
> + npu2_nvlink_create_phb(npu, np);
> +
> + return 0;
> }
>
> static uint32_t npu2_populate_pcie_cap(struct npu2_dev *dev,
> @@ -1949,49 +1914,25 @@ static void npu2_setup_irqs(struct npu2 *p)
> npu2_write(p, reg, val);
> }
>
> -static void npu2_create_phb(struct dt_node *dn)
> +void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn)
> {
> - const struct dt_property *prop;
> - struct npu2 *p;
> struct pci_slot *slot;
> - uint32_t links;
> - void *pmem;
> -
> - /* Retrieve number of devices */
> - links = dt_prop_get_u32(dn, "ibm,links");
> - pmem = zalloc(sizeof(struct npu2) + links * sizeof(struct npu2_dev));
> - assert(pmem);
> -
> - /* Populate PHB */
> - p = pmem;
> - p->index = dt_prop_get_u32(dn, "ibm,phb-index");
> - p->chip_id = dt_prop_get_u32(dn, "ibm,chip-id");
> - p->xscom_base = dt_prop_get_u32(dn, "ibm,xscom-base");
> - p->total_devices = links;
> - p->regs = (void *)dt_get_address(dn, 0, NULL);
> -
> - prop = dt_require_property(dn, "ibm,mmio-window", -1);
> - assert(prop->len >= (2 * sizeof(uint64_t)));
> - p->mm_base = ((const uint64_t *)prop->prop)[0];
> - p->mm_size = ((const uint64_t *)prop->prop)[1];
> -
> - p->devices = pmem + sizeof(struct npu2);
>
> /* Generic PHB */
> - p->phb_nvlink.dt_node = dn;
> - p->phb_nvlink.ops = &npu_ops;
> - p->phb_nvlink.phb_type = phb_type_npu_v2;
> - init_lock(&p->lock);
> - init_lock(&p->phb_nvlink.lock);
> - list_head_init(&p->phb_nvlink.devices);
> - list_head_init(&p->phb_nvlink.virt_devices);
> -
> - npu2_setup_irqs(p);
> - npu2_populate_devices(p, dn);
> - npu2_add_interrupt_map(p, dn);
> - npu2_add_phb_properties(p);
> -
> - slot = npu2_slot_create(&p->phb_nvlink);
> + npu->phb_nvlink.dt_node = dn;
> + npu->phb_nvlink.ops = &npu_ops;
> + npu->phb_nvlink.phb_type = phb_type_npu_v2;
> + init_lock(&npu->lock);
> + init_lock(&npu->phb_nvlink.lock);
> + list_head_init(&npu->phb_nvlink.devices);
> + list_head_init(&npu->phb_nvlink.virt_devices);
> +
> + npu2_setup_irqs(npu);
> + npu2_populate_devices(npu, dn);
> + npu2_add_interrupt_map(npu, dn);
> + npu2_add_phb_properties(npu);
> +
> + slot = npu2_slot_create(&npu->phb_nvlink);
> if (!slot)
> {
> /**
> @@ -2002,41 +1943,10 @@ static void npu2_create_phb(struct dt_node *dn)
> prlog(PR_ERR, "NPU: Cannot create PHB slot\n");
> }
>
> - pci_register_phb(&p->phb_nvlink, OPAL_DYNAMIC_PHB_ID);
> -
> - npu2_init_ioda_cache(p);
> - npu2_hw_init(p);
> -}
> -
> -void probe_npu2(void)
> -{
> - struct proc_chip *chip = next_chip(NULL);
> - struct dt_node *np;
> - const char *zcal;
> -
> - /* Abort if we're running on DD1 */
> - if (chip &&
> - (chip->type == PROC_CHIP_P9_NIMBUS ||
> - chip->type == PROC_CHIP_P9_CUMULUS) &&
> - (chip->ec_level & 0xf0) == 0x10) {
> - prlog(PR_INFO, "NPU: DD1 not supported\n");
> - return;
> - }
> -
> - /* Check for a zcal override */
> - zcal = nvram_query("nv_zcal_override");
> - if (zcal) {
> - nv_zcal_nominal = atoi(zcal);
> - prlog(PR_WARNING, "NPU: Using ZCAL impedance override = %d\n", nv_zcal_nominal);
> - }
> -
> - /* Scan NPU2 XSCOM nodes */
> - dt_for_each_compatible(dt_root, np, "ibm,power9-npu")
> - npu2_probe_phb(np);
> + pci_register_phb(&npu->phb_nvlink, OPAL_DYNAMIC_PHB_ID);
>
> - /* Scan newly created PHB nodes */
> - dt_for_each_compatible(dt_root, np, "ibm,power9-npu-pciex")
> - npu2_create_phb(np);
> + npu2_init_ioda_cache(npu);
> + npu2_hw_init(npu);
> }
>
> /*
> diff --git a/include/npu2.h b/include/npu2.h
> index 10742031ec0f..0e07d77b62c5 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -167,6 +167,7 @@ struct npu2 {
>
> /* NVLink */
> struct phb phb_nvlink;
> + uint32_t phb_index;
> };
>
> static inline struct npu2 *phb_to_npu2_nvlink(struct phb *phb)
> @@ -193,6 +194,10 @@ static inline struct phb *npu2_dev_to_phb(struct npu2_dev *ndev)
> }
> }
>
> +int npu2_opencapi_init_npu(struct npu2 *npu);
> +int npu2_nvlink_init_npu(struct npu2 *npu);
> +void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn);
> +
> enum npu2_dev_type npu2_dt_link_dev_type(struct dt_node *link);
> void npu2_write_4b(struct npu2 *p, uint64_t reg, uint32_t val);
> uint32_t npu2_read_4b(struct npu2 *p, uint64_t reg);
> diff --git a/include/skiboot.h b/include/skiboot.h
> index bba76c12c39e..f2818b1a2326 100644
> --- a/include/skiboot.h
> +++ b/include/skiboot.h
> @@ -201,7 +201,6 @@ extern int preload_capp_ucode(void);
> extern void preload_io_vpd(void);
> extern void probe_npu(void);
> extern void probe_npu2(void);
> -extern void probe_npu2_opencapi(void);
> extern void uart_init(void);
> extern void mbox_init(void);
> extern void early_uart_init(void);
>
More information about the Skiboot
mailing list