[Skiboot] [PATCH v3 2/3] npu2: Enable recoverable data link (no-stall) interrupts
Frederic Barrat
fbarrat at linux.vnet.ibm.com
Mon Aug 28 22:59:39 AEST 2017
Le 22/08/2017 à 01:58, Sam Bobroff a écrit :
> Allow the NPU2 to trigger "recoverable data link" interrupts.
>
> Signed-off-by: Sam Bobroff <sam.bobroff at au1.ibm.com>
> ---
> hw/npu2.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++------
> include/npu2-regs.h | 10 ++++
> 2 files changed, 131 insertions(+), 15 deletions(-)
>
> diff --git a/hw/npu2.c b/hw/npu2.c
> index 63193f16..5dbd2167 100644
> --- a/hw/npu2.c
> +++ b/hw/npu2.c
> @@ -36,6 +36,17 @@
> #include <chip.h>
> #include <phys-map.h>
> #include <nvram.h>
> +#include <xive.h>
> +
> +#define NPU2_IRQ_BASE_SHIFT 13
> +#define NPU2_N_DL_IRQS 23
> +#define NPU2_N_DL_IRQS_ALIGN 32
> +
> +#define VENDOR_CAP_START 0x80
> +#define VENDOR_CAP_END 0x90
> +#define VENDOR_CAP_LEN 0x10
> +#define VENDOR_CAP_VERSION 0x01
> +#define VENDOR_CAP_PCI_DEV_OFFSET 0x0d
>
> /*
> * NPU2 BAR layout definition. We have 3 stacks and each of them
> @@ -53,11 +64,6 @@
> * configure one particular BAR.
> */
>
> -#define VENDOR_CAP_START 0x80
> -#define VENDOR_CAP_END 0x90
> -
> -#define VENDOR_CAP_PCI_DEV_OFFSET 0x0d
> -
> static bool is_p9dd1(void)
> {
> struct proc_chip *chip = next_chip(NULL);
> @@ -1395,17 +1401,14 @@ static uint32_t npu2_populate_vendor_cap(struct npu2_dev *dev,
> {
> struct pci_virt_device *pvd = dev->pvd;
>
> -#define NPU2_VENDOR_CAP_VERSION 0x00
> -#define NPU2_VENDOR_CAP_LEN 0x10
> -
> /* Capbility list */
> PCI_VIRT_CFG_INIT_RO(pvd, prev_cap, 1, start);
> PCI_VIRT_CFG_INIT_RO(pvd, start, 1, PCI_CFG_CAP_ID_VENDOR);
> dev->vendor_cap = start;
>
> /* Length and version */
> - PCI_VIRT_CFG_INIT_RO(pvd, start + 2, 1, NPU2_VENDOR_CAP_LEN);
> - PCI_VIRT_CFG_INIT_RO(pvd, start + 3, 1, NPU2_VENDOR_CAP_VERSION);
> + PCI_VIRT_CFG_INIT_RO(pvd, start + 2, 1, VENDOR_CAP_LEN);
> + PCI_VIRT_CFG_INIT_RO(pvd, start + 3, 1, VENDOR_CAP_VERSION);
>
> /*
> * Defaults when the trap can't handle the read/write (eg. due
> @@ -1423,7 +1426,7 @@ static uint32_t npu2_populate_vendor_cap(struct npu2_dev *dev,
> /* Link index */
> PCI_VIRT_CFG_INIT_RO(pvd, start + 0xc, 1, dev->index);
>
> - return start + NPU2_VENDOR_CAP_LEN;
> + return start + VENDOR_CAP_LEN;
> }
>
> static void npu2_populate_cfg(struct npu2_dev *dev)
> @@ -1497,10 +1500,7 @@ static void npu2_populate_cfg(struct npu2_dev *dev)
> PCI_VIRT_CFG_INIT_RO(pvd, 0x38, 4, 0x00000000);
>
> /* 0x3c - INT line/pin/Minimal grant/Maximal latency */
> - if (!NPU2DEV_BRICK(dev))
> - PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_INT_LINE, 4, 0x00000100);
> - else
> - PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_INT_LINE, 4, 0x00000200);
> + PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_INT_LINE, 4, 0x00000100); /* INT A */
>
> /* PCIE and vendor specific capability */
> pos = npu2_populate_pcie_cap(dev, 0x40, PCI_CFG_CAP);
> @@ -1597,6 +1597,39 @@ static void npu2_populate_devices(struct npu2 *p,
> }
> }
>
> +static void npu2_add_interrupt_map(struct npu2 *p,
> + struct dt_node *dn)
> +{
> + struct dt_node *npu2_dn, *link, *phb_dn;
> + uint32_t npu2_phandle, index = 0, i;
> + uint32_t icsp = get_ics_phandle();
> + uint32_t *map;
> + size_t map_size;
> + uint32_t mask[] = {0xff00, 0x0, 0x0, 0x7};
> +
> + npu2_phandle = dt_prop_get_u32(dn, "ibm,npcq");
> + assert((npu2_dn = dt_find_by_phandle(dt_root, npu2_phandle)));
> + assert((phb_dn = p->phb.dt_node));
> + map_size = 7 * sizeof(*map) * p->total_devices;
> + map = malloc(map_size);
> + index = 0;
> + dt_for_each_compatible(npu2_dn, link, "ibm,npu-link") {
> + i = index * 7;
> + map[i + 0] = (p->devices[index].bdfn << 8);
> + map[i + 1] = 0;
> + map[i + 2] = 0;
> +
> + map[i + 3] = 1; /* INT A */
> + map[i + 4] = icsp; /* interrupt-parent */
> + map[i + 5] = p->base_lsi + (index * 2) + 1; /* NDL No-Stall Event */
> + map[i + 6] = 0; /* 0 = EDGE, 1 = LEVEL. */
> + index++;
> + }
> + dt_add_property(phb_dn, "interrupt-map", map, map_size);
> + free(map);
> + dt_add_property(phb_dn, "interrupt-map-mask", mask, sizeof(mask));
> +}
> +
> static void npu2_add_phb_properties(struct npu2 *p)
> {
> struct dt_node *np = p->phb.dt_node;
> @@ -1641,6 +1674,77 @@ static void npu2_add_phb_properties(struct npu2 *p)
> hi32(mm_size), lo32(mm_size));
> }
>
> +static uint64_t npu2_ipi_attributes(struct irq_source *is __unused, uint32_t isn __unused)
> +{
> + return IRQ_ATTR_TARGET_LINUX;
> +}
> +
> +static char *npu2_ipi_name(struct irq_source *is, uint32_t isn)
> +{
> + struct npu2 *p = is->data;
> + uint32_t idx = isn - p->base_lsi;
> + const char *name;
> +
> + switch (idx) {
> + case 0: name = "NDL 0 Stall Event (brick 0)"; break;
> + case 1: name = "NDL 0 No-Stall Event (brick 0)"; break;
> + case 2: name = "NDL 1 Stall Event (brick 1)"; break;
> + case 3: name = "NDL 1 No-Stall Event (brick 1)"; break;
> + case 4: name = "NDL 2 Stall Event (brick 2)"; break;
> + case 5: name = "NDL 2 No-Stall Event (brick 2)"; break;
> + case 6: name = "NDL 5 Stall Event (brick 3)"; break;
> + case 7: name = "NDL 5 No-Stall Event (brick 3)"; break;
> + case 8: name = "NDL 4 Stall Event (brick 4)"; break;
> + case 9: name = "NDL 4 No-Stall Event (brick 4)"; break;
> + case 10: name = "NDL 3 Stall Event (brick 5)"; break;
> + case 11: name = "NDL 3 No-Stall Event (brick 5)"; break;
> + case 12: name = "NTL 0 Event"; break;
> + case 13: name = "NTL 1 Event"; break;
> + case 14: name = "NTL 2 Event"; break;
> + case 15: name = "NTL 3 Event"; break;
> + case 16: name = "NTL 4 Event"; break;
> + case 17: name = "NTL 5 Event"; break;
> + case 18: name = "TCE Event"; break;
> + case 19: name = "ATS Event"; break;
> + case 20: name = "CQ Event"; break;
> + case 21: name = "MISC Event"; break;
> + case 22: name = "NMMU Local Xstop"; break;
> + default: name = "Unknown";
> + }
> + return strdup(name);
> +}
> +
> +static const struct irq_source_ops npu2_ipi_ops = {
> + .attributes = npu2_ipi_attributes,
> + .name = npu2_ipi_name,
> +};
> +
> +static void npu2_setup_irqs(struct npu2 *p)
> +{
> + uint64_t reg, val;
> + void *tp;
> +
> + p->base_lsi = xive_alloc_ipi_irqs(p->chip_id, NPU2_N_DL_IRQS, NPU2_N_DL_IRQS_ALIGN);
> + if (p->base_lsi == XIVE_IRQ_ERROR) {
> + prlog(PR_ERR, "NPU2: Failed to allocate interrupt sources, IRQs for NDL No-stall events will not be available.\n");
> + return;
> + }
> + xive_register_ipi_source(p->base_lsi, NPU2_N_DL_IRQS, p, &npu2_ipi_ops );
> +
> + /* Set IPI configuration */
> + reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_CFG);
> + val = npu2_read(p, reg);
> + val = SETFIELD(NPU2_MISC_CFG_IPI_PS, val, NPU2_MISC_CFG_IPI_PS_64K);
> + val = SETFIELD(NPU2_MISC_CFG_IPI_OS, val, NPU2_MISC_CFG_IPI_OS_AIX);
> + npu2_write(p, reg, val);
> +
> + /* Set IRQ base */
> + reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_IRQ_BASE);
> + tp = xive_get_trigger_port(p->base_lsi);
> + val = ((uint64_t)tp) << NPU2_IRQ_BASE_SHIFT;
> + npu2_write(p, reg, val);
> +}
> +
This is really close to the setup we do for opencapi and it looks good
to me. The only difference really is that there are more interrupts
supported for opencapi (35 instead of 23), so the alignment requirement
when allocating the IPIs is different (64 instead of 32). But that
shouldn't be your concern.
Andrew: with your refactoring of the opencapi code to support multiple
devices, we need to (re)discuss if it's workable to mix opencapi and
nvlink devices under the same PHB. I think the latest conclusion was that they
would have to be on two different PHYs, but is even that OK? We can
discuss it on Wednesday.
Fred
> static void npu2_create_phb(struct dt_node *dn)
> {
> const struct dt_property *prop;
> @@ -1678,7 +1782,9 @@ static void npu2_create_phb(struct dt_node *dn)
> list_head_init(&p->phb.devices);
> list_head_init(&p->phb.virt_devices);
>
> + npu2_setup_irqs(p);
> npu2_populate_devices(p, dn);
> + npu2_add_interrupt_map(p, dn);
> npu2_add_phb_properties(p);
>
> slot = npu2_slot_create(&p->phb);
> diff --git a/include/npu2-regs.h b/include/npu2-regs.h
> index 86e2658a..759404cc 100644
> --- a/include/npu2-regs.h
> +++ b/include/npu2-regs.h
> @@ -286,6 +286,16 @@ void npu2_write_mask(struct npu2 *p, uint64_t reg, uint64_t val, uint64_t mask);
> #define NPU2_MISC_ERR_RPT_HOLD NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x020)
> #define NPU2_MISC_ERR_RPT_MASK NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x028)
> #define NPU2_MISC_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x030)
> +#define NPU2_MISC_CFG_CONFIG_SYNC_WAIT PPC_BITMASK(0,4)
> +#define NPU2_MISC_CFG_PERF_CONFIG_ENABLE PPC_BIT(5)
> +#define NPU2_MISC_CFG_PERF_CONFIG_PE_MASK PPC_BIT(6)
> +#define NPU2_MISC_CFG_PERF_CONFIG_PE_MATCH PPC_BITMASK(7,10)
> +#define NPU2_MISC_CFG_IPI_PS PPC_BIT(11)
> +#define NPU2_MISC_CFG_IPI_PS_4K 0
> +#define NPU2_MISC_CFG_IPI_PS_64K 1
> +#define NPU2_MISC_CFG_IPI_OS PPC_BIT(12)
> +#define NPU2_MISC_CFG_IPI_OS_AIX 0
> +#define NPU2_MISC_CFG_IPI_OS_LINUX 1
> #define NPU2_MISC_INHIBIT_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x038)
> #define NPU2_MISC_FREEZE_ENABLE0 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x040)
> #define NPU2_MISC_FREEZE_ENABLE1 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x048)
>
More information about the Skiboot
mailing list