[Skiboot] [PATCH] npu2: Use phys-map to get MMIO BARs
Andrew Donnellan
andrew.donnellan at au1.ibm.com
Thu Jun 15 16:03:26 AEST 2017
Commit bdea201a4c4b ("hw/npu2.c: Use phys-map to get GPU memory BARs")
added use of phys-map for setting GPU memory BARs.
Move the MMIO BARs over to using phys-map as well.
Cc: Alistair Popple <alistair at popple.id.au>
Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
---
hw/npu2.c | 98 +++++++++++++++++++++++++++-------------------------------
include/npu2.h | 4 +++
2 files changed, 49 insertions(+), 53 deletions(-)
diff --git a/hw/npu2.c b/hw/npu2.c
index 040ab883..de1842f0 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -1099,58 +1099,49 @@ static const struct phb_ops npu_ops = {
.tce_kill = npu2_tce_kill,
};
-static void assign_mmio_bars(uint64_t gcid, uint32_t scom, uint64_t reg[2], uint64_t mm_win[2])
+static void assign_mmio_bars(struct proc_chip *proc_chip, uint32_t scom,
+ uint64_t reg[2], uint64_t mm_win[2])
{
- uint64_t mem_start;
uint32_t i;
struct npu2_bar *bar;
struct npu2_bar npu2_bars[] = {
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR), .size = 0x1000000,
+ /*
+ * NPU_REGS must be first in this list, at least on DD1.
+ * On DD2, stack 0 will be used for NPU_REGS, stack 1/2 for NPU_PHY.
+ */
+ { .type = NPU_REGS, .index = 0,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
.flags = NPU2_BAR_FLAG_ENABLED },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR), .size = 0x200000,
+ { .type = NPU_PHY, .index = 0,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
.flags = NPU2_BAR_FLAG_ENABLED },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR), .size = 0x200000,
+ { .type = NPU_PHY, .index = 1,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
.flags = NPU2_BAR_FLAG_ENABLED },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL0_BAR), .size = 0x20000 },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL1_BAR), .size = 0x20000 },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL0_BAR), .size = 0x20000 },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL1_BAR), .size = 0x20000 },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL0_BAR), .size = 0x20000 },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL1_BAR), .size = 0x20000 },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_GENID_BAR), .size = 0x20000 },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_GENID_BAR), .size = 0x20000 },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_GENID_BAR), .size = 0x20000 },
+ { .type = NPU_NTL, .index = 0,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL0_BAR) },
+ { .type = NPU_NTL, .index = 1,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL1_BAR) },
+ { .type = NPU_NTL, .index = 2,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL0_BAR) },
+ { .type = NPU_NTL, .index = 3,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL1_BAR) },
+ { .type = NPU_NTL, .index = 4,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL0_BAR) },
+ { .type = NPU_NTL, .index = 5,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL1_BAR) },
+ { .type = NPU_GENID, .index = 0,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_GENID_BAR) },
+ { .type = NPU_GENID, .index = 1,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_GENID_BAR) },
+ { .type = NPU_GENID, .index = 2,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_GENID_BAR) },
};
- mem_start = 0x6030200000000;
- mem_start |= gcid << PPC_BITLSHIFT(21);
-
- /*
- * We're going to assign the BARs in reversed order according
- * to their sizes, just like the order we have in npu_bars[].
- * In that way, all BARs will be aligned perfectly without
- * wasting resources. Also, the Linux kernel won't change
- * anything though it attempts to reassign the BARs that
- * it can see, which are NTL and GENID BARs.
- *
- * GLOBAL MMIO (16MB)
- * PHY0 (2MB)
- * PHB1 (2MB)
- * NTL0 (128KB)
- * NTL1 (128KB)
- * NTL2 (128KB)
- * NTL3 (128KB)
- * NTL4 (128KB)
- * NTL5 (128KB)
- * GENID0 (128KB)
- * GENID1 (128KB)
- * GENID2 (128KB)
- */
for (i = 0; i < ARRAY_SIZE(npu2_bars); i++) {
bar = &npu2_bars[i];
- bar->base = mem_start;
- mem_start += bar->size;
- npu2_write_bar(NULL, bar, gcid, scom);
+ phys_map_get(proc_chip, bar->type, bar->index, &bar->base, &bar->size);
+ npu2_write_bar(NULL, bar, proc_chip->id, scom);
}
/* Global MMIO BAR */
@@ -1199,7 +1190,7 @@ static void npu2_probe_phb(struct dt_node *dn)
prlog(PR_INFO, " SCOM Base: %08x\n", scom);
/* Reassign the BARs */
- assign_mmio_bars(gcid, scom, reg, mm_win);
+ assign_mmio_bars(proc_chip, scom, reg, mm_win);
if (reg[0] && reg[1])
prlog(PR_INFO, " Global MMIO BAR: %016llx (%lldMB)\n",
@@ -1442,6 +1433,8 @@ static void npu2_populate_devices(struct npu2 *p,
struct npu2_dev *dev;
struct dt_node *npu2_dn, *link;
uint32_t npu_phandle, index = 0;
+ struct proc_chip *chip = get_chip(p->chip_id);
+ int stack;
/*
* Get the npu node which has the links which we expand here
@@ -1474,23 +1467,22 @@ static void npu2_populate_devices(struct npu2 *p,
dev->pl_xscom_base = dt_prop_get_u64(link, "ibm,npu-phy");
dev->lane_mask = dt_prop_get_u32(link, "ibm,npu-lane-mask");
- /* Populate BARs. BAR0/1 is the NTL bar. We initialise
- * it from the HW. */
+ /* Populate BARs. BAR0/1 is the NTL bar. */
+ stack = NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev);
npu2_bar = &dev->bars[0].npu2_bar;
- if (NPU2DEV_BRICK(dev) == 0)
- /* Leave the block as 0 - the read/write bar
- * functions fill it in */
- npu2_bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev), 0, NPU2_NTL0_BAR);
- else
- npu2_bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev), 0, NPU2_NTL1_BAR);
+ npu2_bar->reg = NPU2_REG_OFFSET(stack, 0, NPU2DEV_BRICK(dev) == 0 ?
+ NPU2_NTL0_BAR : NPU2_NTL1_BAR);
+ phys_map_get(chip, NPU_NTL,
+ NPU2DEV_BRICK(dev) + (2 * NPU2DEV_STACK(dev)),
+ &npu2_bar->base, &npu2_bar->size);
- npu2_read_bar(p, npu2_bar);
dev->bars[0].flags = PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64;
/* BAR2/3 is the GENID bar. */
npu2_bar = &dev->bars[1].npu2_bar;
- npu2_bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev), 0, NPU2_GENID_BAR);
- npu2_read_bar(p, npu2_bar);
+ npu2_bar->reg = NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR);
+ phys_map_get(chip, NPU_GENID, NPU2DEV_STACK(dev),
+ &npu2_bar->base, &npu2_bar->size);
/* The GENID is a single physical BAR that we split
* for each emulated device */
diff --git a/include/npu2.h b/include/npu2.h
index 6476c729..88ebe9e1 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -17,6 +17,8 @@
#ifndef __NPU2_H
#define __NPU2_H
+#include <phys-map.h>
+
/* Debugging options */
#define NPU2DBG(p, fmt, a...) prlog(PR_DEBUG, "NPU%d: " fmt, \
(p)->phb.opal_id, ##a)
@@ -45,6 +47,8 @@
* emulated PCIe BARs. The is a subtle difference between the two as
* not all BARs are exposed outside of skiboot. */
struct npu2_bar {
+ enum phys_map_type type;
+ int index;
#define NPU2_BAR_FLAG_ENABLED 0x0010
/* Generation ID's are a single space in the hardware but we split
--
Andrew Donnellan OzLabs, ADL Canberra
andrew.donnellan at au1.ibm.com IBM Australia Limited
More information about the Skiboot
mailing list