[Skiboot] [PATCH v2] npu2: Use phys-map to get MMIO BARs
Andrew Donnellan
andrew.donnellan at au1.ibm.com
Thu Jun 15 16:23:04 AEST 2017
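Commit bdea201a4c4b ("hw/npu2.c: Use phys-map to get GPU memory BARs")
switched the GPU memory BARs over to phys-map.
Do the same for the MMIO BARs: instead of assigning them sequentially from a
hard-coded base address with hard-coded sizes, look up each BAR's base and
size with phys_map_get().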
Cc: Alistair Popple <alistair at popple.id.au>
Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
---
This patch applies on top of http://patchwork.ozlabs.org/patch/776138/
---
hw/npu2.c | 90 ++++++++++++++++++++++++++--------------------------------
include/npu2.h | 4 +++
2 files changed, 44 insertions(+), 50 deletions(-)
diff --git a/hw/npu2.c b/hw/npu2.c
index 8786cc1e..6019c644 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -1101,55 +1101,45 @@ static const struct phb_ops npu_ops = {
static void assign_mmio_bars(uint64_t gcid, uint32_t scom, uint64_t reg[2], uint64_t mm_win[2])
{
- uint64_t mem_start;
uint32_t i;
struct npu2_bar *bar;
struct npu2_bar npu2_bars[] = {
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR), .size = 0x1000000,
+ /*
+ * NPU_REGS must be first in this list, at least on DD1.
+ * On DD2, stack 0 will be used for NPU_REGS, stack 1/2 for NPU_PHY.
+ */
+ { .type = NPU_REGS, .index = 0,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
.flags = NPU2_BAR_FLAG_ENABLED },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR), .size = 0x200000,
+ { .type = NPU_PHY, .index = 0,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
.flags = NPU2_BAR_FLAG_ENABLED },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR), .size = 0x200000,
+ { .type = NPU_PHY, .index = 1,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
.flags = NPU2_BAR_FLAG_ENABLED },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL0_BAR), .size = 0x20000 },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL1_BAR), .size = 0x20000 },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL0_BAR), .size = 0x20000 },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL1_BAR), .size = 0x20000 },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL0_BAR), .size = 0x20000 },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL1_BAR), .size = 0x20000 },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_GENID_BAR), .size = 0x20000 },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_GENID_BAR), .size = 0x20000 },
- { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_GENID_BAR), .size = 0x20000 },
+ { .type = NPU_NTL, .index = 0,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL0_BAR) },
+ { .type = NPU_NTL, .index = 1,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL1_BAR) },
+ { .type = NPU_NTL, .index = 2,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL0_BAR) },
+ { .type = NPU_NTL, .index = 3,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL1_BAR) },
+ { .type = NPU_NTL, .index = 4,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL0_BAR) },
+ { .type = NPU_NTL, .index = 5,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL1_BAR) },
+ { .type = NPU_GENID, .index = 0,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_GENID_BAR) },
+ { .type = NPU_GENID, .index = 1,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_GENID_BAR) },
+ { .type = NPU_GENID, .index = 2,
+ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_GENID_BAR) },
};
- mem_start = 0x6030200000000;
- mem_start |= gcid << PPC_BITLSHIFT(21);
-
- /*
- * We're going to assign the BARs in reversed order according
- * to their sizes, just like the order we have in npu_bars[].
- * In that way, all BARs will be aligned perfectly without
- * wasting resources. Also, the Linux kernel won't change
- * anything though it attempts to reassign the BARs that
- * it can see, which are NTL and GENID BARs.
- *
- * GLOBAL MMIO (16MB)
- * PHY0 (2MB)
- * PHB1 (2MB)
- * NTL0 (128KB)
- * NTL1 (128KB)
- * NTL2 (128KB)
- * NTL3 (128KB)
- * NTL4 (128KB)
- * NTL5 (128KB)
- * GENID0 (128KB)
- * GENID1 (128KB)
- * GENID2 (128KB)
- */
for (i = 0; i < ARRAY_SIZE(npu2_bars); i++) {
bar = &npu2_bars[i];
- bar->base = mem_start;
- mem_start += bar->size;
+ phys_map_get(gcid, bar->type, bar->index, &bar->base, &bar->size);
npu2_write_bar(NULL, bar, gcid, scom);
}
@@ -1442,6 +1432,7 @@ static void npu2_populate_devices(struct npu2 *p,
struct npu2_dev *dev;
struct dt_node *npu2_dn, *link;
uint32_t npu_phandle, index = 0;
+ int stack;
/*
* Get the npu node which has the links which we expand here
@@ -1474,23 +1465,22 @@ static void npu2_populate_devices(struct npu2 *p,
dev->pl_xscom_base = dt_prop_get_u64(link, "ibm,npu-phy");
dev->lane_mask = dt_prop_get_u32(link, "ibm,npu-lane-mask");
- /* Populate BARs. BAR0/1 is the NTL bar. We initialise
- * it from the HW. */
+ /* Populate BARs. BAR0/1 is the NTL bar. */
+ stack = NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev);
npu2_bar = &dev->bars[0].npu2_bar;
- if (NPU2DEV_BRICK(dev) == 0)
- /* Leave the block as 0 - the read/write bar
- * functions fill it in */
- npu2_bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev), 0, NPU2_NTL0_BAR);
- else
- npu2_bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev), 0, NPU2_NTL1_BAR);
+ npu2_bar->reg = NPU2_REG_OFFSET(stack, 0, NPU2DEV_BRICK(dev) == 0 ?
+ NPU2_NTL0_BAR : NPU2_NTL1_BAR);
+ phys_map_get(p->chip_id, NPU_NTL,
+ NPU2DEV_BRICK(dev) + (2 * NPU2DEV_STACK(dev)),
+ &npu2_bar->base, &npu2_bar->size);
- npu2_read_bar(p, npu2_bar);
dev->bars[0].flags = PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64;
/* BAR2/3 is the GENID bar. */
npu2_bar = &dev->bars[1].npu2_bar;
- npu2_bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev), 0, NPU2_GENID_BAR);
- npu2_read_bar(p, npu2_bar);
+ npu2_bar->reg = NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR);
+ phys_map_get(p->chip_id, NPU_GENID, NPU2DEV_STACK(dev),
+ &npu2_bar->base, &npu2_bar->size);
/* The GENID is a single physical BAR that we split
* for each emulated device */
diff --git a/include/npu2.h b/include/npu2.h
index 6476c729..88ebe9e1 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -17,6 +17,8 @@
#ifndef __NPU2_H
#define __NPU2_H
+#include <phys-map.h>
+
/* Debugging options */
#define NPU2DBG(p, fmt, a...) prlog(PR_DEBUG, "NPU%d: " fmt, \
(p)->phb.opal_id, ##a)
@@ -45,6 +47,8 @@
* emulated PCIe BARs. The is a subtle difference between the two as
* not all BARs are exposed outside of skiboot. */
struct npu2_bar {
+ enum phys_map_type type;
+ int index;
#define NPU2_BAR_FLAG_ENABLED 0x0010
/* Generation ID's are a single space in the hardware but we split
--
Andrew Donnellan OzLabs, ADL Canberra
andrew.donnellan at au1.ibm.com IBM Australia Limited
More information about the Skiboot
mailing list