[Skiboot] [PATCH v2] npu2: Use phys-map to get MMIO BARs
Alistair Popple
alistair at popple.id.au
Fri Jun 16 16:35:42 AEST 2017
Hi Andrew,
Couple of minor comments below. Didn't find any typos though, guess I can leave
that to others...
On Thu, 15 Jun 2017 04:23:04 PM Andrew Donnellan wrote:
> Commit bdea201a4c4b ("hw/npu2.c: Use phys-map to get GPU memory BARs")
> added use of phys-map for setting GPU memory BARs.
>
> Move the MMIO BARs over to using phys-map as well.
>
> Cc: Alistair Popple <alistair at popple.id.au>
> Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
> ---
>
> On top of http://patchwork.ozlabs.org/patch/776138/
>
> ---
> hw/npu2.c | 90 ++++++++++++++++++++++++++--------------------------------
> include/npu2.h | 4 +++
> 2 files changed, 44 insertions(+), 50 deletions(-)
>
> diff --git a/hw/npu2.c b/hw/npu2.c
> index 8786cc1e..6019c644 100644
> --- a/hw/npu2.c
> +++ b/hw/npu2.c
> @@ -1101,55 +1101,45 @@ static const struct phb_ops npu_ops = {
>
> static void assign_mmio_bars(uint64_t gcid, uint32_t scom, uint64_t reg[2], uint64_t mm_win[2])
> {
> - uint64_t mem_start;
> uint32_t i;
> struct npu2_bar *bar;
> struct npu2_bar npu2_bars[] = {
> - { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR), .size = 0x1000000,
> + /*
> + * NPU_REGS must be first in this list, at least on DD1.
> + * On DD2, stack 0 will be used for NPU_REGS, stack 1/2 for NPU_PHY.
> + */
> + { .type = NPU_REGS, .index = 0,
> + .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
> .flags = NPU2_BAR_FLAG_ENABLED },
> - { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR), .size = 0x200000,
> + { .type = NPU_PHY, .index = 0,
> + .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
> .flags = NPU2_BAR_FLAG_ENABLED },
> - { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR), .size = 0x200000,
> + { .type = NPU_PHY, .index = 1,
> + .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
> .flags = NPU2_BAR_FLAG_ENABLED },
> - { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL0_BAR), .size = 0x20000 },
> - { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL1_BAR), .size = 0x20000 },
> - { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL0_BAR), .size = 0x20000 },
> - { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL1_BAR), .size = 0x20000 },
> - { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL0_BAR), .size = 0x20000 },
> - { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL1_BAR), .size = 0x20000 },
> - { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_GENID_BAR), .size = 0x20000 },
> - { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_GENID_BAR), .size = 0x20000 },
> - { .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_GENID_BAR), .size = 0x20000 },
> + { .type = NPU_NTL, .index = 0,
> + .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL0_BAR) },
> + { .type = NPU_NTL, .index = 1,
> + .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL1_BAR) },
> + { .type = NPU_NTL, .index = 2,
> + .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL0_BAR) },
> + { .type = NPU_NTL, .index = 3,
> + .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL1_BAR) },
> + { .type = NPU_NTL, .index = 4,
> + .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL0_BAR) },
> + { .type = NPU_NTL, .index = 5,
> + .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL1_BAR) },
> + { .type = NPU_GENID, .index = 0,
> + .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_GENID_BAR) },
> + { .type = NPU_GENID, .index = 1,
> + .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_GENID_BAR) },
> + { .type = NPU_GENID, .index = 2,
> + .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_GENID_BAR) },
> };
>
> - mem_start = 0x6030200000000;
> - mem_start |= gcid << PPC_BITLSHIFT(21);
> -
> - /*
> - * We're going to assign the BARs in reversed order according
> - * to their sizes, just like the order we have in npu_bars[].
> - * In that way, all BARs will be aligned perfectly without
> - * wasting resources. Also, the Linux kernel won't change
> - * anything though it attempts to reassign the BARs that
> - * it can see, which are NTL and GENID BARs.
> - *
> - * GLOBAL MMIO (16MB)
> - * PHY0 (2MB)
> - * PHB1 (2MB)
> - * NTL0 (128KB)
> - * NTL1 (128KB)
> - * NTL2 (128KB)
> - * NTL3 (128KB)
> - * NTL4 (128KB)
> - * NTL5 (128KB)
> - * GENID0 (128KB)
> - * GENID1 (128KB)
> - * GENID2 (128KB)
> - */
> for (i = 0; i < ARRAY_SIZE(npu2_bars); i++) {
> bar = &npu2_bars[i];
> - bar->base = mem_start;
> - mem_start += bar->size;
> + phys_map_get(gcid, bar->type, bar->index, &bar->base, &bar->size);
> npu2_write_bar(NULL, bar, gcid, scom);
> }
>
> @@ -1442,6 +1432,7 @@ static void npu2_populate_devices(struct npu2 *p,
> struct npu2_dev *dev;
> struct dt_node *npu2_dn, *link;
> uint32_t npu_phandle, index = 0;
> + int stack;
>
> /*
> * Get the npu node which has the links which we expand here
> @@ -1474,23 +1465,22 @@ static void npu2_populate_devices(struct npu2 *p,
> dev->pl_xscom_base = dt_prop_get_u64(link, "ibm,npu-phy");
> dev->lane_mask = dt_prop_get_u32(link, "ibm,npu-lane-mask");
>
> - /* Populate BARs. BAR0/1 is the NTL bar. We initialise
> - * it from the HW. */
> + /* Populate BARs. BAR0/1 is the NTL bar. */
> + stack = NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev);
> npu2_bar = &dev->bars[0].npu2_bar;
> - if (NPU2DEV_BRICK(dev) == 0)
> - /* Leave the block as 0 - the read/write bar
> - * functions fill it in */
> - npu2_bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev), 0, NPU2_NTL0_BAR);
> - else
> - npu2_bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev), 0, NPU2_NTL1_BAR);
> + npu2_bar->reg = NPU2_REG_OFFSET(stack, 0, NPU2DEV_BRICK(dev) == 0 ?
> + NPU2_NTL0_BAR : NPU2_NTL1_BAR);
> + phys_map_get(p->chip_id, NPU_NTL,
> + NPU2DEV_BRICK(dev) + (2 * NPU2DEV_STACK(dev)),
It would be clearer to use dev->index here. Given you add fields to struct
npu2_bar to store type and index we should probably just initialise them as well
and reference them in the phys_map_get() call.
> + &npu2_bar->base, &npu2_bar->size);
>
> - npu2_read_bar(p, npu2_bar);
> dev->bars[0].flags = PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64;
>
> /* BAR2/3 is the GENID bar. */
> npu2_bar = &dev->bars[1].npu2_bar;
> - npu2_bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev), 0, NPU2_GENID_BAR);
> - npu2_read_bar(p, npu2_bar);
> + npu2_bar->reg = NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR);
> + phys_map_get(p->chip_id, NPU_GENID, NPU2DEV_STACK(dev),
NPU2_STACK(dev) is correct here but again you should initialise it in
npu2_bar->index and npu2_bar->type.
Thanks,
Alistair
> + &npu2_bar->base, &npu2_bar->size);
>
> /* The GENID is a single physical BAR that we split
> * for each emulated device */
> diff --git a/include/npu2.h b/include/npu2.h
> index 6476c729..88ebe9e1 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -17,6 +17,8 @@
> #ifndef __NPU2_H
> #define __NPU2_H
>
> +#include <phys-map.h>
> +
> /* Debugging options */
> #define NPU2DBG(p, fmt, a...) prlog(PR_DEBUG, "NPU%d: " fmt, \
> (p)->phb.opal_id, ##a)
> @@ -45,6 +47,8 @@
> * emulated PCIe BARs. The is a subtle difference between the two as
> * not all BARs are exposed outside of skiboot. */
> struct npu2_bar {
> + enum phys_map_type type;
> + int index;
> #define NPU2_BAR_FLAG_ENABLED 0x0010
>
> /* Generation ID's are a single space in the hardware but we split
>
More information about the Skiboot
mailing list