[Skiboot] [PATCH 5/5] npu2: Move to new GPU memory map

Tue Nov 14 11:39:40 AEDT 2017

On Mon, Nov 13, 2017 at 10:06 PM, Michael Neuling <mikey at neuling.org> wrote:
> There are three different ways we configure the MCD and memory map.
>
> 1) Old way (current way)
>    Skiboot configures the MCD and puts GPUs at 4TB and below
> 2) New way with MCD
>    Hostboot configures the MCD and skiboot puts GPU at 4TB and above
> 3) New way without MCD
>    No one configures the MCD and skiboot puts GPU at 4TB and below
>
> The patch keeps option 1 and adds options 2 and 3.
>
> The different configurations are detected using certain scoms (see
> patch).
>
> Option 1 will go away eventually as it's a configuration that can
> cause xstops or data integrity problems. We are keeping it around to
> support existing hostboot.
>
> Option 2 supports only 4 GPUs and 512GB of memory per socket.
>
> Option 3 supports 6 GPUs and 4TB of memory but may have some
> performance impact.
>
> Signed-off-by: Michael Neuling <mikey at neuling.org>
> ---
>  hw/npu2.c           | 56 ++++++++++++++++++++++++++++++++++++++++++++++++-----
>  hw/phys-map.c       |  5 +++++
>  include/npu2-regs.h |  5 +++++
>  include/npu2.h      |  1 +
>  include/phys-map.h  |  1 +
>  5 files changed, 63 insertions(+), 5 deletions(-)
>
> diff --git a/hw/npu2.c b/hw/npu2.c
> index 082591a638..9752dec5d7 100644
> --- a/hw/npu2.c
> +++ b/hw/npu2.c
> @@ -593,10 +593,11 @@ static struct dt_node *npu2_create_memory_dn(uint64_t addr, uint64_t size)
>   * on bdfn. */
>  static void npu2_get_gpu_base(struct npu2_dev *ndev, uint64_t *addr, uint64_t *size)
>  {
> +       struct npu2 *p = ndev->npu;
>         int group;
>
>         group = (ndev->bdfn >> 3) & 0x1f;
> -       phys_map_get(ndev->npu->chip_id, GPU_MEM_4T_DOWN, group, addr, size);
> +       phys_map_get(ndev->npu->chip_id, p->gpu_map_type, group, addr, size);
>  }
>
>  static void npu2_dn_fixup_gmb(struct dt_node *pd_dn, struct npu2_dev *ndev)
> @@ -855,16 +856,16 @@ static void npu2_mcd_init(struct npu2 *p)
>         uint64_t size, addr, gpu_min_addr, gpu_max_addr, total_size;
>
>         /* Init memory cache directory (MCD) registers. */
> -       phys_map_get(p->chip_id, GPU_MEM_4T_DOWN, NPU2_LINKS_PER_CHIP - 1,
> +       phys_map_get(p->chip_id, p->gpu_map_type, NPU2_LINKS_PER_CHIP - 1,
>                         &gpu_min_addr, NULL);
> -       phys_map_get(p->chip_id, GPU_MEM_4T_DOWN, 0, &gpu_max_addr, &size);
> +       phys_map_get(p->chip_id, p->gpu_map_type, 0, &gpu_max_addr, &size);
>         gpu_max_addr += size;
>
>         /* We assume GPU memory is contiguous from the first possible GPU to the
>          * last and that the size is the same so best to check that. */
>         for (i = 0; i < NPU2_LINKS_PER_CHIP; i++) {
>                 uint64_t tmp;
> -               phys_map_get(p->chip_id, GPU_MEM_4T_DOWN, i, &addr, &tmp);
> +               phys_map_get(p->chip_id, p->gpu_map_type, i, &addr, &tmp);
>                 assert((addr >= gpu_min_addr) && (addr + tmp <= gpu_max_addr));
>                 assert(tmp == size);
>         }
> @@ -905,7 +906,52 @@ static void npu2_hw_init(struct npu2 *p)
>                 npu2_write(p, NPU2_XTS_CFG2, val | NPU2_XTS_CFG2_NO_FLUSH_ENA);
>         }
>
> -       npu2_mcd_init(p);
> +       /*
> +        * There are three different ways we configure the MCD and memory map.
> +        * 1) Old way
> +        *    Skiboot configures the MCD and puts GPUs at 4TB and below
> +        * 2) New way with MCD
> +        *    Hostboot configures the MCD and skiboot puts GPU at 4TB and above
> +        * 3) New way without MCD
> +        *    No one configures the MCD and skiboot puts GPU at 4TB and below
> +        *
> +        * 1) Will go away evenutally as it's a configuration that can
> +        *    cause an xstop or data integrity problems. We are keeping
> +        *    it around to support existing hostboot. Print error
> +        *    message if used.
> +        * 2) Is for smaller memory configurations and will be used
> +        *    initially for GPUs on Witherspoon. Supports only to
> +        *    512GB of memory and 4 GPUs per socket.
> +        * 3) Is for fully populated configurations of 4TB of memory
> +        *    and 6GPUs per socket. May have performance impacts.
> +        *
> +        * The different configurations can be detected via the following scoms:
> +        * 1) 0x5011c0c bit 2 = 1, 0x5011c0a bits 42:48 = 0
> +        * 2) 0x5011c0c bit 2 = 1, 0x5011c0a bits 42:48 = 3
> +        * 3) 0x5011c0c bit 2 = 0, 0x5011c0a bits 42:48 = 0
> +        */
> +
> +       /* Get 0x05011c0c bit 2 = 1 */
> +       xscom_read(p->chip_id, PB_CENT_HP_MODE_CURR, &val);
> +       if ((val & PB_CFG_CHG_RATE_GP_MASTER) != 0) {
> +               /* Get 0x05011c0a bits 42:48 */
> +               xscom_read(p->chip_id, PB_CENT_MODE, &val);
> +               if (GETFIELD(PB_CFG_CHIP_ADDR_EXTENSION_MASK_CENT, val) == 0) {
> +                       /* 1) */
> +                       NPU2DBG(p, "Using old memory map + MCD enabled in skiboot\n");
> +                       NPU2ERR(p, "!!! Old firmware detected. Update hostboot for new MCD mapping !!!\n");
> +                       p->gpu_map_type = GPU_MEM_4T_DOWN;
> +                       npu2_mcd_init(p);
> +               } else {
> +                       /* 2) */
> +                       NPU2DBG(p, "Using small memory map + MCD enabled\n");
> +                       p->gpu_map_type = GPU_MEM_4T_UP;

Should be explicitly check if GETFIELD(..., val) == 3? I think it
might be safer.

> +               }
> +       } else {
> +               /* 3) */
> +               NPU2DBG(p, "Using large memory map + MCD disabled\n");
> +               p->gpu_map_type = GPU_MEM_4T_DOWN;
> +       }
>  }
>
>  static int64_t npu2_map_pe_dma_window_real(struct phb *phb,
> diff --git a/hw/phys-map.c b/hw/phys-map.c
> index 02bc33b8f8..a2b5dbd262 100644
> --- a/hw/phys-map.c
> +++ b/hw/phys-map.c
> @@ -46,6 +46,11 @@ static const struct phys_map_entry phys_map_table_nimbus[] = {
>         { GPU_MEM_4T_DOWN, 2, 0x000003a000000000ull, 0x0000002000000000ull },
>         { GPU_MEM_4T_DOWN, 1, 0x000003c000000000ull, 0x0000002000000000ull },
>         { GPU_MEM_4T_DOWN, 0, 0x000003e000000000ull, 0x0000002000000000ull },
> +       /* GPU memory from 4TB + 128GB*GPU. 4 GPUs only */

Physmap Space for 4 GPUs per socket, I guess that is implied

> +       { GPU_MEM_4T_UP,   0, 0x0000040000000000ull, 0x0000002000000000ull },
> +       { GPU_MEM_4T_UP,   1, 0x0000042000000000ull, 0x0000002000000000ull },
> +       { GPU_MEM_4T_UP,   2, 0x0000044000000000ull, 0x0000002000000000ull },
> +       { GPU_MEM_4T_UP,   3, 0x0000046000000000ull, 0x0000002000000000ull },
>
>         /* 0 TB offset @ MMIO 0x0006000000000000ull */
>         { PHB4_64BIT_MMIO, 0, 0x0006000000000000ull, 0x0000004000000000ull },
> diff --git a/include/npu2-regs.h b/include/npu2-regs.h
> index cef9dbf7f8..2764f9291e 100644
> --- a/include/npu2-regs.h
> +++ b/include/npu2-regs.h
> @@ -31,6 +31,11 @@ void npu2_write_mask(struct npu2 *p, uint64_t reg, uint64_t val, uint64_t mask);
>  #define        MCD_BANK_CN_VALID       PPC_BIT(0)
>  #define        MCD_BANK_CN_SIZE        PPC_BITMASK(13,29)
>  #define        MCD_BANK_CN_ADDR        PPC_BITMASK(33,63)
> +#define PB_CENT_HP_MODE_CURR   0x5011c0c
> +#define  PB_CFG_CHG_RATE_GP_MASTER PPC_BIT(2)
> +#define PB_CENT_MODE           0x5011c0a
> +#define  PB_CFG_CHIP_ADDR_EXTENSION_MASK_CENT PPC_BITMASK(42,48)
> +
>
>  #define NPU2_REG_OFFSET(stack, block, offset) \
>         (((stack) << 20) | ((block) << 16) | (offset))
> diff --git a/include/npu2.h b/include/npu2.h
> index f11a13a0b2..925eae0a64 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -128,6 +128,7 @@ struct npu2 {
>         uint32_t        base_lsi;
>         uint32_t        total_devices;
>         struct npu2_dev *devices;
> +       enum phys_map_type gpu_map_type;
>
>         /* IODA cache */
>         uint64_t        lxive_cache[8];
> diff --git a/include/phys-map.h b/include/phys-map.h
> index c9b2389615..73adda079e 100644
> --- a/include/phys-map.h
> +++ b/include/phys-map.h
> @@ -27,6 +27,7 @@ enum phys_map_type {
>         NULL_MAP,
>         SYSTEM_MEM,
>         GPU_MEM_4T_DOWN,
> +       GPU_MEM_4T_UP,
>         PHB4_64BIT_MMIO,
>         PHB4_32BIT_MMIO,
>         PHB4_XIVE_ESB,

Balbir Singh.