[Skiboot] [PATCH v2] hw/npu2-opencapi: Support multiple LPC devices
Frederic Barrat
fbarrat at linux.ibm.com
Sat Jan 25 00:14:02 AEDT 2020
Le 24/01/2020 à 05:57, Andrew Donnellan a écrit :
> Currently, we only have a single range for LPC memory per chip, and we only
> allow a single device to use that range.
>
> With upcoming Hostboot/SBE changes, we'll use the chip address extension
> mask to give us multiple ranges by using the masked bits of the group ID.
>
> Each device can now allocate a whole 4TB non-mirrored region. We still
> don't do >4TB ranges.
>
> If the extension mask is not set correctly, we'll fall back to only
> permitting one device and printing an error suggesting a firmware upgrade.
>
> Signed-off-by: Andrew Donnellan <ajd at linux.ibm.com>
>
> ---
>
> I've been able to test this under limited configurations, so far so good.
>
> v1->v2:
> - fix excessively large range size which I'd left in there for some reason
>
> fix size
>
> Signed-off-by: Andrew Donnellan <ajd at linux.ibm.com>
> ---
> hw/npu2-opencapi.c | 43 +++++++++++++++++++++++++++++--------------
> hw/phys-map.c | 15 +++++++++------
> include/npu2.h | 2 ++
> 3 files changed, 40 insertions(+), 20 deletions(-)
>
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 19589c92d477..37d05340e00b 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -2179,25 +2179,38 @@ static void set_mem_bar(struct npu2_dev *dev, uint64_t base, uint64_t size)
>
> static int64_t alloc_mem_bar(struct npu2_dev *dev, uint64_t size, uint64_t *bar)
> {
> - uint64_t phys_map_base, phys_map_size;
> + uint64_t phys_map_base, phys_map_size, val;
> int rc = OPAL_SUCCESS;
>
> lock(&dev->npu->lock);
>
> - /*
> - * Right now, we support 1 allocation per chip, of up to 4TB.
> - *
> - * In future, we will use chip address extension to support
> - * >4TB ranges, and we will implement a more sophisticated
> - * allocator to allow an allocation for every link on a chip.
> - */
> -
> - if (dev->npu->lpc_mem_allocated) {
The lpc_mem_allocated member is no longer used after this change, so it
could be dropped from the npu structure...
> + if (dev->lpc_mem_base) {
> + OCAPIERR(dev, "LPC allocation failed - BAR already in use\n");
> rc = OPAL_RESOURCE;
> goto out;
> }
>
> - phys_map_get(dev->npu->chip_id, OCAPI_MEM, 0, &phys_map_base, &phys_map_size);
> + xscom_read(dev->npu->chip_id, PB_CENT_MODE, &val);
> + if (GETFIELD(PB_CFG_CHIP_ADDR_EXTENSION_MASK_CENT, val) == 0b1100100) {
> + phys_map_get(dev->npu->chip_id, OCAPI_MEM,
> + dev->brick_index - 2, &phys_map_base,
> + &phys_map_size);
> + } else {
> + bool in_use = false;
> + for (int i = 0; i < dev->npu->total_devices; i++) {
> + if (dev->npu->devices[i].lpc_mem_base)
> + in_use = true;
> + }
... or maybe lpc_mem_allocated could be renamed/reused here, to avoid
having to compute 'in_use' dynamically by looping over every device?
Fred
> +
> + if (in_use) {
> + OCAPIERR(dev, "LPC allocation failed - single device per chip limit, FW upgrade required (pb_cent_mode=0x%016llx)\n", val);
> + rc = OPAL_RESOURCE;
> + goto out;
> + }
> +
> + phys_map_get(dev->npu->chip_id, OCAPI_MEM, 0, &phys_map_base,
> + &phys_map_size);
> + }
>
> if (size > phys_map_size) {
> /**
> @@ -2223,7 +2236,8 @@ static int64_t alloc_mem_bar(struct npu2_dev *dev, uint64_t size, uint64_t *bar)
>
> set_mem_bar(dev, phys_map_base, size);
> *bar = phys_map_base;
> - dev->npu->lpc_mem_allocated = dev;
> + dev->lpc_mem_base = phys_map_base;
> + dev->lpc_mem_size = size;
>
> out:
> unlock(&dev->npu->lock);
> @@ -2236,13 +2250,14 @@ static int64_t release_mem_bar(struct npu2_dev *dev)
>
> lock(&dev->npu->lock);
>
> - if (dev->npu->lpc_mem_allocated != dev) {
> + if (!dev->lpc_mem_base) {
> rc = OPAL_PARAMETER;
> goto out;
> }
>
> set_mem_bar(dev, 0, 0);
> - dev->npu->lpc_mem_allocated = NULL;
> + dev->lpc_mem_base = 0;
> + dev->lpc_mem_size = 0;
>
> out:
> unlock(&dev->npu->lock);
> diff --git a/hw/phys-map.c b/hw/phys-map.c
> index 9917da7cdf94..fd79b3c123a4 100644
> --- a/hw/phys-map.c
> +++ b/hw/phys-map.c
> @@ -44,15 +44,18 @@ static const struct phys_map_entry phys_map_table_nimbus[] = {
> { GPU_MEM_4T_UP, 3, 0x0000046000000000ull, 0x0000002000000000ull },
>
> /*
> - * OpenCAPI LPC Memory - single 4TB range per chip, fills
> - * whole second non-mirrored region.
> + * OpenCAPI LPC Memory
> *
> - * Longer term, we're going to use chip address extension to
> - * enable >4TB to be allocated per chip. At that point, we
> - * may have to find another way of assigning these ranges
> - * outside of phys-map.
> + * With chip address extension enabled, we allocate 4TB ranges
> + * (in the second non-mirrored region) for each OpenCAPI link
> + * by varying the upper 2 bits of the group ID.
> + *
> + * We don't currently support >4TB ranges.
> */
> { OCAPI_MEM, 0, 0x0002000000000000ull, 0x0000040000000000ull },
> + { OCAPI_MEM, 1, 0x0002200000000000ull, 0x0000040000000000ull },
> + { OCAPI_MEM, 2, 0x0002400000000000ull, 0x0000040000000000ull },
> + { OCAPI_MEM, 3, 0x0002600000000000ull, 0x0000040000000000ull },
>
> /* 0 TB offset @ MMIO 0x0006000000000000ull */
> { PHB4_64BIT_MMIO, 0, 0x0006000000000000ull, 0x0000004000000000ull },
> diff --git a/include/npu2.h b/include/npu2.h
> index d2a3430e3e3a..c72e6c8e8f11 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -150,6 +150,8 @@ struct npu2_dev {
> uint64_t linux_pe;
> unsigned long train_start;
> unsigned long train_timeout;
> + uint64_t lpc_mem_base;
> + uint64_t lpc_mem_size;
> };
>
> struct npu2 {
>
More information about the Skiboot mailing list