[Skiboot] [PATCH] hw/npu2-opencapi: Add initial support for allocating OpenCAPI LPC memory
Andrew Donnellan
andrew.donnellan at au1.ibm.com
Mon Apr 29 11:11:52 AEST 2019
On 24/4/19 11:56 pm, Frederic Barrat wrote:
> Hi Andrew,
>
> A few minor comments below.
>
>
>
> Le 24/04/2019 à 09:20, Andrew Donnellan a écrit :
>> Lowest Point of Coherency (LPC) memory allows the host to access
>> memory on
>> an OpenCAPI device.
>>
>> Define 2 OPAL calls, OPAL_NPU_MEM_ALLOC and OPAL_NPU_MEM_RELEASE, for
>> assigning and clearing the memory BAR. (We try to avoid using the term
>> "LPC" to avoid confusion with Low Pin Count.)
>>
>> At present, we use a fixed location in the address space, which means we
>> are restricted to a single range of 4TB, on a single OpenCAPI device per
>> chip. In future, we'll use some chip ID extension magic to give us more
>> space, and some sort of allocator to assign ranges to more than one
>> device.
>>
>> Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
>>
>> ---
>>
>> This code is currently being used for some internal testing of LPC memory
>> devices and seems to work acceptably for that purpose. We haven't tested
>> all the corner cases... this is really just intended to enable
>> prototyping
>> and bringup at this stage.
>> ---
>> hw/npu2-opencapi.c | 182
>> ++++++++++++++++++++++++++++++++++++++++++++++++++++
>> hw/phys-map.c | 11 ++++
>> include/npu2-regs.h | 7 ++
>> include/npu2.h | 5 ++
>> include/opal-api.h | 4 +-
>> include/phys-map.h | 1 +
>> 6 files changed, 208 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
>> index 9df51b22eda5..b98335e48daf 100644
>> --- a/hw/npu2-opencapi.c
>> +++ b/hw/npu2-opencapi.c
>> @@ -2025,3 +2025,185 @@ static int64_t opal_npu_tl_set(uint64_t
>> phb_id, uint32_t __unused bdfn,
>> return OPAL_SUCCESS;
>> }
>> opal_call(OPAL_NPU_TL_SET, opal_npu_tl_set, 5);
>> +
>> +static void set_mem_bar(struct npu2_dev *dev, uint64_t base, uint64_t
>> size)
>> +{
>> + uint64_t stack, val, reg, bar_offset, pa_config_offset;
>> + uint8_t memsel;
>> +
>> + stack = index_to_stack(dev->brick_index);
>> + switch (dev->brick_index) {
>> + case 2:
>> + case 4:
>> + bar_offset = NPU2_GPU0_MEM_BAR;
>> + pa_config_offset = NPU2_CQ_CTL_MISC_PA0_CONFIG;
>> + break;
>> + case 3:
>> + case 5:
>> + bar_offset = NPU2_GPU1_MEM_BAR;
>> + pa_config_offset = NPU2_CQ_CTL_MISC_PA1_CONFIG;
>> + break;
>> + default:
>> + assert(false);
>> + }
>> +
>> + /*
>> + * Memory select configuration:
>> + * - 0b000 - BAR disabled
>> + * - 0b001 - match 0b00, 0b01
>> + * - 0b010 - match 0b01, 0b10
>> + * - 0b011 - match 0b00, 0b10
>> + * - 0b100 - match 0b00
>> + * - 0b101 - match 0b01
>> + * - 0b110 - match 0b10
>> + * - 0b111 - match 0b00, 0b01, 0b10
>> + */
>> + memsel = GETFIELD(PPC_BITMASK(13, 14), base);
>> + val = SETFIELD(NPU2_MEM_BAR_EN | NPU2_MEM_BAR_SEL_MEM, 0ULL,
>> 0b100 + memsel);
>> +
>> + /* Base address - 12 bits, 1G aligned */
>> + val = SETFIELD(NPU2_MEM_BAR_NODE_ADDR, val,
>> GETFIELD(PPC_BITMASK(22, 33), base));
>> +
>> + /* GCID */
>> + val = SETFIELD(NPU2_MEM_BAR_GROUP, val, GETFIELD(PPC_BITMASK(15,
>> 18), base));
>> + val = SETFIELD(NPU2_MEM_BAR_CHIP, val, GETFIELD(PPC_BITMASK(19,
>> 21), base));
>> +
>> + /* Other settings */
>> + val = SETFIELD(NPU2_MEM_BAR_POISON, val, 1);
>> + val = SETFIELD(NPU2_MEM_BAR_GRANULE, val, 0);
>> + val = SETFIELD(NPU2_MEM_BAR_BAR_SIZE, val, ilog2(size >> 30));
>> + val = SETFIELD(NPU2_MEM_BAR_MODE, val, 0);
>> +
>> + for (int block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3;
>> block++) {
>> + reg = NPU2_REG_OFFSET(stack, block, bar_offset);
>> + npu2_write(dev->npu, reg, val);
>> + }
>> +
>> + /* Set PA config */
>> + val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_MEMSELMATCH, 0ULL,
>> 0b100 + memsel);
>> + val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_GRANULE, val, 0);
>> + val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_SIZE, val, ilog2(size
>> >> 30));
>> + val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_MODE, val, 0);
>> + val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_MASK, val, 0);
>> + reg = NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL, pa_config_offset);
>> + npu2_write(dev->npu, reg, val);
>> +}
>> +
>> +static int64_t alloc_mem_bar(struct npu2_dev *dev, uint64_t size,
>> uint64_t *bar)
>> +{
>> + uint64_t phys_map_base, phys_map_size;
>> +
>> + /*
>> + * Right now, we support 1 allocation per chip, of up to 4TB.
>> + *
>> + * In future, we will use chip address extension to support
>> + * >4TB ranges, and we will implement a more sophisticated
>> + * allocator to allow an allocation for every link on a chip.
>> + */
>> +
>> + if (dev->npu->lpc_mem_allocated)
>> + return OPAL_RESOURCE;
>> +
>> + phys_map_get(dev->npu->chip_id, OCAPI_MEM, 0, &phys_map_base,
>> &phys_map_size);
>> +
>> + if (size > phys_map_size) {
>> + /**
>> + * @fwts-label OCAPIInvalidLPCMemoryBARSize
>> + * @fwts-advice The operating system requested an unsupported
>> + * amount of OpenCAPI LPC memory. This is possibly a kernel
>> + * bug, or you may need to upgrade your firmware.
>> + */
>> + prlog(PR_ERR,
>> + "OCAPI: Invalid LPC memory BAR allocation size
>> requested: 0x%llx bytes (limit 0x%llx)\n",
>> + size, phys_map_size);
>
>
> OCAPIERR(dev, ...) ?
Ack
>
>
>> + return OPAL_PARAMETER;
>> + }
>> +
>> + /* Minimum BAR size is 1 GB */
>> + if (size < (2 << 29)) {
>> + size = 2 << 29;
>
>
> Interesting.... Is that the perception down under? 1 << 30 would seem
> more natural to me :-)
You're right, I'm just bad at maths... will fix
>
>
>> + }
>> +
>> + if (!is_pow2(size)) {
>> + size = 2 << ilog2(size);
>> + }
>> +
>> + set_mem_bar(dev, phys_map_base, size);
>> + *bar = phys_map_base;
>> + dev->npu->lpc_mem_allocated = dev;
>> +
>> + return OPAL_SUCCESS;
>> +}
>> +
>> +static int64_t release_mem_bar(struct npu2_dev *dev)
>> +{
>> + uint64_t stack, reg, bar_offset, pa_config_offset;
>> +
>> + if (dev->npu->lpc_mem_allocated != dev)
>> + return OPAL_PARAMETER;
>
> OPAL_PERMISSION?
It's not a "permission" thing as such; this is just the very simplistic
way that we tell that the device doesn't have any LPC BAR associated with it.
>
>> +
>> + stack = index_to_stack(dev->brick_index);
>> + switch (dev->brick_index) {
>> + case 2:
>> + case 4:
>> + bar_offset = NPU2_GPU0_MEM_BAR;
>> + pa_config_offset = NPU2_CQ_CTL_MISC_PA0_CONFIG;
>> + break;
>> + case 3:
>> + case 5:
>> + bar_offset = NPU2_GPU1_MEM_BAR;
>> + pa_config_offset = NPU2_CQ_CTL_MISC_PA1_CONFIG;
>> + break;
>> + default:
>> + assert(false);
>> + }
>> +
>> + for (int block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3;
>> block++) {
>> + reg = NPU2_REG_OFFSET(stack, block, bar_offset);
>> + npu2_write(dev->npu, reg, 0ull);
>> + }
>> + reg = NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL, pa_config_offset);
>> + npu2_write(dev->npu, reg, 0ull);
>
>
> I was expecting set_mem_bar(dev, 0, 0)
Yes, I should just do that.
>
>
>> +
>> + dev->npu->lpc_mem_allocated = NULL;
>> +
>> + return OPAL_SUCCESS;
>> +}
>> +
>> +static int64_t opal_npu_mem_alloc(uint64_t phb_id, uint32_t __unused
>> bdfn,
>> + uint64_t size, uint64_t *bar)
>> +{
>> + struct phb *phb = pci_get_phb(phb_id);
>> + struct npu2_dev *dev;
>> +
>> +
>> + if (!phb || phb->phb_type != phb_type_npu_v2_opencapi)
>> + return OPAL_PARAMETER;
>> +
>> + dev = phb_to_npu2_dev_ocapi(phb);
>> + if (!dev)
>> + return OPAL_PARAMETER;
>> +
>> + if (!opal_addr_valid(bar))
>> + return OPAL_PARAMETER;
>> +
>> + return alloc_mem_bar(dev, size, bar);
>> +}
>> +opal_call(OPAL_NPU_MEM_ALLOC, opal_npu_mem_alloc, 4);
>> +
>> +static int64_t opal_npu_mem_release(uint64_t phb_id, uint32_t
>> __unused bdfn)
>> +{
>> + struct phb *phb = pci_get_phb(phb_id);
>> + struct npu2_dev *dev;
>> +
>> +
>> + if (!phb || phb->phb_type != phb_type_npu_v2_opencapi)
>> + return OPAL_PARAMETER;
>> +
>> + dev = phb_to_npu2_dev_ocapi(phb);
>> + if (!dev)
>> + return OPAL_PARAMETER;
>> +
>> + return release_mem_bar(dev);
>> +}
>> +opal_call(OPAL_NPU_MEM_RELEASE, opal_npu_mem_release, 2);
>> diff --git a/hw/phys-map.c b/hw/phys-map.c
>> index fe949e4043ff..75836297c2f9 100644
>> --- a/hw/phys-map.c
>> +++ b/hw/phys-map.c
>> @@ -52,6 +52,17 @@ static const struct phys_map_entry
>> phys_map_table_nimbus[] = {
>> { GPU_MEM_4T_UP, 2, 0x0000044000000000ull,
>> 0x0000002000000000ull },
>> { GPU_MEM_4T_UP, 3, 0x0000046000000000ull,
>> 0x0000002000000000ull },
>> + /*
>> + * OpenCAPI LPC Memory - single 4TB range per chip, fills
>> + * whole second non-mirrored region.
>> + *
>> + * Longer term, we're going to use chip address extension to
>> + * enable >4TB to be allocated per chip. At that point, we
>> + * may have to find another way of assigning these ranges
>> + * outside of phys-map.
>> + */
>> + { OCAPI_MEM, 0, 0x0002000000000000ull,
>> 0x0000040000000000ull },
>> +
>
>
> I found some old discussions between Balbir and powerVM about the
> suitable spot for the LPC memory, and it matches that range. Is that
> also your source, or are there some other considerations as well?
Those would be the discussions we were having around the time I started
implementing this.
>
> Fred
>
>
>> /* 0 TB offset @ MMIO 0x0006000000000000ull */
>> { PHB4_64BIT_MMIO, 0, 0x0006000000000000ull,
>> 0x0000004000000000ull },
>> { PHB4_64BIT_MMIO, 1, 0x0006004000000000ull,
>> 0x0000004000000000ull },
>> diff --git a/include/npu2-regs.h b/include/npu2-regs.h
>> index ba10b8eaf88d..ecf47abf6c96 100644
>> --- a/include/npu2-regs.h
>> +++ b/include/npu2-regs.h
>> @@ -239,6 +239,13 @@ void npu2_scom_write(uint64_t gcid, uint64_t
>> scom_base,
>> #define NPU2_CQ_CTL_STATUS 0x090
>> #define NPU2_CQ_CTL_STATUS_BRK0_AM_FENCED PPC_BITMASK(48, 49)
>> #define NPU2_CQ_CTL_STATUS_BRK1_AM_FENCED PPC_BITMASK(50, 51)
>> +#define NPU2_CQ_CTL_MISC_PA0_CONFIG 0x0A0 /* or should that be
>> CS */
>> +#define NPU2_CQ_CTL_MISC_PA1_CONFIG 0x0A8 /* or should that be
>> CS */
>> +#define NPU2_CQ_CTL_MISC_PA_CONFIG_MEMSELMATCH PPC_BITMASK(0,2)
>> +#define NPU2_CQ_CTL_MISC_PA_CONFIG_GRANULE PPC_BIT(3)
>> +#define NPU2_CQ_CTL_MISC_PA_CONFIG_SIZE PPC_BITMASK(4,7)
>> +#define NPU2_CQ_CTL_MISC_PA_CONFIG_MODE PPC_BITMASK(8,11)
>> +#define NPU2_CQ_CTL_MISC_PA_CONFIG_MASK PPC_BITMASK(13,19)
>> #define NPU2_CQ_C_ERR_RPT_MSG0 0x0C0
>> #define NPU2_CQ_C_ERR_RPT_MSG1 0x0C8
>> #define NPU2_CQ_C_ERR_RPT_FIRST0 0x0D0
>> diff --git a/include/npu2.h b/include/npu2.h
>> index d58aab47bb30..9febf1a343ef 100644
>> --- a/include/npu2.h
>> +++ b/include/npu2.h
>> @@ -193,6 +193,11 @@ struct npu2 {
>> struct lock i2c_lock;
>> uint8_t i2c_pin_mode;
>> uint8_t i2c_pin_wr_state;
>> + /*
>> + * Which device currently has an LPC allocation.
>> + * Temporary as long as we only support 1 LPC alloc per chip.
>> + */
>> + struct npu2_dev *lpc_mem_allocated;
>> };
>> static inline struct npu2 *phb_to_npu2_nvlink(struct phb *phb)
>> diff --git a/include/opal-api.h b/include/opal-api.h
>> index e461c9d278c2..b0ad435539fb 100644
>> --- a/include/opal-api.h
>> +++ b/include/opal-api.h
>> @@ -227,8 +227,8 @@
>> #define OPAL_NPU_SET_RELAXED_ORDER 168
>> #define OPAL_NPU_GET_RELAXED_ORDER 169
>> #define OPAL_XIVE_GET_VP_STATE 170 /* Get NVT state */
>> -#define OPAL_NPU_RESERVED1 171 /* LPC Allocate */
>> -#define OPAL_NPU_RESERVED2 172 /* LPC Release */
>> +#define OPAL_NPU_MEM_ALLOC 171
>> +#define OPAL_NPU_MEM_RELEASE 172
>> #define OPAL_LAST 172
>> #define QUIESCE_HOLD 1 /* Spin all calls at entry */
>> diff --git a/include/phys-map.h b/include/phys-map.h
>> index 73adda079e23..0cf48b6628af 100644
>> --- a/include/phys-map.h
>> +++ b/include/phys-map.h
>> @@ -28,6 +28,7 @@ enum phys_map_type {
>> SYSTEM_MEM,
>> GPU_MEM_4T_DOWN,
>> GPU_MEM_4T_UP,
>> + OCAPI_MEM,
>> PHB4_64BIT_MMIO,
>> PHB4_32BIT_MMIO,
>> PHB4_XIVE_ESB,
>>
--
Andrew Donnellan OzLabs, ADL Canberra
andrew.donnellan at au1.ibm.com IBM Australia Limited
More information about the Skiboot mailing list