[Skiboot] [PATCH v2 10/15] hw/npu2: Combined NPU brick configuration procedure

Alexey Kardashevskiy aik at ozlabs.ru
Fri Feb 1 15:52:02 AEDT 2019



On 31/01/2019 15:39, Andrew Donnellan wrote:
> On 21/1/19 5:18 pm, Alexey Kardashevskiy wrote:
>>
>>
>> On 11/01/2019 12:09, Andrew Donnellan wrote:
>>> To support the case where we have both OpenCAPI and NVLink devices
>>> connected to the same NPU, we need to be able to configure each NPU
>>> stack/brick separately depending on the device type it is connected to.
>>>
>>> Refactor the existing code so that we can set the transport muxes and
>>> other
>>> relevant NPU registers on a per-brick/stack basis. Replace the existing
>>> hard-coded register values for NVLink setup with code to do it properly
>>> only on the devices that need it.
>>
>> This "replace" is in fact obfuscation of the actual change :( Frederic
>> must be a superman to have this reviewed :)
> 
> "Replace" is a fairly accurate description IMHO. :)

If it all happened within the same npu2_nvlink_init_npu() - suuuure.
Or ok, npu2_nvlink_init_npu() and enable_nvlink(), to avoid moving
chunks again in the same patchset. But the other changes seem unrelated.


> 
>>
>>
>>>
>>> Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
>>> Reviewed-by: Frederic Barrat <fbarrat at linux.ibm.com>
>>>
>>> ---
>>> v1->v2:
>>> - commit message (Fred)
>>> ---
>>>   hw/npu2-common.c    | 126
>>> +++++++++++++++++++++++++++++++++++++++++++++-
>>>   hw/npu2-opencapi.c  | 109 +---------------------------------------
>>>   hw/npu2.c           | 121 +++++++++++++++++++++++--------------------
>>>   include/npu2-regs.h |   4 +-
>>>   include/npu2.h      |   1 +-
>>>   5 files changed, 196 insertions(+), 165 deletions(-)
>>>
>>> diff --git a/hw/npu2-common.c b/hw/npu2-common.c
>>> index 282e4873125a..e323e71cb8d5 100644
>>> --- a/hw/npu2-common.c
>>> +++ b/hw/npu2-common.c
>>> @@ -411,6 +411,130 @@ void npu2_i2c_presence_detect(struct npu2 *npu)
>>>       }
>>>   }
>>>   +/* Procedure 13.1.3.1, OpenCAPI NPU Workbook - Select OCAPI vs
>>> NVLink */
>>> +static void set_brick_config(struct npu2 *npu)
>>> +{
>>> +    /* Step 1 - Set Transport MUX controls to select correct OTL or
>>> NTL */
>>> +    uint64_t reg;
>>> +    uint64_t ndlmux_brk0to2, ocmux_brk0to1, ocmux_brk4to5;
>>> +    uint64_t phy_config_scom;
>>> +    struct npu2_dev *dev;
>>> +
>>> +    prlog(PR_DEBUG, "NPU: %s: Setting transport mux controls\n",
>>> __func__);
>>> +
>>> +    /*
>>> +     * Disable NVLink link layers on PHYs being used for OpenCAPI.
>>> +     * Experience suggests that this needs to be done early
>>> +     * (before setting the transport muxes) otherwise we get link
>>> +     * internal errors.
>>> +     */
>>> +    for (int i = 0; i < npu->total_devices; i++) {
>>> +        dev = &npu->devices[i];
>>> +        if (dev->type != NPU2_DEV_TYPE_OPENCAPI)
>>> +            continue;
>>> +        switch (dev->brick_index) {
>>> +        case 2:
>>> +        case 3:
>>> +            phy_config_scom = OBUS_LL0_IOOL_PHY_CONFIG;
>>> +            break;
>>> +        case 4:
>>> +        case 5:
>>> +            phy_config_scom = OBUS_LL3_IOOL_PHY_CONFIG;
>>> +            break;
>>> +        default:
>>> +            assert(false);
>>> +        }
>>> +        /* Disable NV-Link link layers */
>>> +        xscom_read(npu->chip_id, phy_config_scom, &reg);
>>> +        reg &= ~OBUS_IOOL_PHY_CONFIG_NV0_NPU_ENABLED;
>>> +        reg &= ~OBUS_IOOL_PHY_CONFIG_NV1_NPU_ENABLED;
>>> +        reg &= ~OBUS_IOOL_PHY_CONFIG_NV2_NPU_ENABLED;
>>> +        xscom_write(npu->chip_id, phy_config_scom, reg);
>>> +    }
>>> +
>>> +    /* Optical IO Transport Mux Config for Bricks 0-2 and 4-5 */
>>> +    reg = npu2_scom_read(npu->chip_id, npu->xscom_base,
>>> NPU2_MISC_OPTICAL_IO_CFG0,
>>> +                 NPU2_MISC_DA_LEN_8B);
>>> +    ndlmux_brk0to2 =
>>> GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg);
>>> +    ocmux_brk0to1 =
>>> GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg);
>>> +    ocmux_brk4to5 =
>>> GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg);
>>> +    for (int i = 0; i < npu->total_devices; i++) {
>>> +        dev = &npu->devices[i];
>>> +        if (dev->type == NPU2_DEV_TYPE_UNKNOWN)
>>> +            continue;
>>> +
>>> +        switch (dev->brick_index) {
>>> +        case 0:  /* NTL0.0 */
>>> +            assert(dev->type == NPU2_DEV_TYPE_NVLINK);
>>> +            ndlmux_brk0to2 |= 0b100;
>>> +            break;
>>> +        case 1:  /* NTL0.1 */
>>> +            assert(dev->type == NPU2_DEV_TYPE_NVLINK);
>>> +            ndlmux_brk0to2 |= 0b010;
>>> +            break;
>>> +        case 2:     /* NTL1.0 / OTL1.0 */
>>> +            if (dev->type == NPU2_DEV_TYPE_OPENCAPI) {
>>> +                ndlmux_brk0to2 &= ~0b100;
>>> +                ocmux_brk0to1 |= 0b10;
>>> +            } else {
>>> +                ndlmux_brk0to2 |= 0b001;
>>> +            }
>>> +            break;
>>> +        case 3:     /* NTL1.1 / OTL1.1 */
>>> +            if (dev->type == NPU2_DEV_TYPE_OPENCAPI) {
>>> +                ndlmux_brk0to2 &= ~0b010;
>>> +                ocmux_brk0to1 |= 0b01;
>>> +            }
>>> +            break;
>>> +        case 4:     /* NTL2.0 / OTL2.0 */
>>> +            if (dev->type == NPU2_DEV_TYPE_OPENCAPI) {
>>> +                ocmux_brk4to5 |= 0b10;
>>> +            } else {
>>> +                ocmux_brk4to5 &= ~0b10;
>>> +            }
>>> +            break;
>>> +        case 5:     /* NTL2.1 / OTL2.1 */
>>> +            if (dev->type == NPU2_DEV_TYPE_OPENCAPI) {
>>> +                ocmux_brk4to5 |= 0b01;
>>> +            } else {
>>> +                ocmux_brk4to5 &= ~0b01;
>>> +            }
>>> +            break;
>>> +        default:
>>> +            assert(false);
>>> +        }
>>> +    }
>>> +
>>> +    reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg,
>>> ndlmux_brk0to2);
>>> +    reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg,
>>> ocmux_brk0to1);
>>> +    reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg,
>>> ocmux_brk4to5);
>>> +    npu2_scom_write(npu->chip_id, npu->xscom_base,
>>> NPU2_MISC_OPTICAL_IO_CFG0,
>>> +            NPU2_MISC_DA_LEN_8B, reg);
>>> +
>>> +    /*
>>> +     * PowerBus Optical Miscellaneous Config Register
>>> +     */
>>> +    xscom_read(npu->chip_id, PU_IOE_PB_MISC_CFG, &reg);
>>> +    for (int i = 0; i < npu->total_devices; i++) {
>>> +        dev = &npu->devices[i];
>>> +        switch (dev->brick_index) {
>>> +        case 3:
>>> +            if (dev->type == NPU2_DEV_TYPE_NVLINK)
>>> +                reg = SETFIELD(PU_IOE_PB_MISC_CFG_SEL_03_NPU_NOT_PB,
>>> reg, 1);
>>> +            break;
>>> +        case 4:
>>> +            reg = SETFIELD(PU_IOE_PB_MISC_CFG_SEL_04_NPU_NOT_PB,
>>> reg, 1);
>>> +            break;
>>> +        case 5:
>>> +            reg = SETFIELD(PU_IOE_PB_MISC_CFG_SEL_05_NPU_NOT_PB,
>>> reg, 1);
>>> +            break;
>>> +        default:
>>> +            break;
>>> +        }
>>> +    }
>>> +    xscom_write(npu->chip_id, PU_IOE_PB_MISC_CFG, reg);
>>> +}
>>> +
>>>   static struct npu2 *setup_npu(struct dt_node *dn)
>>>   {
>>>       struct npu2 *npu;
>>> @@ -471,6 +595,7 @@ static struct npu2 *setup_npu(struct dt_node *dn)
>>>           dev->link_index = dt_prop_get_u32(np, "ibm,npu-link-index");
>>>           /* May be overridden by platform presence detection */
>>>           dev->brick_index = dev->link_index;
>>> +        dev->group_id = dt_prop_get_u32(np, "ibm,npu-group-id");
>>>           /* Will be overridden by presence detection */
>>>           dev->type = NPU2_DEV_TYPE_UNKNOWN;
>>>           dev->npu = npu;
>>> @@ -575,6 +700,7 @@ void probe_npu2(void)
>>>           if (!npu)
>>>               continue;
>>>           platform.npu2_device_detect(npu);
>>> +        set_brick_config(npu);
>>>           setup_devices(npu);
>>>       }
>>>   }
>>> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
>>> index 1b1a06ff81ad..d3e95a45e9a4 100644
>>> --- a/hw/npu2-opencapi.c
>>> +++ b/hw/npu2-opencapi.c
>>> @@ -196,112 +196,6 @@ static uint64_t get_odl_endpoint_info(uint32_t
>>> gcid, uint64_t index)
>>>       return reg;
>>>   }
>>>   -static void disable_nvlink(uint32_t gcid, int index)
>>> -{
>>> -    uint64_t phy_config_scom, reg;
>>> -
>>> -    switch (index) {
>>> -    case 2:
>>> -    case 3:
>>> -        phy_config_scom = OBUS_LL0_IOOL_PHY_CONFIG;
>>> -        break;
>>> -    case 4:
>>> -    case 5:
>>> -        phy_config_scom = OBUS_LL3_IOOL_PHY_CONFIG;
>>> -        break;
>>> -    default:
>>> -        assert(false);
>>> -    }
>>> -    /* Disable NV-Link link layers */
>>> -    xscom_read(gcid, phy_config_scom, &reg);
>>> -    reg &= ~OBUS_IOOL_PHY_CONFIG_NV0_NPU_ENABLED;
>>> -    reg &= ~OBUS_IOOL_PHY_CONFIG_NV1_NPU_ENABLED;
>>> -    reg &= ~OBUS_IOOL_PHY_CONFIG_NV2_NPU_ENABLED;
>>> -    xscom_write(gcid, phy_config_scom, reg);
>>> -}
>>> -
>>> -/* Procedure 13.1.3.1 - select OCAPI vs NVLink for bricks 2-3/4-5 */
>>> -
>>> -static void set_transport_mux_controls(uint32_t gcid, uint32_t
>>> scom_base,
>>> -                       int index, enum npu2_dev_type type)
>>> -{
>>> -    /* Step 1 - Set Transport MUX controls to select correct OTL or
>>> NTL */
>>> -    uint64_t reg;
>>> -    uint64_t field;
>>> -
>>> -    /* TODO: Rework this to select for NVLink too */
>>> -    assert(type == NPU2_DEV_TYPE_OPENCAPI);
>>> -
>>> -    prlog(PR_DEBUG, "OCAPI: %s: Setting transport mux controls\n",
>>> __func__);
>>> -
>>> -    /* Optical IO Transport Mux Config for Bricks 0-2 and 4-5 */
>>> -    reg = npu2_scom_read(gcid, scom_base, NPU2_MISC_OPTICAL_IO_CFG0,
>>> -                 NPU2_MISC_DA_LEN_8B);
>>> -    switch (index) {
>>> -    case 0:
>>> -    case 1:
>>> -        /* not valid for OpenCAPI */
>>> -        assert(false);
>>> -        break;
>>> -    case 2:     /* OTL1.0 */
>>> -        field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2,
>>> reg);
>>> -        field &= ~0b100;
>>> -        reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg,
>>> -                   field);
>>> -        field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg);
>>> -        field |= 0b10;
>>> -        reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg,
>>> -                   field);
>>> -        break;
>>> -    case 3:     /* OTL1.1 */
>>> -        field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2,
>>> reg);
>>> -        field &= ~0b010;
>>> -        reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg,
>>> -                   field);
>>> -        field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg);
>>> -        field |= 0b01;
>>> -        reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg,
>>> -                   field);
>>> -        break;
>>> -    case 4:     /* OTL2.0 */
>>> -        field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg);
>>> -        field |= 0b10;
>>> -        reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg,
>>> -                   field);
>>> -        break;
>>> -    case 5:     /* OTL2.1 */
>>> -        field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg);
>>> -        field |= 0b01;
>>> -        reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg,
>>> -                   field);
>>> -        break;
>>> -    default:
>>> -        assert(false);
>>> -    }
>>> -    npu2_scom_write(gcid, scom_base, NPU2_MISC_OPTICAL_IO_CFG0,
>>> -            NPU2_MISC_DA_LEN_8B, reg);
>>> -
>>> -    /*
>>> -     * PowerBus Optical Miscellaneous Config Register - select
>>> -     * OpenCAPI for b4/5 and A-Link for b3
>>> -     */
>>> -    xscom_read(gcid, PU_IOE_PB_MISC_CFG, &reg);
>>> -    switch (index) {
>>> -    case 0:
>>> -    case 1:
>>> -    case 2:
>>> -    case 3:
>>> -        break;
>>> -    case 4:
>>> -        reg = SETFIELD(PU_IOE_PB_MISC_CFG_SEL_04_NPU_NOT_PB, reg, 1);
>>> -        break;
>>> -    case 5:
>>> -        reg = SETFIELD(PU_IOE_PB_MISC_CFG_SEL_05_NPU_NOT_PB, reg, 1);
>>> -        break;
>>> -    }
>>> -    xscom_write(gcid, PU_IOE_PB_MISC_CFG, reg);
>>> -}
>>> -
>>>   static void enable_odl_phy_mux(uint32_t gcid, int index)
>>>   {
>>>       uint64_t reg;
>>> @@ -579,9 +473,6 @@ static void brick_config(uint32_t gcid, uint32_t
>>> scom_base, int index)
>>>        * We assume at this point that the PowerBus Hotplug Mode Control
>>>        * register is correctly set by Hostboot
>>>        */
>>> -    disable_nvlink(gcid, index);
>>> -    set_transport_mux_controls(gcid, scom_base, index,
>>> -                   NPU2_DEV_TYPE_OPENCAPI);
>>>       enable_odl_phy_mux(gcid, index);
>>>       disable_alink_fp(gcid);
>>>       enable_xsl_clocks(gcid, scom_base, index);
>>> diff --git a/hw/npu2.c b/hw/npu2.c
>>> index 6c720764e614..a8744f4af7f7 100644
>>> --- a/hw/npu2.c
>>> +++ b/hw/npu2.c
>>> @@ -1308,6 +1308,62 @@ static const struct phb_ops npu_ops = {
>>>       .tce_kill        = npu2_tce_kill,
>>>   };
>>>   +static void enable_nvlink(struct npu2_dev *dev)
>>> +{
>>> +    struct npu2 *npu = dev->npu;
>>> +    int stack = NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev);
>>> +    int block;
>>> +    uint64_t addr, val;
>>> +
>>> +    /* CQ_SM Misc Config #0 - enable NVLink mode */
>>> +    for (block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
>>> +        addr = NPU2_REG_OFFSET(stack, block, NPU2_CQ_SM_MISC_CFG0);
>>> +        val = npu2_scom_read(npu->chip_id, npu->xscom_base, addr,
>>> +                     NPU2_MISC_DA_LEN_8B);
>>> +        val |= NPU2_CQ_SM_MISC_CFG0_CONFIG_NVLINK_MODE;
>>> +        npu2_scom_write(npu->chip_id, npu->xscom_base, addr,
>>> +                NPU2_MISC_DA_LEN_8B, val);
>>> +    }
>>> +
>>> +    /* CQ_CTL Misc Config #0 - enable NVLink mode */
>>> +    addr = NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL,
>>> NPU2_CQ_CTL_MISC_CFG);
>>> +    val = npu2_scom_read(npu->chip_id, npu->xscom_base, addr,
>>> +                 NPU2_MISC_DA_LEN_8B);
>>> +    val |= NPU2_CQ_CTL_MISC_CFG_CONFIG_NVLINK_MODE;
>>> +    npu2_scom_write(npu->chip_id, npu->xscom_base, addr,
>>> +            NPU2_MISC_DA_LEN_8B, val);
>>> +
>>> +    /* CQ_DAT Misc Config #1 - enable NVLink mode */
>>> +    addr = NPU2_REG_OFFSET(stack, NPU2_BLOCK_DAT,
>>> NPU2_CQ_DAT_MISC_CFG);
>>> +    val = npu2_scom_read(npu->chip_id, npu->xscom_base, addr,
>>> +                 NPU2_MISC_DA_LEN_8B);
>>> +    val |= NPU2_CQ_DAT_MISC_CFG_CONFIG_NVLINK_MODE;
>>> +    npu2_scom_write(npu->chip_id, npu->xscom_base, addr,
>>> +            NPU2_MISC_DA_LEN_8B, val);
>>> +
>>> +    /* NTL Misc Config 2 - enable NTL brick and checks */
>>> +    addr = NPU2_NTL_MISC_CFG2(dev);
>>> +    val = npu2_scom_read(npu->chip_id, npu->xscom_base, addr,
>>> +                 NPU2_MISC_DA_LEN_8B);
>>> +    val |= NPU2_NTL_MISC_CFG2_BRICK_ENABLE;
>>> +    val |= NPU2_NTL_MISC_CFG2_NDL_TX_PARITY_ENA;
>>> +    val |= NPU2_NTL_MISC_CFG2_NDL_PRI_PARITY_ENA;
>>> +    val |= NPU2_NTL_MISC_CFG2_RCV_CREDIT_OVERFLOW_ENA;
>>> +    npu2_scom_write(npu->chip_id, npu->xscom_base, addr,
>>> +            NPU2_MISC_DA_LEN_8B, val);
>>> +
>>> +    /* High Water Marks */
>>> +    for (block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
>>> +        addr = NPU2_REG_OFFSET(stack, block, NPU2_HIGH_WATER_MARKS);
>>> +        val = npu2_scom_read(npu->chip_id, npu->xscom_base, addr,
>>> +                     NPU2_MISC_DA_LEN_8B);
>>> +        val &= ~NPU2_HIGH_WATER_MARKS_PWR0;
>>> +        val |= PPC_BIT(6) | PPC_BIT(7) | PPC_BIT(11);
>>> +        npu2_scom_write(npu->chip_id, npu->xscom_base, addr,
>>> +                NPU2_MISC_DA_LEN_8B, val);
>>> +    }
>>> +}
>>> +
>>>   /*
>>>    * Set up NPU for NVLink and create PCI root device node
>>>    * accordingly.
>>> @@ -1315,59 +1371,12 @@ static const struct phb_ops npu_ops = {
>>>   int npu2_nvlink_init_npu(struct npu2 *npu)
>>>   {
>>>       struct dt_node *np;
>>> -    uint64_t reg[2], mm_win[2], val;
>>> +    uint64_t reg[2], mm_win[2];
>>>   -    /* TODO: Clean this up with register names, etc. when we get
>>> -     * time. This just turns NVLink mode on in each brick and should
>>> -     * get replaced with a patch from ajd once we've worked out how
>>> -     * things are going to work there.
>>> -     *
>>> -     * Obviously if the year is now 2020 that didn't happen and you
>>> -     * should fix this :-) */
>>> -    xscom_write_mask(npu->chip_id, 0x5011000, PPC_BIT(58),
>>> PPC_BIT(58));
>>> -    xscom_write_mask(npu->chip_id, 0x5011030, PPC_BIT(58),
>>> PPC_BIT(58));
>>> -    xscom_write_mask(npu->chip_id, 0x5011060, PPC_BIT(58),
>>> PPC_BIT(58));
>>> -    xscom_write_mask(npu->chip_id, 0x5011090, PPC_BIT(58),
>>> PPC_BIT(58));
>>> -    xscom_write_mask(npu->chip_id, 0x5011200, PPC_BIT(58),
>>> PPC_BIT(58));
>>> -    xscom_write_mask(npu->chip_id, 0x5011230, PPC_BIT(58),
>>> PPC_BIT(58));
>>> -    xscom_write_mask(npu->chip_id, 0x5011260, PPC_BIT(58),
>>> PPC_BIT(58));
>>> -    xscom_write_mask(npu->chip_id, 0x5011290, PPC_BIT(58),
>>> PPC_BIT(58));
>>> -    xscom_write_mask(npu->chip_id, 0x5011400, PPC_BIT(58),
>>> PPC_BIT(58));
>>> -    xscom_write_mask(npu->chip_id, 0x5011430, PPC_BIT(58),
>>> PPC_BIT(58));
>>> -    xscom_write_mask(npu->chip_id, 0x5011460, PPC_BIT(58),
>>> PPC_BIT(58));
>>> -    xscom_write_mask(npu->chip_id, 0x5011490, PPC_BIT(58),
>>> PPC_BIT(58));
>>> -
>>> -    xscom_write_mask(npu->chip_id, 0x50110c0, PPC_BIT(53),
>>> PPC_BIT(53));
>>> -    xscom_write_mask(npu->chip_id, 0x50112c0, PPC_BIT(53),
>>> PPC_BIT(53));
>>> -    xscom_write_mask(npu->chip_id, 0x50114c0, PPC_BIT(53),
>>> PPC_BIT(53));
>>> -    xscom_write_mask(npu->chip_id, 0x50110f1, PPC_BIT(41),
>>> PPC_BIT(41));
>>> -    xscom_write_mask(npu->chip_id, 0x50112f1, PPC_BIT(41),
>>> PPC_BIT(41));
>>> -    xscom_write_mask(npu->chip_id, 0x50114f1, PPC_BIT(41),
>>> PPC_BIT(41));
>>> -
>>> -    val = NPU2_NTL_MISC_CFG2_BRICK_ENABLE |
>>> -          NPU2_NTL_MISC_CFG2_NDL_TX_PARITY_ENA |
>>> -          NPU2_NTL_MISC_CFG2_NDL_PRI_PARITY_ENA |
>>> -          NPU2_NTL_MISC_CFG2_RCV_CREDIT_OVERFLOW_ENA;
>>> -    xscom_write_mask(npu->chip_id, 0x5011110, val, val);
>>> -    xscom_write_mask(npu->chip_id, 0x5011130, val, val);
>>> -    xscom_write_mask(npu->chip_id, 0x5011310, val, val);
>>> -    xscom_write_mask(npu->chip_id, 0x5011330, val, val);
>>> -    xscom_write_mask(npu->chip_id, 0x5011510, val, val);
>>> -    xscom_write_mask(npu->chip_id, 0x5011530, val, val);
>>> -
>>> -    val = PPC_BIT(6) | PPC_BIT(7) | PPC_BIT(11);
>>> -    xscom_write_mask(npu->chip_id, 0x5011009, val, PPC_BITMASK(6,11));
>>> -    xscom_write_mask(npu->chip_id, 0x5011039, val, PPC_BITMASK(6,11));
>>> -    xscom_write_mask(npu->chip_id, 0x5011069, val, PPC_BITMASK(6,11));
>>> -    xscom_write_mask(npu->chip_id, 0x5011099, val, PPC_BITMASK(6,11));
>>> -    xscom_write_mask(npu->chip_id, 0x5011209, val, PPC_BITMASK(6,11));
>>> -    xscom_write_mask(npu->chip_id, 0x5011239, val, PPC_BITMASK(6,11));
>>> -    xscom_write_mask(npu->chip_id, 0x5011269, val, PPC_BITMASK(6,11));
>>> -    xscom_write_mask(npu->chip_id, 0x5011299, val, PPC_BITMASK(6,11));
>>> -    xscom_write_mask(npu->chip_id, 0x5011409, val, PPC_BITMASK(6,11));
>>> -    xscom_write_mask(npu->chip_id, 0x5011439, val, PPC_BITMASK(6,11));
>>> -    xscom_write_mask(npu->chip_id, 0x5011469, val, PPC_BITMASK(6,11));
>>> -    xscom_write_mask(npu->chip_id, 0x5011499, val, PPC_BITMASK(6,11));
>>> +    for (int i = 0; i < npu->total_devices; i++) {
>>> +        if (npu->devices[i].type == NPU2_DEV_TYPE_NVLINK)
>>> +            enable_nvlink(&npu->devices[i]);
>>> +    }
>>>         /* Populate PCI root device node */
>>>       reg[0] = (uint64_t)npu->regs;
>>> @@ -1599,6 +1608,9 @@ static uint32_t npu_allocate_bdfn(struct npu2
>>> *p, uint32_t group, int size)
>>>       int bdfn = (group << 3);
>>>         for (i = 0; i < size; i++) {
>>> +        if (p->devices[i].type != NPU2_DEV_TYPE_NVLINK)
>>> +            continue;
>>> +
>>>           if ((p->devices[i].bdfn & 0xf8) == (bdfn & 0xf8))
>>>               bdfn++;
>>>       }
>>> @@ -1612,14 +1624,11 @@ static void npu2_configure_devices(struct
>>> npu2 *p)
>>>       uint32_t index = 0;
>>>         for (index = 0; index < p->total_devices; index++) {
>>> -        uint32_t group_id;
>>> -
>>>           dev = &p->devices[index];
>>>           if (dev->type != NPU2_DEV_TYPE_NVLINK)
>>>               continue;
>>>   -        group_id = dt_prop_get_u32(dev->dt_node, "ibm,npu-group-id");
>>> -        dev->bdfn = npu_allocate_bdfn(p, group_id, index);
>>> +        dev->bdfn = npu_allocate_bdfn(p, dev->group_id, index);
>>>             /* This must be done after calling
>>>            * npu_allocate_bdfn() */
>>> diff --git a/include/npu2-regs.h b/include/npu2-regs.h
>>> index 8273b2be26a2..a8f59dcdbe2f 100644
>>> --- a/include/npu2-regs.h
>>> +++ b/include/npu2-regs.h
>>> @@ -119,6 +119,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t
>>> scom_base,
>>>   #define NPU2_CQ_SM_MISC_CFG0            0x000
>>>   #define   NPU2_CQ_SM_MISC_CFG0_CONFIG_ENABLE_PBUS PPC_BIT(38)
>>>   #define   NPU2_CQ_SM_MISC_CFG0_CONFIG_OCAPI_MODE PPC_BIT(57)
>>> +#define   NPU2_CQ_SM_MISC_CFG0_CONFIG_NVLINK_MODE PPC_BIT(58)
>>>   #define NPU2_CQ_SM_MISC_CFG1            0x008
>>>   #define NPU2_CQ_SM_MISC_CFG2            0x148
>>>   #define NPU2_PB_EPSILON                0x010
>>> @@ -152,6 +153,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t
>>> scom_base,
>>>   #define NPU2_LOW_WATER_MARKS            0x040
>>>   #define   NPU2_LOW_WATER_MARKS_ENABLE_MACHINE_ALLOC PPC_BIT(51)
>>>   #define NPU2_HIGH_WATER_MARKS            0x048
>>> +#define   NPU2_HIGH_WATER_MARKS_PWR0        PPC_BITMASK(6, 11);
>>>   #define NPU2_RELAXED_ORDERING_CFG(n)        (0x050 + (n)*8)
>>>   #define   NPU2_RELAXED_ORDERING_SOURCE(n)    (PPC_BITMASK(0,31) >>
>>> ((n)*32))
>>>   #define     NPU2_RELAXED_ORDERING_SOURCE_ENA    PPC_BITMASK32(0,3)
>>> @@ -207,6 +209,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t
>>> scom_base,
>>>   /* CTL block registers */
>>>   #define NPU2_CQ_CTL_MISC_CFG            0x000
>>>   #define   NPU2_CQ_CTL_MISC_CFG_CONFIG_OCAPI_MODE PPC_BIT(52)
>>> +#define   NPU2_CQ_CTL_MISC_CFG_CONFIG_NVLINK_MODE PPC_BIT(53)
>>>   #define   NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL0_ENABLE PPC_BIT(55)
>>>   #define   NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL1_ENABLE PPC_BIT(56)
>>>   #define NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG        0x0B0
>>> @@ -261,6 +264,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t
>>> scom_base,
>>>   /* DAT block registers */
>>>   #define NPU2_CQ_DAT_MISC_CFG            0x008
>>>   #define      NPU2_CQ_DAT_MISC_CFG_CONFIG_OCAPI_MODE PPC_BIT(40)
>>> +#define      NPU2_CQ_DAT_MISC_CFG_CONFIG_NVLINK_MODE PPC_BIT(41)
>>>   #define NPU2_CQ_DAT_ECC_CFG            0x010
>>>   #define NPU2_CQ_DAT_SCRATCH0            0x018
>>>   #define NPU2_CQ_DAT_ECC_STATUS            0x020
>>> diff --git a/include/npu2.h b/include/npu2.h
>>> index 75c11ff3c26f..347a69b30d7d 100644
>>> --- a/include/npu2.h
>>> +++ b/include/npu2.h
>>> @@ -114,6 +114,7 @@ struct npu2_dev {
>>>       enum npu2_dev_type    type;
>>>       uint32_t        link_index;
>>>       uint32_t        brick_index;
>>> +    uint32_t        group_id;
>>
>> Why this change? You do not seem to do dt_prop_get_u32(dev->dt_node,
>> "ibm,npu-group-id") often anyway.
> 
> Because I want to keep all our device tree accessing together, as much
> as possible, and have everything else avoid touching the device tree.


IMHO we are better off avoiding extra fields in various structures, as
this way we need to take care of them being properly initialized, while
the device tree is just always there.

At the very least, this change does not belong here.

> 
>>
>>
>>>       uint64_t        pl_xscom_base;
>>>       struct dt_node        *dt_node;
>>>       struct npu2_bar        ntl_bar;
>>>
>>
> 

-- 
Alexey


More information about the Skiboot mailing list