[Skiboot] [PATCH 10/16] npu2-opencapi: Tweak fundamental reset sequence
christophe lombard
clombard at linux.vnet.ibm.com
Tue Sep 17 19:45:10 AEST 2019
On 09/09/2019 14:31, Frederic Barrat wrote:
> Slightly modify the ordering of a few steps in our init sequence on
> fundamental reset, so that it can be called from the OS when the link
> is already up:
>
> - when the card is reset, the link goes down, so we need to fence the
> brick to prevent errors propagating to the NPU and OS
> - since fencing and unfencing don't require any delay, let's also
> fence/unfence during the very first reset at boot. It's useless but
> doesn't hurt and keeps the code simpler.
> - resetting the PHY must be done a bit later, while the brick is fenced
> and the ODL and DLx are in reset
>
> Signed-off-by: Frederic Barrat <fbarrat at linux.ibm.com>
> ---
> hw/npu2-opencapi.c | 48 +++++++++++++++++++++++++---------------------
> include/npu2.h | 2 --
> 2 files changed, 26 insertions(+), 24 deletions(-)
>
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index f7be9f09..619d4be8 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -1039,6 +1039,28 @@ static int64_t npu2_opencapi_get_presence_state(struct pci_slot __unused *slot,
> return OPAL_SUCCESS;
> }
>
> +static void fence_brick(struct npu2_dev *dev)
> +{
> + OCAPIDBG(dev, "Fencing brick\n");
> + set_fence_control(dev->npu->chip_id, dev->npu->xscom_base,
> + dev->brick_index, 0b11);
> + /* from 13.2.1, Quiesce Fence State */
> + npu2_write(dev->npu, NPU2_MISC_FENCE_STATE,
> + PPC_BIT(dev->brick_index + 6));
> +}
> +
> +static void unfence_brick(struct npu2_dev *dev)
> +{
> + OCAPIDBG(dev, "Unfencing brick\n");
> + npu2_write(dev->npu, NPU2_MISC_FENCE_STATE,
> + PPC_BIT(dev->brick_index));
> +
> + set_fence_control(dev->npu->chip_id, dev->npu->xscom_base,
> + dev->brick_index, 0b10);
Does unfencing the brick imply resetting the PowerBus (0b10)?
Hasn't this already been done in fence_brick()?
> + set_fence_control(dev->npu->chip_id, dev->npu->xscom_base,
> + dev->brick_index, 0b00);
> +}
> +
> static enum OpalShpcLinkState get_link_width(uint64_t odl_status)
> {
> uint64_t tx_lanes, rx_lanes, state;
> @@ -1153,7 +1175,7 @@ static int64_t npu2_opencapi_poll_link(struct pci_slot *slot)
> return OPAL_HARDWARE;
> }
>
> -static int64_t npu2_opencapi_creset(struct pci_slot *slot __unused)
> +static int64_t npu2_opencapi_creset(struct pci_slot *slot)
> {
> struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
>
> @@ -1183,19 +1205,10 @@ static int64_t npu2_opencapi_freset(struct pci_slot *slot)
> OCAPIINF(dev, "no card detected\n");
> return OPAL_SUCCESS;
> }
> - if (dev->train_need_fence) {
> - OCAPIDBG(dev, "Fencing OTL during reset\n");
> - set_fence_control(chip_id, dev->npu->xscom_base,
> - dev->brick_index, 0b11);
> - npu2_write(dev->npu, NPU2_MISC_FENCE_STATE,
> - PPC_BIT(dev->brick_index + 6));
> - dev->train_fenced = true;
> - }
> - dev->train_need_fence = true;
> slot->link_retries = OCAPI_LINK_TRAINING_RETRIES;
> - npu2_opencapi_phy_reset(dev);
> /* fall-through */
> case OCAPI_SLOT_FRESET_INIT:
> + fence_brick(dev);
> assert_odl_reset(chip_id, dev->brick_index);
> assert_adapter_reset(dev);
> pci_slot_set_state(slot,
> @@ -1204,6 +1217,7 @@ static int64_t npu2_opencapi_freset(struct pci_slot *slot)
> return pci_slot_set_sm_timeout(slot, msecs_to_tb(5));
>
> case OCAPI_SLOT_FRESET_ASSERT_DELAY:
> + npu2_opencapi_phy_reset(dev);
> deassert_odl_reset(chip_id, dev->brick_index);
> pci_slot_set_state(slot,
> OCAPI_SLOT_FRESET_DEASSERT_DELAY);
> @@ -1221,15 +1235,7 @@ static int64_t npu2_opencapi_freset(struct pci_slot *slot)
> return pci_slot_set_sm_timeout(slot, msecs_to_tb(250));
>
> case OCAPI_SLOT_FRESET_DEASSERT_DELAY2:
> - if (dev->train_fenced) {
> - OCAPIDBG(dev, "Unfencing OTL after reset\n");
> - npu2_write(dev->npu, NPU2_MISC_FENCE_STATE,
> - PPC_BIT(dev->brick_index));
> - set_fence_control(chip_id, dev->npu->xscom_base,
> - dev->brick_index, 0b00);
> - dev->train_fenced = false;
> - }
> -
> + unfence_brick(dev);
> set_init_pattern(chip_id, dev);
> pci_slot_set_state(slot,
> OCAPI_SLOT_FRESET_INIT_DELAY);
> @@ -1692,8 +1698,6 @@ static void setup_device(struct npu2_dev *dev)
>
> dev->bdfn = 0;
> dev->linux_pe = -1;
> - dev->train_need_fence = false;
> - dev->train_fenced = false;
>
> /* TODO: Procedure 13.1.3.7 - AFU Memory Range BARs */
> /* Procedure 13.1.3.8 - AFU MMIO Range BARs */
> diff --git a/include/npu2.h b/include/npu2.h
> index d2316dc1..6b1063da 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -145,8 +145,6 @@ struct npu2_dev {
> /* OpenCAPI */
> struct phb phb_ocapi;
> uint64_t linux_pe;
> - bool train_need_fence;
> - bool train_fenced;
> unsigned long train_start;
> unsigned long train_timeout;
> };
>
More information about the Skiboot
mailing list