[Skiboot] [PATCH 10/16] npu2-opencapi: Tweak fundamental reset sequence

christophe lombard clombard at linux.vnet.ibm.com
Tue Sep 17 19:45:10 AEST 2019


On 09/09/2019 14:31, Frederic Barrat wrote:
> Modify slightly the ordering of a few steps in our init sequence on
> fundamental reset, so that it can be called from the OS, when the link
> is already up:
> 
> - when the card is reset, the link goes down, so we need to fence the
>    brick to prevent errors propagating to the NPU and OS
> - since fencing and unfencing don't require any delay, let's also
>    fence/unfence during the very first reset at boot. It's useless but
>    doesn't hurt and keeps the code simpler.
> - resetting the PHY must be done a bit later, while fenced and the ODL
>    and DLx in reset
> 
> Signed-off-by: Frederic Barrat <fbarrat at linux.ibm.com>
> ---
>   hw/npu2-opencapi.c | 48 +++++++++++++++++++++++++---------------------
>   include/npu2.h     |  2 --
>   2 files changed, 26 insertions(+), 24 deletions(-)
> 
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index f7be9f09..619d4be8 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -1039,6 +1039,28 @@ static int64_t npu2_opencapi_get_presence_state(struct pci_slot __unused *slot,
>   	return OPAL_SUCCESS;
>   }
>   
> +static void fence_brick(struct npu2_dev *dev)
> +{
> +	OCAPIDBG(dev, "Fencing brick\n");
> +	set_fence_control(dev->npu->chip_id, dev->npu->xscom_base,
> +			  dev->brick_index, 0b11);
> +	/* from 13.2.1, Quiesce Fence State */
> +	npu2_write(dev->npu, NPU2_MISC_FENCE_STATE,
> +		   PPC_BIT(dev->brick_index + 6));
> +}
> +
> +static void unfence_brick(struct npu2_dev *dev)
> +{
> +	OCAPIDBG(dev, "Unfencing brick\n");
> +	npu2_write(dev->npu, NPU2_MISC_FENCE_STATE,
> +		   PPC_BIT(dev->brick_index));
> +
> +	set_fence_control(dev->npu->chip_id, dev->npu->xscom_base,
> +			  dev->brick_index, 0b10);

Does unfencing the brick imply resetting the PowerBus (0b10)?
Hasn't this already been done in fence_brick()?

> +	set_fence_control(dev->npu->chip_id, dev->npu->xscom_base,
> +			  dev->brick_index, 0b00);
> +}
> +
>   static enum OpalShpcLinkState get_link_width(uint64_t odl_status)
>   {
>   	uint64_t tx_lanes, rx_lanes, state;
> @@ -1153,7 +1175,7 @@ static int64_t npu2_opencapi_poll_link(struct pci_slot *slot)
>   	return OPAL_HARDWARE;
>   }
>   
> -static int64_t npu2_opencapi_creset(struct pci_slot *slot __unused)
> +static int64_t npu2_opencapi_creset(struct pci_slot *slot)
>   {
>   	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
>   
> @@ -1183,19 +1205,10 @@ static int64_t npu2_opencapi_freset(struct pci_slot *slot)
>   			OCAPIINF(dev, "no card detected\n");
>   			return OPAL_SUCCESS;
>   		}
> -		if (dev->train_need_fence) {
> -			OCAPIDBG(dev, "Fencing OTL during reset\n");
> -			set_fence_control(chip_id, dev->npu->xscom_base,
> -					dev->brick_index, 0b11);
> -			npu2_write(dev->npu, NPU2_MISC_FENCE_STATE,
> -				PPC_BIT(dev->brick_index + 6));
> -			dev->train_fenced = true;
> -		}
> -		dev->train_need_fence = true;
>   		slot->link_retries = OCAPI_LINK_TRAINING_RETRIES;
> -		npu2_opencapi_phy_reset(dev);
>   		/* fall-through */
>   	case OCAPI_SLOT_FRESET_INIT:
> +		fence_brick(dev);
>   		assert_odl_reset(chip_id, dev->brick_index);
>   		assert_adapter_reset(dev);
>   		pci_slot_set_state(slot,
> @@ -1204,6 +1217,7 @@ static int64_t npu2_opencapi_freset(struct pci_slot *slot)
>   		return pci_slot_set_sm_timeout(slot, msecs_to_tb(5));
>   
>   	case OCAPI_SLOT_FRESET_ASSERT_DELAY:
> +		npu2_opencapi_phy_reset(dev);
>   		deassert_odl_reset(chip_id, dev->brick_index);
>   		pci_slot_set_state(slot,
>   				OCAPI_SLOT_FRESET_DEASSERT_DELAY);
> @@ -1221,15 +1235,7 @@ static int64_t npu2_opencapi_freset(struct pci_slot *slot)
>   		return pci_slot_set_sm_timeout(slot, msecs_to_tb(250));
>   
>   	case OCAPI_SLOT_FRESET_DEASSERT_DELAY2:
> -		if (dev->train_fenced) {
> -			OCAPIDBG(dev, "Unfencing OTL after reset\n");
> -			npu2_write(dev->npu, NPU2_MISC_FENCE_STATE,
> -				   PPC_BIT(dev->brick_index));
> -			set_fence_control(chip_id, dev->npu->xscom_base,
> -					  dev->brick_index, 0b00);
> -			dev->train_fenced = false;
> -		}
> -
> +		unfence_brick(dev);
>   		set_init_pattern(chip_id, dev);
>   		pci_slot_set_state(slot,
>   				OCAPI_SLOT_FRESET_INIT_DELAY);
> @@ -1692,8 +1698,6 @@ static void setup_device(struct npu2_dev *dev)
>   
>   	dev->bdfn = 0;
>   	dev->linux_pe = -1;
> -	dev->train_need_fence = false;
> -	dev->train_fenced = false;
>   
>   	/* TODO: Procedure 13.1.3.7 - AFU Memory Range BARs */
>   	/* Procedure 13.1.3.8 - AFU MMIO Range BARs */
> diff --git a/include/npu2.h b/include/npu2.h
> index d2316dc1..6b1063da 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -145,8 +145,6 @@ struct npu2_dev {
>   	/* OpenCAPI */
>   	struct phb		phb_ocapi;
>   	uint64_t		linux_pe;
> -	bool			train_need_fence;
> -	bool			train_fenced;
>   	unsigned long		train_start;
>   	unsigned long		train_timeout;
>   };
> 



More information about the Skiboot mailing list