[PATCH 2/2] powerpc/powernv: ocxl move TL definition

Mon Oct 14 21:21:41 AEDT 2019

Le 09/10/2019 à 17:11, christophe lombard a écrit :
> Specifies the templates in the Transaction Layer that the OpenCAPI device/host
> support when transmitting/receiving DL/DLX frames to or from the OpenCAPI
> device/host.
> Update, rename and create a few new platform-specific calls which can be used
> by drivers.
> 
> No functional change.
> 
> Signed-off-by: Christophe Lombard <clombard at linux.vnet.ibm.com>
> ---
>   arch/powerpc/include/asm/pnv-ocxl.h   |   5 +-
>   arch/powerpc/platforms/powernv/ocxl.c | 103 ++++++++++++++++++++++++--
>   drivers/misc/ocxl/config.c            |  89 +---------------------
>   3 files changed, 99 insertions(+), 98 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h
> index 8e516e339e6c..b8c68878b4ba 100644
> --- a/arch/powerpc/include/asm/pnv-ocxl.h
> +++ b/arch/powerpc/include/asm/pnv-ocxl.h
> @@ -13,10 +13,7 @@ extern int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
>   			u16 *supported);
>   extern int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count);
> 
> -extern int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
> -			char *rate_buf, int rate_buf_size);
> -extern int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
> -			uint64_t rate_buf_phys, int rate_buf_size);
> +extern int pnv_ocxl_set_TL(struct pci_dev *dev, int tl_dvsec);
> 
>   extern int pnv_ocxl_platform_setup(struct pci_dev *dev,
>   				   int PE_mask, int *hwirq,
> diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
> index 4d26cba12b63..351324cffc2b 100644
> --- a/arch/powerpc/platforms/powernv/ocxl.c
> +++ b/arch/powerpc/platforms/powernv/ocxl.c
> @@ -369,8 +369,8 @@ static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf)
>   	buf[idx] |= rate << shift;
>   }
> 
> -int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
> -			char *rate_buf, int rate_buf_size)
> +static int get_tl_cap(struct pci_dev *dev, long *cap,
> +		      char *rate_buf, int rate_buf_size)
>   {
>   	if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
>   		return -EINVAL;
> @@ -390,10 +390,9 @@ int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
>   	*cap = PNV_OCXL_TL_P9_RECV_CAP;
>   	return 0;
>   }
> -EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);
> 
> -int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
> -			uint64_t rate_buf_phys, int rate_buf_size)
> +static int set_tl_conf(struct pci_dev *dev, long cap,
> +		       uint64_t rate_buf_phys, int rate_buf_size)
>   {
>   	struct pci_controller *hose = pci_bus_to_host(dev->bus);
>   	struct pnv_phb *phb = hose->private_data;
> @@ -410,7 +409,99 @@ int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
>   	}
>   	return 0;
>   }
> -EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);
> +
> +int pnv_ocxl_set_TL(struct pci_dev *dev, int tl_dvsec)
> +{
> +	u32 val;
> +	__be32 *be32ptr;
> +	u8 timers;
> +	int i, rc;
> +	long recv_cap;
> +	char *recv_rate;
> +
> +	recv_rate = kzalloc(PNV_OCXL_TL_RATE_BUF_SIZE, GFP_KERNEL);
> +	if (!recv_rate)
> +		return -ENOMEM;
> +	/*
> +	 * The spec defines 64 templates for messages in the
> +	 * Transaction Layer (TL).
> +	 *
> +	 * The host and device each support a subset, so we need to
> +	 * configure the transmitters on each side to send only
> +	 * templates the receiver understands, at a rate the receiver
> +	 * can process.  Per the spec, template 0 must be supported by
> +	 * everybody. That's the template which has been used by the
> +	 * host and device so far.
> +	 *
> +	 * The sending rate limit must be set before the template is
> +	 * enabled.
> +	 */
> +
> +	/*
> +	 * Device -> host
> +	 */
> +	rc = get_tl_cap(dev, &recv_cap, recv_rate,
> +			PNV_OCXL_TL_RATE_BUF_SIZE);
> +	if (rc)
> +		goto out;
> +
> +	for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
> +		be32ptr = (__be32 *) &recv_rate[i];
> +		pci_write_config_dword(dev,
> +				tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i,
> +				be32_to_cpu(*be32ptr));
> +	}
> +	val = recv_cap >> 32;
> +	pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP, val);
> +	val = recv_cap & GENMASK(31, 0);
> +	pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP + 4, val);
> +
> +	/*
> +	 * Host -> device
> +	 */
> +	for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
> +		pci_read_config_dword(dev,
> +				tl_dvsec + OCXL_DVSEC_TL_RECV_RATE + i,
> +				&val);
> +		be32ptr = (__be32 *) &recv_rate[i];
> +		*be32ptr = cpu_to_be32(val);
> +	}
> +	pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP, &val);
> +	recv_cap = (long) val << 32;
> +	pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP + 4, &val);
> +	recv_cap |= val;
> +
> +	rc = set_tl_conf(dev, recv_cap, __pa(recv_rate),
> +			 PNV_OCXL_TL_RATE_BUF_SIZE);
> +	if (rc)
> +		goto out;
> +
> +	/*
> +	 * Opencapi commands needing to be retried are classified per
> +	 * the TL in 2 groups: short and long commands.
> +	 *
> +	 * The short back off timer is not used for now. It will be
> +	 * for opencapi 4.0.
> +	 *
> +	 * The long back off timer is typically used when an AFU hits
> +	 * a page fault but the NPU is already processing one. So the
> +	 * AFU needs to wait before it can resubmit. Having a value
> +	 * too low doesn't break anything, but can generate extra
> +	 * traffic on the link.
> +	 * We set it to 1.6 us for now. It's shorter than, but in the
> +	 * same order of magnitude as the time spent to process a page
> +	 * fault.
> +	 */
> +	timers = 0x2 << 4; /* long timer = 1.6 us */
> +	pci_write_config_byte(dev, tl_dvsec + OCXL_DVSEC_TL_BACKOFF_TIMERS,
> +			timers);
> +

How does it work in the virtualized case? We would also need to do those 
config space reads and writes. I'm guessing it's all handled in the host 
behind a hcall, as we don't really want to have the guest mess with the 
link configuration?

   Fred

> +	rc = 0;
> +out:
> +	kfree(recv_rate);
> +	return rc;
> +}
> +EXPORT_SYMBOL_GPL(pnv_ocxl_set_TL);
> 
>   static int get_xsl_irq(struct pci_dev *dev, int *hwirq)
>   {
> diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
> index c8e19bfb5ef9..7ca0f6744125 100644
> --- a/drivers/misc/ocxl/config.c
> +++ b/drivers/misc/ocxl/config.c
> @@ -709,100 +709,13 @@ EXPORT_SYMBOL_GPL(ocxl_config_set_afu_state);
> 
>   int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec)
>   {
> -	u32 val;
> -	__be32 *be32ptr;
> -	u8 timers;
> -	int i, rc;
> -	long recv_cap;
> -	char *recv_rate;
> -
>   	/*
>   	 * Skip on function != 0, as the TL can only be defined on 0
>   	 */
>   	if (PCI_FUNC(dev->devfn) != 0)
>   		return 0;
> 
> -	recv_rate = kzalloc(PNV_OCXL_TL_RATE_BUF_SIZE, GFP_KERNEL);
> -	if (!recv_rate)
> -		return -ENOMEM;
> -	/*
> -	 * The spec defines 64 templates for messages in the
> -	 * Transaction Layer (TL).
> -	 *
> -	 * The host and device each support a subset, so we need to
> -	 * configure the transmitters on each side to send only
> -	 * templates the receiver understands, at a rate the receiver
> -	 * can process.  Per the spec, template 0 must be supported by
> -	 * everybody. That's the template which has been used by the
> -	 * host and device so far.
> -	 *
> -	 * The sending rate limit must be set before the template is
> -	 * enabled.
> -	 */
> -
> -	/*
> -	 * Device -> host
> -	 */
> -	rc = pnv_ocxl_get_tl_cap(dev, &recv_cap, recv_rate,
> -				PNV_OCXL_TL_RATE_BUF_SIZE);
> -	if (rc)
> -		goto out;
> -
> -	for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
> -		be32ptr = (__be32 *) &recv_rate[i];
> -		pci_write_config_dword(dev,
> -				tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i,
> -				be32_to_cpu(*be32ptr));
> -	}
> -	val = recv_cap >> 32;
> -	pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP, val);
> -	val = recv_cap & GENMASK(31, 0);
> -	pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP + 4, val);
> -
> -	/*
> -	 * Host -> device
> -	 */
> -	for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
> -		pci_read_config_dword(dev,
> -				tl_dvsec + OCXL_DVSEC_TL_RECV_RATE + i,
> -				&val);
> -		be32ptr = (__be32 *) &recv_rate[i];
> -		*be32ptr = cpu_to_be32(val);
> -	}
> -	pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP, &val);
> -	recv_cap = (long) val << 32;
> -	pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP + 4, &val);
> -	recv_cap |= val;
> -
> -	rc = pnv_ocxl_set_tl_conf(dev, recv_cap, __pa(recv_rate),
> -				PNV_OCXL_TL_RATE_BUF_SIZE);
> -	if (rc)
> -		goto out;
> -
> -	/*
> -	 * Opencapi commands needing to be retried are classified per
> -	 * the TL in 2 groups: short and long commands.
> -	 *
> -	 * The short back off timer it not used for now. It will be
> -	 * for opencapi 4.0.
> -	 *
> -	 * The long back off timer is typically used when an AFU hits
> -	 * a page fault but the NPU is already processing one. So the
> -	 * AFU needs to wait before it can resubmit. Having a value
> -	 * too low doesn't break anything, but can generate extra
> -	 * traffic on the link.
> -	 * We set it to 1.6 us for now. It's shorter than, but in the
> -	 * same order of magnitude as the time spent to process a page
> -	 * fault.
> -	 */
> -	timers = 0x2 << 4; /* long timer = 1.6 us */
> -	pci_write_config_byte(dev, tl_dvsec + OCXL_DVSEC_TL_BACKOFF_TIMERS,
> -			timers);
> -
> -	rc = 0;
> -out:
> -	kfree(recv_rate);
> -	return rc;
> +	return pnv_ocxl_set_TL(dev, tl_dvsec);
>   }
>   EXPORT_SYMBOL_GPL(ocxl_config_set_TL);
>