[PATCH 2/2] powerpc/powernv: ocxl move TL definition
christophe lombard
clombard at linux.vnet.ibm.com
Tue Oct 15 02:32:59 AEDT 2019
On 14/10/2019 12:21, Frederic Barrat wrote:
>
>
> Le 09/10/2019 à 17:11, christophe lombard a écrit :
>> Specifies the templates in the Transaction Layer that the OpenCAPI
>> device/host
>> support when transmitting/receiving DL/DLX frames to or from the OpenCAPI
>> device/host.
>> Update, rename and create a few new platform-specific calls which can be
>> used by
>> drivers.
>>
>> No functional change.
>>
>> Signed-off-by: Christophe Lombard <clombard at linux.vnet.ibm.com>
>> ---
>> arch/powerpc/include/asm/pnv-ocxl.h | 5 +-
>> arch/powerpc/platforms/powernv/ocxl.c | 103 ++++++++++++++++++++++++--
>> drivers/misc/ocxl/config.c | 89 +---------------------
>> 3 files changed, 99 insertions(+), 98 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/pnv-ocxl.h
>> b/arch/powerpc/include/asm/pnv-ocxl.h
>> index 8e516e339e6c..b8c68878b4ba 100644
>> --- a/arch/powerpc/include/asm/pnv-ocxl.h
>> +++ b/arch/powerpc/include/asm/pnv-ocxl.h
>> @@ -13,10 +13,7 @@ extern int pnv_ocxl_get_actag(struct pci_dev *dev,
>> u16 *base, u16 *enabled,
>> u16 *supported);
>> extern int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count);
>>
>> -extern int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
>> - char *rate_buf, int rate_buf_size);
>> -extern int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
>> - uint64_t rate_buf_phys, int rate_buf_size);
>> +extern int pnv_ocxl_set_TL(struct pci_dev *dev, int tl_dvsec);
>>
>> extern int pnv_ocxl_platform_setup(struct pci_dev *dev,
>> int PE_mask, int *hwirq,
>> diff --git a/arch/powerpc/platforms/powernv/ocxl.c
>> b/arch/powerpc/platforms/powernv/ocxl.c
>> index 4d26cba12b63..351324cffc2b 100644
>> --- a/arch/powerpc/platforms/powernv/ocxl.c
>> +++ b/arch/powerpc/platforms/powernv/ocxl.c
>> @@ -369,8 +369,8 @@ static void set_templ_rate(unsigned int templ,
>> unsigned int rate, char *buf)
>> buf[idx] |= rate << shift;
>> }
>>
>> -int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
>> - char *rate_buf, int rate_buf_size)
>> +static int get_tl_cap(struct pci_dev *dev, long *cap,
>> + char *rate_buf, int rate_buf_size)
>> {
>> if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
>> return -EINVAL;
>> @@ -390,10 +390,9 @@ int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long
>> *cap,
>> *cap = PNV_OCXL_TL_P9_RECV_CAP;
>> return 0;
>> }
>> -EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);
>>
>> -int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
>> - uint64_t rate_buf_phys, int rate_buf_size)
>> +static int set_tl_conf(struct pci_dev *dev, long cap,
>> + uint64_t rate_buf_phys, int rate_buf_size)
>> {
>> struct pci_controller *hose = pci_bus_to_host(dev->bus);
>> struct pnv_phb *phb = hose->private_data;
>> @@ -410,7 +409,99 @@ int pnv_ocxl_set_tl_conf(struct pci_dev *dev,
>> long cap,
>> }
>> return 0;
>> }
>> -EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);
>> +
>> +int pnv_ocxl_set_TL(struct pci_dev *dev, int tl_dvsec)
>> +{
>> + u32 val;
>> + __be32 *be32ptr;
>> + u8 timers;
>> + int i, rc;
>> + long recv_cap;
>> + char *recv_rate;
>> +
>> + recv_rate = kzalloc(PNV_OCXL_TL_RATE_BUF_SIZE, GFP_KERNEL);
>> + if (!recv_rate)
>> + return -ENOMEM;
>> + /*
>> + * The spec defines 64 templates for messages in the
>> + * Transaction Layer (TL).
>> + *
>> + * The host and device each support a subset, so we need to
>> + * configure the transmitters on each side to send only
>> + * templates the receiver understands, at a rate the receiver
>> + * can process. Per the spec, template 0 must be supported by
>> + * everybody. That's the template which has been used by the
>> + * host and device so far.
>> + *
>> + * The sending rate limit must be set before the template is
>> + * enabled.
>> + */
>> +
>> + /*
>> + * Device -> host
>> + */
>> + rc = get_tl_cap(dev, &recv_cap, recv_rate,
>> + PNV_OCXL_TL_RATE_BUF_SIZE);
>> + if (rc)
>> + goto out;
>> +
>> + for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
>> + be32ptr = (__be32 *) &recv_rate[i];
>> + pci_write_config_dword(dev,
>> + tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i,
>> + be32_to_cpu(*be32ptr));
>> + }
>> + val = recv_cap >> 32;
>> + pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP, val);
>> + val = recv_cap & GENMASK(31, 0);
>> + pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP +
>> 4, val);
>> +
>> + /*
>> + * Host -> device
>> + */
>> + for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
>> + pci_read_config_dword(dev,
>> + tl_dvsec + OCXL_DVSEC_TL_RECV_RATE + i,
>> + &val);
>> + be32ptr = (__be32 *) &recv_rate[i];
>> + *be32ptr = cpu_to_be32(val);
>> + }
>> + pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP, &val);
>> + recv_cap = (long) val << 32;
>> + pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP + 4,
>> &val);
>> + recv_cap |= val;
>> +
>> + rc = set_tl_conf(dev, recv_cap, __pa(recv_rate),
>> + PNV_OCXL_TL_RATE_BUF_SIZE);
>> + if (rc)
>> + goto out;
>> +
>> + /*
>> + * Opencapi commands needing to be retried are classified per
>> + * the TL in 2 groups: short and long commands.
>> + *
>> + * The short back off timer it not used for now. It will be
>> + * for opencapi 4.0.
>> + *
>> + * The long back off timer is typically used when an AFU hits
>> + * a page fault but the NPU is already processing one. So the
>> + * AFU needs to wait before it can resubmit. Having a value
>> + * too low doesn't break anything, but can generate extra
>> + * traffic on the link.
>> + * We set it to 1.6 us for now. It's shorter than, but in the
>> + * same order of magnitude as the time spent to process a page
>> + * fault.
>> + */
>> + timers = 0x2 << 4; /* long timer = 1.6 us */
>> + pci_write_config_byte(dev, tl_dvsec + OCXL_DVSEC_TL_BACKOFF_TIMERS,
>> + timers);
>> +
>
>
> How does it work in the virtualized case? We would also need to do those
> config space reads and writes. I'm guessing it's all handled in the host
> behind a hcall, as we don't really want to have the guest mess with the
> link configuration?
>
A specific option (H_CONFIG_ADAPTER_SET_TL) of the hcall
H_OCXL_CONFIG_ADAPTER allows the guest to call pnv_ocxl_set_TL(), just
like the ocxl driver running on the host.
All the new pnv_* APIs have been created to configure and handle the capi
device, both for the ocxl driver (running on the host) and for the guest,
through a new vfio driver. This new vfio driver will be in charge of
calling the right API, according to the hcall options.
> Fred
>
>
>> + rc = 0;
>> +out:
>> + kfree(recv_rate);
>> + return rc;
>> +}
>> +EXPORT_SYMBOL_GPL(pnv_ocxl_set_TL);
>>
>> static int get_xsl_irq(struct pci_dev *dev, int *hwirq)
>> {
>> diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
>> index c8e19bfb5ef9..7ca0f6744125 100644
>> --- a/drivers/misc/ocxl/config.c
>> +++ b/drivers/misc/ocxl/config.c
>> @@ -709,100 +709,13 @@ EXPORT_SYMBOL_GPL(ocxl_config_set_afu_state);
>>
>> int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec)
>> {
>> - u32 val;
>> - __be32 *be32ptr;
>> - u8 timers;
>> - int i, rc;
>> - long recv_cap;
>> - char *recv_rate;
>> -
>> /*
>> * Skip on function != 0, as the TL can only be defined on 0
>> */
>> if (PCI_FUNC(dev->devfn) != 0)
>> return 0;
>>
>> - recv_rate = kzalloc(PNV_OCXL_TL_RATE_BUF_SIZE, GFP_KERNEL);
>> - if (!recv_rate)
>> - return -ENOMEM;
>> - /*
>> - * The spec defines 64 templates for messages in the
>> - * Transaction Layer (TL).
>> - *
>> - * The host and device each support a subset, so we need to
>> - * configure the transmitters on each side to send only
>> - * templates the receiver understands, at a rate the receiver
>> - * can process. Per the spec, template 0 must be supported by
>> - * everybody. That's the template which has been used by the
>> - * host and device so far.
>> - *
>> - * The sending rate limit must be set before the template is
>> - * enabled.
>> - */
>> -
>> - /*
>> - * Device -> host
>> - */
>> - rc = pnv_ocxl_get_tl_cap(dev, &recv_cap, recv_rate,
>> - PNV_OCXL_TL_RATE_BUF_SIZE);
>> - if (rc)
>> - goto out;
>> -
>> - for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
>> - be32ptr = (__be32 *) &recv_rate[i];
>> - pci_write_config_dword(dev,
>> - tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i,
>> - be32_to_cpu(*be32ptr));
>> - }
>> - val = recv_cap >> 32;
>> - pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP, val);
>> - val = recv_cap & GENMASK(31, 0);
>> - pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP +
>> 4, val);
>> -
>> - /*
>> - * Host -> device
>> - */
>> - for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
>> - pci_read_config_dword(dev,
>> - tl_dvsec + OCXL_DVSEC_TL_RECV_RATE + i,
>> - &val);
>> - be32ptr = (__be32 *) &recv_rate[i];
>> - *be32ptr = cpu_to_be32(val);
>> - }
>> - pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP, &val);
>> - recv_cap = (long) val << 32;
>> - pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP + 4,
>> &val);
>> - recv_cap |= val;
>> -
>> - rc = pnv_ocxl_set_tl_conf(dev, recv_cap, __pa(recv_rate),
>> - PNV_OCXL_TL_RATE_BUF_SIZE);
>> - if (rc)
>> - goto out;
>> -
>> - /*
>> - * Opencapi commands needing to be retried are classified per
>> - * the TL in 2 groups: short and long commands.
>> - *
>> - * The short back off timer it not used for now. It will be
>> - * for opencapi 4.0.
>> - *
>> - * The long back off timer is typically used when an AFU hits
>> - * a page fault but the NPU is already processing one. So the
>> - * AFU needs to wait before it can resubmit. Having a value
>> - * too low doesn't break anything, but can generate extra
>> - * traffic on the link.
>> - * We set it to 1.6 us for now. It's shorter than, but in the
>> - * same order of magnitude as the time spent to process a page
>> - * fault.
>> - */
>> - timers = 0x2 << 4; /* long timer = 1.6 us */
>> - pci_write_config_byte(dev, tl_dvsec + OCXL_DVSEC_TL_BACKOFF_TIMERS,
>> - timers);
>> -
>> - rc = 0;
>> -out:
>> - kfree(recv_rate);
>> - return rc;
>> + return pnv_ocxl_set_TL(dev, tl_dvsec);
>> }
>> EXPORT_SYMBOL_GPL(ocxl_config_set_TL);
>>
More information about the Linuxppc-dev
mailing list