[PATCH 2/2 v2] powerpc/powernv: Enable and setup PCI P2P
Frederic Barrat
fbarrat at linux.ibm.com
Wed Aug 12 03:25:10 AEST 2020
Le 03/08/2020 à 09:35, Oliver O'Halloran a écrit :
> On Thu, Apr 30, 2020 at 11:15 PM Max Gurtovoy <maxg at mellanox.com> wrote:
>> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>> index 57d3a6a..9ecc576 100644
>> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
>> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>> @@ -3706,18 +3706,208 @@ static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus)
>> }
>> }
>>
>> +#ifdef CONFIG_PCI_P2PDMA
>> +static DEFINE_MUTEX(p2p_mutex);
>> +
>> +static bool pnv_pci_controller_owns_addr(struct pci_controller *hose,
>> + phys_addr_t addr, size_t size)
>> +{
>> + int i;
>> +
>> + /*
>> + * It seems safe to assume the full range is under the same PHB, so we
>> + * can ignore the size.
>> + */
>> + for (i = 0; i < ARRAY_SIZE(hose->mem_resources); i++) {
>> + struct resource *res = &hose->mem_resources[i];
>> +
>> + if (res->flags && addr >= res->start && addr < res->end)
>> + return true;
>> + }
>> + return false;
>> +}
>> +
>> +/*
>> + * find the phb owning a mmio address if not owned locally
>> + */
>> +static struct pnv_phb *pnv_pci_find_owning_phb(struct pci_dev *pdev,
>> + phys_addr_t addr, size_t size)
>> +{
>> + struct pci_controller *hose;
>> +
>> + /* fast path */
>> + if (pnv_pci_controller_owns_addr(pdev->bus->sysdata, addr, size))
>> + return NULL;
>
> Do we actually need this fast path? It's going to be slow either way.
> Also if a device is doing p2p to another device under the same PHB
> then it should not be happening via the root complex. Is this a case
> you've tested?
The "fast path" comment is misleading and we should rephrase. The point
is to catch if we're mapping a resource under the same PHB, in which
case we don't modify the PHB configuration. So we need to catch it
early, but it's not a fast path.
If the 2 devices are under the same PHB, the code above shouldn't do
anything. So I guess behavior depends on the underlying bridge? We'll
need another platform than witherspoon to test it. Probably worth checking.
>> + list_for_each_entry(hose, &hose_list, list_node) {
>> + struct pnv_phb *phb = hose->private_data;
>> +
>> + if (phb->type != PNV_PHB_NPU_NVLINK &&
>> + phb->type != PNV_PHB_NPU_OCAPI) {
>> + if (pnv_pci_controller_owns_addr(hose, addr, size))
>> + return phb;
>> + }
>> + }
>> + return NULL;
>> +}
>> +
>> +static u64 pnv_pci_dma_dir_to_opal_p2p(enum dma_data_direction dir)
>> +{
>> + if (dir == DMA_TO_DEVICE)
>> + return OPAL_PCI_P2P_STORE;
>> + else if (dir == DMA_FROM_DEVICE)
>> + return OPAL_PCI_P2P_LOAD;
>> + else if (dir == DMA_BIDIRECTIONAL)
>> + return OPAL_PCI_P2P_LOAD | OPAL_PCI_P2P_STORE;
>> + else
>> + return 0;
>> +}
>> +
>> +static int pnv_pci_ioda_enable_p2p(struct pci_dev *initiator,
>> + struct pnv_phb *phb_target,
>> + enum dma_data_direction dir)
>> +{
>> + struct pci_controller *hose;
>> + struct pnv_phb *phb_init;
>> + struct pnv_ioda_pe *pe_init;
>> + u64 desc;
>> + int rc;
>> +
>> + if (!opal_check_token(OPAL_PCI_SET_P2P))
>> + return -ENXIO;
>> +
>
>> + hose = pci_bus_to_host(initiator->bus);
>> + phb_init = hose->private_data;
>
> You can use the pci_bus_to_pnvhb() helper
>
>> +
>> + pe_init = pnv_ioda_get_pe(initiator);
>> + if (!pe_init)
>> + return -ENODEV;
>> +
>> + if (!pe_init->tce_bypass_enabled)
>> + return -EINVAL;
>> +
>> + /*
>> + * Configuring the initiator's PHB requires to adjust its TVE#1
>> + * setting. Since the same device can be an initiator several times for
>> + * different target devices, we need to keep a reference count to know
>> + * when we can restore the default bypass setting on its TVE#1 when
>> + * disabling. Opal is not tracking PE states, so we add a reference
>> + * count on the PE in linux.
>> + *
>> + * For the target, the configuration is per PHB, so we keep a
>> + * target reference count on the PHB.
>> + */
>
> This irks me a bit because configuring the DMA address limits for the
> TVE is the kernel's job. What we really should be doing is using
> opal_pci_map_pe_dma_window_real() to set the bypass-mode address limit
> for the TVE to something large enough to hit the MMIO ranges rather
> than having set_p2p do it as a side effect. Unfortunately, for some
> reason skiboot doesn't implement support for enabling 56bit addressing
> using opal_pci_map_pe_dma_window_real() and we do need to support
> older kernel's which used this stuff so I guess we're stuck with it
> for now. It'd be nice if we could fix this in the longer term
> though...
OK. We'd need more than a 56-bit opal_pci_map_pe_dma_window_real()
though, there's also a queue setting change on the target PHB.
Fred
>> + mutex_lock(&p2p_mutex);
>> +
>> + desc = OPAL_PCI_P2P_ENABLE | pnv_pci_dma_dir_to_opal_p2p(dir);
>> + /* always go to opal to validate the configuration */
>> + rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id, desc,
>> + pe_init->pe_number);
>> + if (rc != OPAL_SUCCESS) {
>> + rc = -EIO;
>> + goto out;
>> + }
>> +
>> + pe_init->p2p_initiator_count++;
>> + phb_target->p2p_target_count++;
>> +
>> + rc = 0;
>> +out:
>> + mutex_unlock(&p2p_mutex);
>> + return rc;
>> +}
>> +
>> +static int pnv_pci_dma_map_resource(struct pci_dev *pdev,
>> + phys_addr_t phys_addr, size_t size,
>> + enum dma_data_direction dir)
>> +{
>> + struct pnv_phb *target_phb;
>> +
>> + target_phb = pnv_pci_find_owning_phb(pdev, phys_addr, size);
>> + if (!target_phb)
>> + return 0;
>> +
>> + return pnv_pci_ioda_enable_p2p(pdev, target_phb, dir);
>> +}
>> +
>> +static int pnv_pci_ioda_disable_p2p(struct pci_dev *initiator,
>> + struct pnv_phb *phb_target)
>> +{
>> + struct pci_controller *hose;
>> + struct pnv_phb *phb_init;
>> + struct pnv_ioda_pe *pe_init;
>> + int rc;
>> +
>> + if (!opal_check_token(OPAL_PCI_SET_P2P))
>> + return -ENXIO;
>
> This should probably have a WARN_ON() since we can't hit this path
> unless the initial map succeeds.
>
>> + hose = pci_bus_to_host(initiator->bus);
>> + phb_init = hose->private_data;
>
> pci_bus_to_pnvhb()
>
>> + pe_init = pnv_ioda_get_pe(initiator);
>> + if (!pe_init)
>> + return -ENODEV;
>> +
>> + mutex_lock(&p2p_mutex);
>> +
>> + if (!pe_init->p2p_initiator_count || !phb_target->p2p_target_count) {
>> + rc = -EINVAL;
>> + goto out;
>> + }
>> +
>> + if (--pe_init->p2p_initiator_count == 0)
>> + pnv_pci_ioda2_set_bypass(pe_init, true);
>> +
>> + if (--phb_target->p2p_target_count == 0) {
>> + rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id,
>> + 0, pe_init->pe_number);
>> + if (rc != OPAL_SUCCESS) {
>> + rc = -EIO;
>> + goto out;
>> + }
>> + }
>> +
>> + rc = 0;
>> +out:
>> + mutex_unlock(&p2p_mutex);
>> + return rc;
>> +}
>> +
>> +static void pnv_pci_dma_unmap_resource(struct pci_dev *pdev,
>> + dma_addr_t addr, size_t size,
>> + enum dma_data_direction dir)
>> +{
>> + struct pnv_phb *target_phb;
>> + int rc;
>> +
>> + target_phb = pnv_pci_find_owning_phb(pdev, addr, size);
>> + if (!target_phb)
>> + return;
>> +
>> + rc = pnv_pci_ioda_disable_p2p(pdev, target_phb);
>> + if (rc)
>> + dev_err(&pdev->dev, "Failed to undo PCI peer-to-peer setup for address %llx: %d\n",
>> + addr, rc);
>
> Use pci_err() or pe_err().
>
More information about the Linuxppc-dev
mailing list