[PATCH 2/2 v2] powerpc/powernv: Enable and setup PCI P2P

Frederic Barrat fbarrat at linux.ibm.com
Wed Aug 12 03:25:10 AEST 2020



Le 03/08/2020 à 09:35, Oliver O'Halloran a écrit :
> On Thu, Apr 30, 2020 at 11:15 PM Max Gurtovoy <maxg at mellanox.com> wrote:
>> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>> index 57d3a6a..9ecc576 100644
>> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
>> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>> @@ -3706,18 +3706,208 @@ static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus)
>>          }
>>   }
>>
>> +#ifdef CONFIG_PCI_P2PDMA
>> +static DEFINE_MUTEX(p2p_mutex);
>> +
>> +static bool pnv_pci_controller_owns_addr(struct pci_controller *hose,
>> +                                        phys_addr_t addr, size_t size)
>> +{
>> +       int i;
>> +
>> +       /*
>> +        * It seems safe to assume the full range is under the same PHB, so we
>> +        * can ignore the size.
>> +        */
>> +       for (i = 0; i < ARRAY_SIZE(hose->mem_resources); i++) {
>> +               struct resource *res = &hose->mem_resources[i];
>> +
>> +               if (res->flags && addr >= res->start && addr < res->end)
>> +                       return true;
>> +       }
>> +       return false;
>> +}
>> +
>> +/*
>> + * find the phb owning a mmio address if not owned locally
>> + */
>> +static struct pnv_phb *pnv_pci_find_owning_phb(struct pci_dev *pdev,
>> +                                              phys_addr_t addr, size_t size)
>> +{
>> +       struct pci_controller *hose;
>> +
>> +       /* fast path */
>> +       if (pnv_pci_controller_owns_addr(pdev->bus->sysdata, addr, size))
>> +               return NULL;
> 
> Do we actually need this fast path? It's going to be slow either way.
> Also if a device is doing p2p to another device under the same PHB
> then it should not be happening via the root complex. Is this a case
> you've tested?


The "fast path" comment is misleading and we should rephrase. The point 
is to catch if we're mapping a resource under the same PHB, in which 
case we don't modify the PHB configuration. So we need to catch it 
early, but it's not a fast path.
If the 2 devices are under the same PHB, the code above shouldn't do 
anything. So I guess behavior depends on the underlying bridge? We'll 
need another platform than witherspoon to test it. Probably worth checking.


>> +       list_for_each_entry(hose, &hose_list, list_node) {
>> +               struct pnv_phb *phb = hose->private_data;
>> +
>> +               if (phb->type != PNV_PHB_NPU_NVLINK &&
>> +                   phb->type != PNV_PHB_NPU_OCAPI) {
>> +                       if (pnv_pci_controller_owns_addr(hose, addr, size))
>> +                               return phb;
>> +               }
>> +       }
>> +       return NULL;
>> +}
>> +
>> +static u64 pnv_pci_dma_dir_to_opal_p2p(enum dma_data_direction dir)
>> +{
>> +       if (dir == DMA_TO_DEVICE)
>> +               return OPAL_PCI_P2P_STORE;
>> +       else if (dir == DMA_FROM_DEVICE)
>> +               return OPAL_PCI_P2P_LOAD;
>> +       else if (dir == DMA_BIDIRECTIONAL)
>> +               return OPAL_PCI_P2P_LOAD | OPAL_PCI_P2P_STORE;
>> +       else
>> +               return 0;
>> +}
>> +
>> +static int pnv_pci_ioda_enable_p2p(struct pci_dev *initiator,
>> +                                  struct pnv_phb *phb_target,
>> +                                  enum dma_data_direction dir)
>> +{
>> +       struct pci_controller *hose;
>> +       struct pnv_phb *phb_init;
>> +       struct pnv_ioda_pe *pe_init;
>> +       u64 desc;
>> +       int rc;
>> +
>> +       if (!opal_check_token(OPAL_PCI_SET_P2P))
>> +               return -ENXIO;
>> +
> 
>> +       hose = pci_bus_to_host(initiator->bus);
>> +       phb_init = hose->private_data;
> 
> You can use the pci_bus_to_pnvhb() helper
> 
>> +
>> +       pe_init = pnv_ioda_get_pe(initiator);
>> +       if (!pe_init)
>> +               return -ENODEV;
>> +
>> +       if (!pe_init->tce_bypass_enabled)
>> +               return -EINVAL;
>> +
>> +       /*
>> +        * Configuring the initiator's PHB requires to adjust its TVE#1
>> +        * setting. Since the same device can be an initiator several times for
>> +        * different target devices, we need to keep a reference count to know
>> +        * when we can restore the default bypass setting on its TVE#1 when
>> +        * disabling. Opal is not tracking PE states, so we add a reference
>> +        * count on the PE in linux.
>> +        *
>> +        * For the target, the configuration is per PHB, so we keep a
>> +        * target reference count on the PHB.
>> +        */
> 
> This irks me a bit because configuring the DMA address limits for the
> TVE is the kernel's job. What we really should be doing is using
> opal_pci_map_pe_dma_window_real() to set the bypass-mode address limit
> for the TVE to something large enough to hit the MMIO ranges rather
> than having set_p2p do it as a side effect. Unfortunately, for some
> reason skiboot doesn't implement support for enabling 56bit addressing
> using opal_pci_map_pe_dma_window_real() and we do need to support
> older kernel's which used this stuff so I guess we're stuck with it
> for now. It'd be nice if we could fix this in the longer term
> though...


OK. We'd need more than a 56-bit opal_pci_map_pe_dma_window_real() 
though, there's also a queue setting change on the target PHB.

   Fred


>> +       mutex_lock(&p2p_mutex);
>> +
>> +       desc = OPAL_PCI_P2P_ENABLE | pnv_pci_dma_dir_to_opal_p2p(dir);
>> +       /* always go to opal to validate the configuration */
>> +       rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id, desc,
>> +                             pe_init->pe_number);
>> +       if (rc != OPAL_SUCCESS) {
>> +               rc = -EIO;
>> +               goto out;
>> +       }
>> +
>> +       pe_init->p2p_initiator_count++;
>> +       phb_target->p2p_target_count++;
>> +
>> +       rc = 0;
>> +out:
>> +       mutex_unlock(&p2p_mutex);
>> +       return rc;
>> +}
>> +
>> +static int pnv_pci_dma_map_resource(struct pci_dev *pdev,
>> +                                   phys_addr_t phys_addr, size_t size,
>> +                                   enum dma_data_direction dir)
>> +{
>> +       struct pnv_phb *target_phb;
>> +
>> +       target_phb = pnv_pci_find_owning_phb(pdev, phys_addr, size);
>> +       if (!target_phb)
>> +               return 0;
>> +
>> +       return pnv_pci_ioda_enable_p2p(pdev, target_phb, dir);
>> +}
>> +
>> +static int pnv_pci_ioda_disable_p2p(struct pci_dev *initiator,
>> +               struct pnv_phb *phb_target)
>> +{
>> +       struct pci_controller *hose;
>> +       struct pnv_phb *phb_init;
>> +       struct pnv_ioda_pe *pe_init;
>> +       int rc;
>> +
>> +       if (!opal_check_token(OPAL_PCI_SET_P2P))
>> +               return -ENXIO;
> 
> This should probably have a WARN_ON() since we can't hit this path
> unless the initial map succeeds.
> 
>> +       hose = pci_bus_to_host(initiator->bus);
>> +       phb_init = hose->private_data;
> 
> pci_bus_to_pnvhb()
> 
>> +       pe_init = pnv_ioda_get_pe(initiator);
>> +       if (!pe_init)
>> +               return -ENODEV;
>> +
>> +       mutex_lock(&p2p_mutex);
>> +
>> +       if (!pe_init->p2p_initiator_count || !phb_target->p2p_target_count) {
>> +               rc = -EINVAL;
>> +               goto out;
>> +       }
>> +
>> +       if (--pe_init->p2p_initiator_count == 0)
>> +               pnv_pci_ioda2_set_bypass(pe_init, true);
>> +
>> +       if (--phb_target->p2p_target_count == 0) {
>> +               rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id,
>> +                                     0, pe_init->pe_number);
>> +               if (rc != OPAL_SUCCESS) {
>> +                       rc = -EIO;
>> +                       goto out;
>> +               }
>> +       }
>> +
>> +       rc = 0;
>> +out:
>> +       mutex_unlock(&p2p_mutex);
>> +       return rc;
>> +}
>> +
>> +static void pnv_pci_dma_unmap_resource(struct pci_dev *pdev,
>> +                                      dma_addr_t addr, size_t size,
>> +                                      enum dma_data_direction dir)
>> +{
>> +       struct pnv_phb *target_phb;
>> +       int rc;
>> +
>> +       target_phb = pnv_pci_find_owning_phb(pdev, addr, size);
>> +       if (!target_phb)
>> +               return;
>> +
>> +       rc = pnv_pci_ioda_disable_p2p(pdev, target_phb);
>> +       if (rc)
>> +               dev_err(&pdev->dev, "Failed to undo PCI peer-to-peer setup for address %llx: %d\n",
>> +                       addr, rc);
> 
> Use pci_err() or pe_err().
> 


More information about the Linuxppc-dev mailing list