[PATCH 4/7] powerpc/powernv/pci: Add device to iommu group during dma_dev_setup()
Alexey Kardashevskiy
aik at ozlabs.ru
Mon Apr 6 19:51:59 AEST 2020
On 06/04/2020 13:07, Oliver O'Halloran wrote:
> Historically adding devices to their respective iommu group has been
> handled by the post-init phb fixup for most devices. This was done
> because:
>
> 1) The IOMMU group is tied to the PE (usually) so we can only set up the
> iommu groups after we've done resource allocation since BAR location
> determines the device's PE, and:
> 2) The sysfs directory for the pci_dev needs to be available since
> iommu_add_device() wants to add an attribute for the iommu group.
>
> However, since commit 30d87ef8b38d ("powerpc/pci: Fix
> pcibios_setup_device() ordering") both conditions are met when
> hose->ops->dma_dev_setup() is called so there's no real need to do
> this in the fixup.
>
> Moving the call to iommu_add_device() into pnv_pci_ioda_dma_dev_setup()
> is a nice cleanup since it puts all the per-device IOMMU setup into one
> place. It also results in all (non-nvlink) devices getting their iommu
> group via a common path rather than relying on the bus notifier hack
> in pnv_tce_iommu_bus_notifier() to handle adding VFs and
> hotplugged devices to their group.
>
> Cc: Alexey Kardashevskiy <aik at ozlabs.ru>
> Signed-off-by: Oliver O'Halloran <oohall at gmail.com>
Reviewed-by: Alexey Kardashevskiy <aik at ozlabs.ru>
> ---
> arch/powerpc/platforms/powernv/npu-dma.c | 8 ++++
> arch/powerpc/platforms/powernv/pci-ioda.c | 47 +++++++----------------
> arch/powerpc/platforms/powernv/pci.c | 20 ----------
> 3 files changed, 21 insertions(+), 54 deletions(-)
>
> diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
> index 4fbbdfa8b327..df27b8d7e78f 100644
> --- a/arch/powerpc/platforms/powernv/npu-dma.c
> +++ b/arch/powerpc/platforms/powernv/npu-dma.c
> @@ -469,6 +469,12 @@ struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
> compound_group->pgsizes = pe->table_group.pgsizes;
> }
>
> + /*
> + * The gpu would have been added to the iommu group that's created
> + * for the PE. Pull it out now.
> + */
> + iommu_del_device(&gpdev->dev);
> +
> /*
> * I'm not sure this is strictly required, but it's probably a good idea
> * since the table_group for the PE is going to be attached to the
> @@ -478,7 +484,9 @@ struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
> */
> iommu_group_put(pe->table_group.group);
>
> + /* now put the GPU into the compound group */
> pnv_comp_attach_table_group(npucomp, pe);
> + iommu_add_device(compound_group, &gpdev->dev);
>
> return compound_group;
> }
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index cf0aaef1b8fa..9198b7882b57 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -1774,12 +1774,10 @@ static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
> WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
> pdev->dev.archdata.dma_offset = pe->tce_bypass_base;
> set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
> - /*
> - * Note: iommu_add_device() will fail here as
> - * for physical PE: the device is already added by now;
> - * for virtual PE: sysfs entries are not ready yet and
> - * tce_iommu_bus_notifier will add the device to a group later.
> - */
> +
> + /* PEs with a DMA weight of zero won't have a group */
> + if (pe->table_group.group)
> + iommu_add_device(&pe->table_group, &pdev->dev);
> }
>
> /*
> @@ -2628,39 +2626,20 @@ static void pnv_pci_ioda_setup_iommu_api(void)
> struct pnv_ioda_pe *pe;
>
> /*
> - * There are 4 types of PEs:
> - * - PNV_IODA_PE_BUS: a downstream port with an adapter,
> - * created from pnv_pci_setup_bridge();
> - * - PNV_IODA_PE_BUS_ALL: a PCI-PCIX bridge with devices behind it,
> - * created from pnv_pci_setup_bridge();
> - * - PNV_IODA_PE_VF: a SRIOV virtual function,
> - * created from pnv_pcibios_sriov_enable();
> - * - PNV_IODA_PE_DEV: an NPU or OCAPI device,
> - * created from pnv_pci_ioda_fixup().
> + * For non-nvlink devices the IOMMU group is registered when the PE is
> + * configured and devices are added to the group when the per-device
> + * DMA setup is run. That's done in hose->ops.dma_dev_setup() which is
> + * only initialised for "normal" IODA PHBs.
> *
> - * Normally a PE is represented by an IOMMU group, however for
> - * devices with side channels the groups need to be more strict.
> + * For NVLink devices we need to ensure the NVLinks and the GPU end up
> + * in the same IOMMU group, so that's handled here.
> */
> list_for_each_entry(hose, &hose_list, list_node) {
> phb = hose->private_data;
>
> - if (phb->type == PNV_PHB_NPU_NVLINK ||
> - phb->type == PNV_PHB_NPU_OCAPI)
> - continue;
> -
> - list_for_each_entry(pe, &phb->ioda.pe_list, list) {
> - struct iommu_table_group *table_group;
> -
> - table_group = pnv_try_setup_npu_table_group(pe);
> - if (!table_group) {
> - if (!pnv_pci_ioda_pe_dma_weight(pe))
> - continue;
> -
> - table_group = &pe->table_group;
> - }
> - pnv_ioda_setup_bus_iommu_group(pe, table_group,
> - pe->pbus);
> - }
> + if (phb->type == PNV_PHB_IODA2)
> + list_for_each_entry(pe, &phb->ioda.pe_list, list)
> + pnv_try_setup_npu_table_group(pe);
> }
>
> /*
> diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
> index 5bf818246339..091fe1cf386b 100644
> --- a/arch/powerpc/platforms/powernv/pci.c
> +++ b/arch/powerpc/platforms/powernv/pci.c
> @@ -955,28 +955,8 @@ static int pnv_tce_iommu_bus_notifier(struct notifier_block *nb,
> unsigned long action, void *data)
> {
> struct device *dev = data;
> - struct pci_dev *pdev;
> - struct pci_dn *pdn;
> - struct pnv_ioda_pe *pe;
> - struct pci_controller *hose;
> - struct pnv_phb *phb;
>
> switch (action) {
> - case BUS_NOTIFY_ADD_DEVICE:
> - pdev = to_pci_dev(dev);
> - pdn = pci_get_pdn(pdev);
> - hose = pci_bus_to_host(pdev->bus);
> - phb = hose->private_data;
> -
> - WARN_ON_ONCE(!phb);
> - if (!pdn || pdn->pe_number == IODA_INVALID_PE || !phb)
> - return 0;
> -
> - pe = &phb->ioda.pe_array[pdn->pe_number];
> - if (!pe->table_group.group)
> - return 0;
> - iommu_add_device(&pe->table_group, dev);
> - return 0;
> case BUS_NOTIFY_DEL_DEVICE:
> iommu_del_device(dev);
> return 0;
>
--
Alexey
More information about the Linuxppc-dev
mailing list