[PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state
Alexey Kardashevskiy
aik at ozlabs.ru
Tue Jul 14 15:37:06 AEST 2020
On 10/07/2020 15:23, Oliver O'Halloran wrote:
> There's an optimisation in the PE setup which skips performing DMA
> setup for a PE if we only have bridges in a PE. The assumption being
> that only "real" devices will DMA to system memory, which is probably
> fair. However, if we start off with only bridge devices in a PE then
> add a non-bridge device the new device won't be able to use DMA because
> we never configured it.
>
> Fix this (admittedly pretty weird) edge case by tracking whether we've done
> the DMA setup for the PE or not. If a non-bridge device is added to the PE
> (via rescan or hotplug, or whatever) we can set up DMA on demand.
So hotplug does not work on powernv then, right? I thought you tested it
a while ago, or this patch is the result of that attempt? If it is, then
Reviewed-by: Alexey Kardashevskiy <aik at ozlabs.ru>
> This also means the only remaining user of the old "DMA Weight" code is
> the IODA1 DMA setup code that it was originally added for, which is good.
Is ditching IODA1 in the plan? :)
>
> Cc: Alexey Kardashevskiy <aik at ozlabs.ru>
> Signed-off-by: Oliver O'Halloran <oohall at gmail.com>
> ---
> Alexey, do we need to have the IOMMU API stuff set/clear this flag?
I'd say no as that API only cares if a device is in a PE and for those
the PE DMA setup optimization is skipped. Thanks,
> ---
> arch/powerpc/platforms/powernv/pci-ioda.c | 48 ++++++++++++++---------
> arch/powerpc/platforms/powernv/pci.h | 7 ++++
> 2 files changed, 36 insertions(+), 19 deletions(-)
>
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index bfb40607aa0e..bb9c1cc60c33 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -141,6 +141,7 @@ static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
>
> phb->ioda.pe_array[pe_no].phb = phb;
> phb->ioda.pe_array[pe_no].pe_number = pe_no;
> + phb->ioda.pe_array[pe_no].dma_setup_done = false;
>
> /*
> * Clear the PE frozen state as it might be put into frozen state
> @@ -1685,6 +1686,12 @@ static int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
> }
> #endif /* CONFIG_PCI_IOV */
>
> +static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
> + struct pnv_ioda_pe *pe);
> +
> +static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
> + struct pnv_ioda_pe *pe);
> +
> static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
> {
> struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
> @@ -1713,6 +1720,24 @@ static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
> pci_info(pdev, "Added to existing PE#%x\n", pe->pe_number);
> }
>
> + /*
> + * We assume that bridges *probably* don't need to do any DMA so we can
> + * skip allocating a TCE table, etc unless we get a non-bridge device.
> + */
> + if (!pe->dma_setup_done && !pci_is_bridge(pdev)) {
> + switch (phb->type) {
> + case PNV_PHB_IODA1:
> + pnv_pci_ioda1_setup_dma_pe(phb, pe);
> + break;
> + case PNV_PHB_IODA2:
> + pnv_pci_ioda2_setup_dma_pe(phb, pe);
> + break;
> + default:
> + pr_warn("%s: No DMA for PHB#%x (type %d)\n",
> + __func__, phb->hose->global_number, phb->type);
> + }
> + }
> +
> if (pdn)
> pdn->pe_number = pe->pe_number;
> pe->device_count++;
> @@ -2222,6 +2247,7 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
> pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
> iommu_init_table(tbl, phb->hose->node, 0, 0);
>
> + pe->dma_setup_done = true;
> return;
> fail:
> /* XXX Failure: Try to fallback to 64-bit only ? */
> @@ -2536,9 +2562,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
> {
> int64_t rc;
>
> - if (!pnv_pci_ioda_pe_dma_weight(pe))
> - return;
> -
> /* TVE #1 is selected by PCI address bit 59 */
> pe->tce_bypass_base = 1ull << 59;
>
> @@ -2563,6 +2586,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
> iommu_register_group(&pe->table_group, phb->hose->global_number,
> pe->pe_number);
> #endif
> + pe->dma_setup_done = true;
> }
>
> int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq)
> @@ -3136,7 +3160,6 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
>
> static void pnv_pci_configure_bus(struct pci_bus *bus)
> {
> - struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
> struct pci_dev *bridge = bus->self;
> struct pnv_ioda_pe *pe;
> bool all = (bridge && pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE);
> @@ -3160,17 +3183,6 @@ static void pnv_pci_configure_bus(struct pci_bus *bus)
> return;
>
> pnv_ioda_setup_pe_seg(pe);
> - switch (phb->type) {
> - case PNV_PHB_IODA1:
> - pnv_pci_ioda1_setup_dma_pe(phb, pe);
> - break;
> - case PNV_PHB_IODA2:
> - pnv_pci_ioda2_setup_dma_pe(phb, pe);
> - break;
> - default:
> - pr_warn("%s: No DMA for PHB#%x (type %d)\n",
> - __func__, phb->hose->global_number, phb->type);
> - }
> }
>
> static resource_size_t pnv_pci_default_alignment(void)
> @@ -3289,11 +3301,10 @@ static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group,
>
> static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe)
> {
> - unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
> struct iommu_table *tbl = pe->table_group.tables[0];
> int64_t rc;
>
> - if (!weight)
> + if (!pe->dma_setup_done)
> return;
>
> rc = pnv_pci_ioda1_unset_window(&pe->table_group, 0);
> @@ -3313,10 +3324,9 @@ static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe)
> static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
> {
> struct iommu_table *tbl = pe->table_group.tables[0];
> - unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
> int64_t rc;
>
> - if (!weight)
> + if (pe->dma_setup_done)
> return;
>
> rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
> diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
> index 0727dec9a0d1..6aa6aefb637d 100644
> --- a/arch/powerpc/platforms/powernv/pci.h
> +++ b/arch/powerpc/platforms/powernv/pci.h
> @@ -87,6 +87,13 @@ struct pnv_ioda_pe {
> bool tce_bypass_enabled;
> uint64_t tce_bypass_base;
>
> + /*
> + * Used to track whether we've done DMA setup for this PE or not. We
> + * want to defer allocating TCE tables, etc until we've added a
> + * non-bridge device to the PE.
> + */
> + bool dma_setup_done;
> +
> /* MSIs. MVE index is identical for for 32 and 64 bit MSI
> * and -1 if not supported. (It's actually identical to the
> * PE number)
>
--
Alexey
More information about the Linuxppc-dev
mailing list