[PATCH v6 08/42] powerpc/powernv: Calculate PHB's DMA weight dynamically
Alexey Kardashevskiy
aik at ozlabs.ru
Mon Aug 10 19:21:12 AEST 2015
On 08/06/2015 02:11 PM, Gavin Shan wrote:
> For P7IOC, the whole available DMA32 space, which is below the
> MEM32 space, is divided evenly into 256MB segments. The number
> of continuous segments assigned to one particular PE depends on
> the PE's DMA weight that is calculated based on the type of each
> PCI devices contained in the PE, and PHB's DMA weight which is
> accumulative DMA weight of PEs contained in the PHB. It means
> that the PHB's DMA weight calculation depends on existing PEs,
> which works perfectly now, but not hotplug friendly. As the
> whole available DMA32 space can be assigned to one PE on PHB3,
> so we don't have the issue on PHB3.
>
> The patch calculates PHB's DMA weight based on the PCI devices
> contained in the PHB dynamically so that it's hotplug friendly.
>
> Signed-off-by: Gavin Shan <gwshan at linux.vnet.ibm.com>
> ---
> arch/powerpc/platforms/powernv/pci-ioda.c | 88 +++++++++++++++----------------
> arch/powerpc/platforms/powernv/pci.h | 6 ---
> 2 files changed, 43 insertions(+), 51 deletions(-)
>
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 713f4b4..7342cfd 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -927,6 +927,9 @@ static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
>
> static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
> {
> + struct pci_controller *hose = pci_bus_to_host(dev->bus);
> + struct pnv_phb *phb = hose->private_data;
> +
> /* This is quite simplistic. The "base" weight of a device
> * is 10. 0 means no DMA is to be accounted for it.
> */
> @@ -939,14 +942,34 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
> if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
> dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
> dev->class == PCI_CLASS_SERIAL_USB_EHCI)
> - return 3;
> + return 3 * phb->ioda.tce32_count;
>
> /* Increase the weight of RAID (includes Obsidian) */
> if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
> - return 15;
> + return 15 * phb->ioda.tce32_count;
>
> /* Default */
> - return 10;
> + return 10 * phb->ioda.tce32_count;
> +}
> +
> +static int __pnv_ioda_phb_dma_weight(struct pci_dev *pdev, void *data)
> +{
> + unsigned int *dma_weight = data;
> +
> + *dma_weight += pnv_ioda_dma_weight(pdev);
> + return 0;
> +}
> +
> +static unsigned int pnv_ioda_phb_dma_weight(struct pnv_phb *phb)
> +{
> + unsigned int dma_weight = 0;
> +
> + if (!phb->hose->bus)
> + return 0;
> +
> + pci_walk_bus(phb->hose->bus,
> + __pnv_ioda_phb_dma_weight, &dma_weight);
> + return dma_weight;
> }
>
> #ifdef CONFIG_PCI_IOV
> @@ -1097,14 +1120,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
> /* Put PE to the list */
> list_add_tail(&pe->list, &phb->ioda.pe_list);
>
> - /* Account for one DMA PE if at least one DMA capable device exist
> - * below the bridge
> - */
> - if (pe->dma_weight != 0) {
> - phb->ioda.dma_weight += pe->dma_weight;
> - phb->ioda.dma_pe_count++;
> - }
> -
> /* Link the PE */
> pnv_ioda_link_pe_by_weight(phb, pe);
> }
> @@ -2431,24 +2446,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
> static void pnv_ioda_setup_dma(struct pnv_phb *phb)
> {
> struct pci_controller *hose = phb->hose;
> - unsigned int residual, remaining, segs, tw, base;
> struct pnv_ioda_pe *pe;
> + unsigned int dma_weight;
>
> - /* If we have more PE# than segments available, hand out one
> - * per PE until we run out and let the rest fail. If not,
> - * then we assign at least one segment per PE, plus more based
> - * on the amount of devices under that PE
> - */
> - if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
> - residual = 0;
> - else
> - residual = phb->ioda.tce32_count -
> - phb->ioda.dma_pe_count;
> -
> - pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
> - hose->global_number, phb->ioda.tce32_count);
> - pr_info("PCI: %d PE# for a total weight of %d\n",
> - phb->ioda.dma_pe_count, phb->ioda.dma_weight);
> + /* Calculate the PHB's DMA weight */
> + dma_weight = pnv_ioda_phb_dma_weight(phb);
> + pr_info("PCI%04x has %ld DMA32 segments, total weight %d\n",
> + hose->global_number, phb->ioda.tce32_count, dma_weight);
>
> pnv_pci_ioda_setup_opal_tce_kill(phb);
>
> @@ -2456,22 +2460,9 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
> * out one base segment plus any residual segments based on
> * weight
> */
> - remaining = phb->ioda.tce32_count;
> - tw = phb->ioda.dma_weight;
> - base = 0;
> list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
> if (!pe->dma_weight)
> continue;
> - if (!remaining) {
> - pe_warn(pe, "No DMA32 resources available\n");
> - continue;
> - }
> - segs = 1;
> - if (residual) {
> - segs += ((pe->dma_weight * residual) + (tw / 2)) / tw;
> - if (segs > remaining)
> - segs = remaining;
> - }
>
> /*
> * For IODA2 compliant PHB3, we needn't care about the weight.
> @@ -2479,17 +2470,24 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
> * the specific PE.
> */
> if (phb->type == PNV_PHB_IODA1) {
> - pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
> + unsigned int segs, base = 0;
> +
> + if (pe->dma_weight <
> + dma_weight / phb->ioda.tce32_count)
> + segs = 1;
> + else
> + segs = (pe->dma_weight *
> + phb->ioda.tce32_count) / dma_weight;
> +
> + pe_info(pe, "DMA32 weight %d, assigned %d segments\n",
> pe->dma_weight, segs);
> pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
> +
> + base += segs;
This is not right. @base here is a local variable in the scope,
pnv_pci_ioda_setup_dma_pe() will always be called with base==0.
Sorry for commenting the same patch twice.
> } else {
> pe_info(pe, "Assign DMA32 space\n");
> - segs = 0;
> pnv_pci_ioda2_setup_dma_pe(phb, pe);
> }
> -
> - remaining -= segs;
> - base += segs;
> }
> }
>
> diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
> index 08a4e57..addd3f7 100644
> --- a/arch/powerpc/platforms/powernv/pci.h
> +++ b/arch/powerpc/platforms/powernv/pci.h
> @@ -183,12 +183,6 @@ struct pnv_phb {
> /* 32-bit TCE tables allocation */
> unsigned long tce32_count;
>
> - /* Total "weight" for the sake of DMA resources
> - * allocation
> - */
> - unsigned int dma_weight;
> - unsigned int dma_pe_count;
> -
> /* Sorted list of used PE's, sorted at
> * boot for resource allocation purposes
> */
>
--
Alexey
More information about the Linuxppc-dev
mailing list