[PATCH v6 08/42] powerpc/powernv: Calculate PHB's DMA weight dynamically

Gavin Shan gwshan at linux.vnet.ibm.com
Thu Aug 13 09:57:26 AEST 2015


On Mon, Aug 10, 2015 at 07:21:12PM +1000, Alexey Kardashevskiy wrote:
>On 08/06/2015 02:11 PM, Gavin Shan wrote:
>>For P7IOC, the whole available DMA32 space, which is below the
>>MEM32 space, is divided evenly into 256MB segments. The number
>>of continuous segments assigned to one particular PE depends on
>>the PE's DMA weight that is calculated based on the type of each
>>PCI devices contained in the PE, and PHB's DMA weight which is
>>accumulative DMA weight of PEs contained in the PHB. It means
>>that the PHB's DMA weight calculation depends on existing PEs,
>>which works perfectly now, but not hotplug friendly. As the
>>whole available DMA32 space can be assigned to one PE on PHB3,
>>so we don't have the issue on PHB3.
>>
>>The patch calculates PHB's DMA weight based on the PCI devices
>>contained in the PHB dynamically so that it's hotplug friendly.
>>
>>Signed-off-by: Gavin Shan <gwshan at linux.vnet.ibm.com>
>>---
>>  arch/powerpc/platforms/powernv/pci-ioda.c | 88 +++++++++++++++----------------
>>  arch/powerpc/platforms/powernv/pci.h      |  6 ---
>>  2 files changed, 43 insertions(+), 51 deletions(-)
>>
>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>index 713f4b4..7342cfd 100644
>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>@@ -927,6 +927,9 @@ static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
>>
>>  static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
>>  {
>>+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
>>+	struct pnv_phb *phb = hose->private_data;
>>+
>>  	/* This is quite simplistic. The "base" weight of a device
>>  	 * is 10. 0 means no DMA is to be accounted for it.
>>  	 */
>>@@ -939,14 +942,34 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
>>  	if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
>>  	    dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
>>  	    dev->class == PCI_CLASS_SERIAL_USB_EHCI)
>>-		return 3;
>>+		return 3 * phb->ioda.tce32_count;
>>
>>  	/* Increase the weight of RAID (includes Obsidian) */
>>  	if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
>>-		return 15;
>>+		return 15 * phb->ioda.tce32_count;
>>
>>  	/* Default */
>>-	return 10;
>>+	return 10 * phb->ioda.tce32_count;
>>+}
>>+
>>+static int __pnv_ioda_phb_dma_weight(struct pci_dev *pdev, void *data)
>>+{
>>+	unsigned int *dma_weight = data;
>>+
>>+	*dma_weight += pnv_ioda_dma_weight(pdev);
>>+	return 0;
>>+}
>>+
>>+static unsigned int pnv_ioda_phb_dma_weight(struct pnv_phb *phb)
>>+{
>>+	unsigned int dma_weight = 0;
>>+
>>+	if (!phb->hose->bus)
>>+		return 0;
>>+
>>+	pci_walk_bus(phb->hose->bus,
>>+		     __pnv_ioda_phb_dma_weight, &dma_weight);
>>+	return dma_weight;
>>  }
>>
>>  #ifdef CONFIG_PCI_IOV
>>@@ -1097,14 +1120,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
>>  	/* Put PE to the list */
>>  	list_add_tail(&pe->list, &phb->ioda.pe_list);
>>
>>-	/* Account for one DMA PE if at least one DMA capable device exist
>>-	 * below the bridge
>>-	 */
>>-	if (pe->dma_weight != 0) {
>>-		phb->ioda.dma_weight += pe->dma_weight;
>>-		phb->ioda.dma_pe_count++;
>>-	}
>>-
>>  	/* Link the PE */
>>  	pnv_ioda_link_pe_by_weight(phb, pe);
>>  }
>>@@ -2431,24 +2446,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
>>  static void pnv_ioda_setup_dma(struct pnv_phb *phb)
>>  {
>>  	struct pci_controller *hose = phb->hose;
>>-	unsigned int residual, remaining, segs, tw, base;
>>  	struct pnv_ioda_pe *pe;
>>+	unsigned int dma_weight;
>>
>>-	/* If we have more PE# than segments available, hand out one
>>-	 * per PE until we run out and let the rest fail. If not,
>>-	 * then we assign at least one segment per PE, plus more based
>>-	 * on the amount of devices under that PE
>>-	 */
>>-	if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
>>-		residual = 0;
>>-	else
>>-		residual = phb->ioda.tce32_count -
>>-			phb->ioda.dma_pe_count;
>>-
>>-	pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
>>-		hose->global_number, phb->ioda.tce32_count);
>>-	pr_info("PCI: %d PE# for a total weight of %d\n",
>>-		phb->ioda.dma_pe_count, phb->ioda.dma_weight);
>>+	/* Calculate the PHB's DMA weight */
>>+	dma_weight = pnv_ioda_phb_dma_weight(phb);
>>+	pr_info("PCI%04x has %ld DMA32 segments, total weight %d\n",
>>+		hose->global_number, phb->ioda.tce32_count, dma_weight);
>>
>>  	pnv_pci_ioda_setup_opal_tce_kill(phb);
>>
>>@@ -2456,22 +2460,9 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
>>  	 * out one base segment plus any residual segments based on
>>  	 * weight
>>  	 */
>>-	remaining = phb->ioda.tce32_count;
>>-	tw = phb->ioda.dma_weight;
>>-	base = 0;
>>  	list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
>>  		if (!pe->dma_weight)
>>  			continue;
>>-		if (!remaining) {
>>-			pe_warn(pe, "No DMA32 resources available\n");
>>-			continue;
>>-		}
>>-		segs = 1;
>>-		if (residual) {
>>-			segs += ((pe->dma_weight * residual)  + (tw / 2)) / tw;
>>-			if (segs > remaining)
>>-				segs = remaining;
>>-		}
>>
>>  		/*
>>  		 * For IODA2 compliant PHB3, we needn't care about the weight.
>>@@ -2479,17 +2470,24 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
>>  		 * the specific PE.
>>  		 */
>>  		if (phb->type == PNV_PHB_IODA1) {
>>-			pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
>>+			unsigned int segs, base = 0;
>>+
>>+			if (pe->dma_weight <
>>+			    dma_weight / phb->ioda.tce32_count)
>>+				segs = 1;
>>+			else
>>+				segs = (pe->dma_weight *
>>+					phb->ioda.tce32_count) / dma_weight;
>>+
>>+			pe_info(pe, "DMA32 weight %d, assigned %d segments\n",
>>  				pe->dma_weight, segs);
>>  			pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
>>+
>>+			base += segs;
>
>
>This is not right. @base here is a local variable in the scope,
>pnv_pci_ioda_setup_dma_pe() will always be called with base==0.
>
>
>Sorry for commenting the same patch twice.
>

That's ok to comment for twice on same patch. But I don't see
how it's wrong. The function (pnv_ioda_setup_dma()) is called
as below and it iterate all PEs in the PHB's DMA32 list. That
means the function is affects PHB, not every PE yet. It's out
of problem with "base=0".

pnv_pci_ioda_fixup
  pnv_pci_ioda_setup_DMA
    pnv_ioda_setup_dma
>
>>  		} else {
>>  			pe_info(pe, "Assign DMA32 space\n");
>>-			segs = 0;
>>  			pnv_pci_ioda2_setup_dma_pe(phb, pe);
>>  		}
>>-
>>-		remaining -= segs;
>>-		base += segs;
>>  	}
>>  }
>>
>>diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
>>index 08a4e57..addd3f7 100644
>>--- a/arch/powerpc/platforms/powernv/pci.h
>>+++ b/arch/powerpc/platforms/powernv/pci.h
>>@@ -183,12 +183,6 @@ struct pnv_phb {
>>  			/* 32-bit TCE tables allocation */
>>  			unsigned long		tce32_count;
>>
>>-			/* Total "weight" for the sake of DMA resources
>>-			 * allocation
>>-			 */
>>-			unsigned int		dma_weight;
>>-			unsigned int		dma_pe_count;
>>-
>>  			/* Sorted list of used PE's, sorted at
>>  			 * boot for resource allocation purposes
>>  			 */
>>

Thanks,
Gavin



More information about the Linuxppc-dev mailing list