[PATCH v6 08/42] powerpc/powernv: Calculate PHB's DMA weight dynamically
Gavin Shan
gwshan at linux.vnet.ibm.com
Thu Aug 13 09:57:26 AEST 2015
On Mon, Aug 10, 2015 at 07:21:12PM +1000, Alexey Kardashevskiy wrote:
>On 08/06/2015 02:11 PM, Gavin Shan wrote:
>>For P7IOC, the whole available DMA32 space, which is below the
>>MEM32 space, is divided evenly into 256MB segments. The number
>>of continuous segments assigned to one particular PE depends on
>>the PE's DMA weight that is calculated based on the type of each
>>PCI devices contained in the PE, and PHB's DMA weight which is
>>accumulative DMA weight of PEs contained in the PHB. It means
>>that the PHB's DMA weight calculation depends on existing PEs,
>>which works perfectly now, but not hotplug friendly. As the
>>whole available DMA32 space can be assigned to one PE on PHB3,
>>so we don't have the issue on PHB3.
>>
>>The patch calculates PHB's DMA weight based on the PCI devices
>>contained in the PHB dynamically so that it's hotplug friendly.
>>
>>Signed-off-by: Gavin Shan <gwshan at linux.vnet.ibm.com>
>>---
>> arch/powerpc/platforms/powernv/pci-ioda.c | 88 +++++++++++++++----------------
>> arch/powerpc/platforms/powernv/pci.h | 6 ---
>> 2 files changed, 43 insertions(+), 51 deletions(-)
>>
>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>index 713f4b4..7342cfd 100644
>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>@@ -927,6 +927,9 @@ static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
>>
>> static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
>> {
>>+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
>>+ struct pnv_phb *phb = hose->private_data;
>>+
>> /* This is quite simplistic. The "base" weight of a device
>> * is 10. 0 means no DMA is to be accounted for it.
>> */
>>@@ -939,14 +942,34 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
>> if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
>> dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
>> dev->class == PCI_CLASS_SERIAL_USB_EHCI)
>>- return 3;
>>+ return 3 * phb->ioda.tce32_count;
>>
>> /* Increase the weight of RAID (includes Obsidian) */
>> if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
>>- return 15;
>>+ return 15 * phb->ioda.tce32_count;
>>
>> /* Default */
>>- return 10;
>>+ return 10 * phb->ioda.tce32_count;
>>+}
>>+
>>+static int __pnv_ioda_phb_dma_weight(struct pci_dev *pdev, void *data)
>>+{
>>+ unsigned int *dma_weight = data;
>>+
>>+ *dma_weight += pnv_ioda_dma_weight(pdev);
>>+ return 0;
>>+}
>>+
>>+static unsigned int pnv_ioda_phb_dma_weight(struct pnv_phb *phb)
>>+{
>>+ unsigned int dma_weight = 0;
>>+
>>+ if (!phb->hose->bus)
>>+ return 0;
>>+
>>+ pci_walk_bus(phb->hose->bus,
>>+ __pnv_ioda_phb_dma_weight, &dma_weight);
>>+ return dma_weight;
>> }
>>
>> #ifdef CONFIG_PCI_IOV
>>@@ -1097,14 +1120,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
>> /* Put PE to the list */
>> list_add_tail(&pe->list, &phb->ioda.pe_list);
>>
>>- /* Account for one DMA PE if at least one DMA capable device exist
>>- * below the bridge
>>- */
>>- if (pe->dma_weight != 0) {
>>- phb->ioda.dma_weight += pe->dma_weight;
>>- phb->ioda.dma_pe_count++;
>>- }
>>-
>> /* Link the PE */
>> pnv_ioda_link_pe_by_weight(phb, pe);
>> }
>>@@ -2431,24 +2446,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
>> static void pnv_ioda_setup_dma(struct pnv_phb *phb)
>> {
>> struct pci_controller *hose = phb->hose;
>>- unsigned int residual, remaining, segs, tw, base;
>> struct pnv_ioda_pe *pe;
>>+ unsigned int dma_weight;
>>
>>- /* If we have more PE# than segments available, hand out one
>>- * per PE until we run out and let the rest fail. If not,
>>- * then we assign at least one segment per PE, plus more based
>>- * on the amount of devices under that PE
>>- */
>>- if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
>>- residual = 0;
>>- else
>>- residual = phb->ioda.tce32_count -
>>- phb->ioda.dma_pe_count;
>>-
>>- pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
>>- hose->global_number, phb->ioda.tce32_count);
>>- pr_info("PCI: %d PE# for a total weight of %d\n",
>>- phb->ioda.dma_pe_count, phb->ioda.dma_weight);
>>+ /* Calculate the PHB's DMA weight */
>>+ dma_weight = pnv_ioda_phb_dma_weight(phb);
>>+ pr_info("PCI%04x has %ld DMA32 segments, total weight %d\n",
>>+ hose->global_number, phb->ioda.tce32_count, dma_weight);
>>
>> pnv_pci_ioda_setup_opal_tce_kill(phb);
>>
>>@@ -2456,22 +2460,9 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
>> * out one base segment plus any residual segments based on
>> * weight
>> */
>>- remaining = phb->ioda.tce32_count;
>>- tw = phb->ioda.dma_weight;
>>- base = 0;
>> list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
>> if (!pe->dma_weight)
>> continue;
>>- if (!remaining) {
>>- pe_warn(pe, "No DMA32 resources available\n");
>>- continue;
>>- }
>>- segs = 1;
>>- if (residual) {
>>- segs += ((pe->dma_weight * residual) + (tw / 2)) / tw;
>>- if (segs > remaining)
>>- segs = remaining;
>>- }
>>
>> /*
>> * For IODA2 compliant PHB3, we needn't care about the weight.
>>@@ -2479,17 +2470,24 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
>> * the specific PE.
>> */
>> if (phb->type == PNV_PHB_IODA1) {
>>- pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
>>+ unsigned int segs, base = 0;
>>+
>>+ if (pe->dma_weight <
>>+ dma_weight / phb->ioda.tce32_count)
>>+ segs = 1;
>>+ else
>>+ segs = (pe->dma_weight *
>>+ phb->ioda.tce32_count) / dma_weight;
>>+
>>+ pe_info(pe, "DMA32 weight %d, assigned %d segments\n",
>> pe->dma_weight, segs);
>> pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
>>+
>>+ base += segs;
>
>
>This is not right. @base here is a local variable in the scope,
>pnv_pci_ioda_setup_dma_pe() will always be called with base==0.
>
>
>Sorry for commenting the same patch twice.
>
That's ok to comment for twice on same patch. But I don't see
how it's wrong. The function (pnv_ioda_setup_dma()) is called
as below and it iterate all PEs in the PHB's DMA32 list. That
means the function is affects PHB, not every PE yet. It's out
of problem with "base=0".
pnv_pci_ioda_fixup
pnv_pci_ioda_setup_DMA
pnv_ioda_setup_dma
>
>> } else {
>> pe_info(pe, "Assign DMA32 space\n");
>>- segs = 0;
>> pnv_pci_ioda2_setup_dma_pe(phb, pe);
>> }
>>-
>>- remaining -= segs;
>>- base += segs;
>> }
>> }
>>
>>diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
>>index 08a4e57..addd3f7 100644
>>--- a/arch/powerpc/platforms/powernv/pci.h
>>+++ b/arch/powerpc/platforms/powernv/pci.h
>>@@ -183,12 +183,6 @@ struct pnv_phb {
>> /* 32-bit TCE tables allocation */
>> unsigned long tce32_count;
>>
>>- /* Total "weight" for the sake of DMA resources
>>- * allocation
>>- */
>>- unsigned int dma_weight;
>>- unsigned int dma_pe_count;
>>-
>> /* Sorted list of used PE's, sorted at
>> * boot for resource allocation purposes
>> */
>>
Thanks,
Gavin
More information about the Linuxppc-dev
mailing list