[PATCH v4 03/21] powerpc/powernv: M64 support improvement

Gavin Shan gwshan at linux.vnet.ibm.com
Mon May 11 14:47:04 AEST 2015


On Sat, May 09, 2015 at 08:24:14PM +1000, Alexey Kardashevskiy wrote:
>On 05/01/2015 04:02 PM, Gavin Shan wrote:
>>We're having the hardware or enforced (on P7IOC) limitation: M64
>
>I would think if it is enforced, then it is enforced by hardware but you say
>"hardware OR enforced" :)
>

PHB3 doesn't have M64DT in hardware. P7IOC supports it, but I
don't utilize the capability. That's why I called it "enforced". Maybe
it would be clearer to say "software enforced"? :-)

>
>>segment#x can only be assigned to PE#x. IO and M32 segment can be
>>mapped to arbitrary PE# via IODT and M32DT. It means the PE number
>>should be x if M64 segment#x has been assigned to the PE. Also, each
>>PE own one M64 segment at most. Currently, we are reserving PE#
>>according to root port's M64 window. It won't be reliable once we
>>extend M64 windows of root port, or the upstream port of the PCIE
>>switch behind root port to PHB's M64 window, in order to support
>>PCI hotplug in future.
>>
>>The patch reserves PE# for M64 segments according to the M64 resources
>>of the PCI devices (not bridges) contained in the PE. Besides, it's
>>always worthy to trace the M64 segments consumed by the PE, which can
>>be released at PCI unplugging time.
>>
>>Signed-off-by: Gavin Shan <gwshan at linux.vnet.ibm.com>
>>---
>>  arch/powerpc/platforms/powernv/pci-ioda.c | 190 ++++++++++++++++++------------
>>  arch/powerpc/platforms/powernv/pci.h      |  10 +-
>>  2 files changed, 122 insertions(+), 78 deletions(-)
>>
>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>index 646962f..a994882 100644
>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>@@ -283,28 +283,78 @@ fail:
>>  	return -EIO;
>>  }
>>
>>-static void pnv_ioda_reserve_m64_pe(struct pnv_phb *phb)
>>+/* We extend the M64 window of root port, or the upstream bridge port
>>+ * of the PCIE switch behind root port. So we shouldn't reserve PEs
>>+ * for M64 resources because there are no (normal) PCI devices consuming
>
>"PCI devices"? Not "root ports or PCI bridges"?
>

I wrote "no (normal) PCI devices" here, which means root ports and PCI bridges
are excluded.

>>+ * M64 resources on the PCI buses leading from root port, or the upstream
>>+ * bridge port.The function returns true if the indicated PCI bus needs
>>+ * reserved PEs because of M64 resources in advance. Otherwise, the
>>+ * function returns false.
>>+ */
>>+static bool pnv_ioda_need_m64_pe(struct pnv_phb *phb,
>>+				 struct pci_bus *bus)
>>  {
>>-	resource_size_t sgsz = phb->ioda.m64_segsize;
>>+	/* Root bus */
>
>The comment is too obvious as the call below is called "pci_is_root_bus" :)
>

Indeed, it will be dropped in next revision.

>>+	if (!bus || pci_is_root_bus(bus))
>>+		return false;
>>+
>>+	/* Bus leading from root port. We need check what types of PCI
>>+	 * devices on the bus. If it's connecting PCI bridge, we don't
>>+	 * need reserve M64 PEs for it. Otherwise, we still need to do
>>+	 * that.
>>+	 */
>>+	if (pci_is_root_bus(bus->self->bus)) {
>>+		struct pci_dev *pdev;
>>+
>>+		list_for_each_entry(pdev, &bus->devices, bus_list) {
>>+			if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL)
>>+				return true;
>>+		}
>>+
>>+		return false;
>>+	}
>>+
>>+	/* Bus leading from the upstream bridge port on top level */
>>+	if (pci_is_root_bus(bus->self->bus->self->bus))
>
>
>Is it for second level bridges? Like root->bridge->bridge? And for 3 levels
>you will need a PE?
>

It's for the upstream port of a PCIe switch behind the root port (a bit complicated).
Yes, the bus leading from the downstream port will need a PE, as you said.

>>+		return false;
>>+
>>+	return true;
>>+}
>>+
>>+static void pnv_ioda_reserve_m64_pe(struct pnv_phb *phb,
>>+				    struct pci_bus *bus)
>>+{
>>+	resource_size_t segsz = phb->ioda.m64_segsize;
>>  	struct pci_dev *pdev;
>>  	struct resource *r;
>>-	int base, step, i;
>>+	unsigned long pe_no, limit;
>>+	int i;
>>
>>-	/*
>>-	 * Root bus always has full M64 range and root port has
>>-	 * M64 range used in reality. So we're checking root port
>>-	 * instead of root bus.
>>+	if (!pnv_ioda_need_m64_pe(phb, bus))
>>+		return;
>>+
>>+	/* The bridge's M64 window might have been extended to the
>>+	 * PHB's M64 window in order to support PCI hotplug. So the
>>+	 * bridge's M64 window isn't reliable to be used for picking
>>+	 * PE# for its leading PCI bus. We have to check the M64
>>+	 * resources consumed by the PCI devices, which seat on the
>>+	 * PCI bus.
>>  	 */
>>-	list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) {
>>-		for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
>>-			r = &pdev->resource[PCI_BRIDGE_RESOURCES + i];
>>-			if (!r->parent ||
>>-			    !pnv_pci_is_mem_pref_64(r->flags))
>>+	list_for_each_entry(pdev, &bus->devices, bus_list) {
>>+		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
>>+#ifdef CONFIG_PCI_IOV
>>+			if (i >= PCI_IOV_RESOURCES && i <= PCI_IOV_RESOURCE_END)
>>+				continue;
>>+#endif
>>+			r = &pdev->resource[i];
>>+			if (!r->flags || r->start >= r->end ||
>>+			    !r->parent || !pnv_pci_is_mem_pref_64(r->flags))
>>  				continue;
>>
>>-			base = (r->start - phb->ioda.m64_base) / sgsz;
>>-			for (step = 0; step < resource_size(r) / sgsz; step++)
>>-				pnv_ioda_reserve_pe(phb, base + step);
>>+			pe_no = (r->start - phb->ioda.m64_base) / segsz;
>>+			limit = ALIGN(r->end - phb->ioda.m64_base, segsz) / segsz;
>>+			for (; pe_no < limit; pe_no++)
>>+				pnv_ioda_reserve_pe(phb, pe_no);
>>  		}
>>  	}
>>  }
>>@@ -316,85 +366,64 @@ static int pnv_ioda_pick_m64_pe(struct pnv_phb *phb,
>>  	struct pci_dev *pdev;
>>  	struct resource *r;
>>  	struct pnv_ioda_pe *master_pe, *pe;
>>-	unsigned long size, *pe_alloc;
>>-	bool found;
>>-	int start, i, j;
>>-
>>-	/* Root bus shouldn't use M64 */
>>-	if (pci_is_root_bus(bus))
>>-		return IODA_INVALID_PE;
>>-
>>-	/* We support only one M64 window on each bus */
>>-	found = false;
>>-	pci_bus_for_each_resource(bus, r, i) {
>>-		if (r && r->parent &&
>>-		    pnv_pci_is_mem_pref_64(r->flags)) {
>>-			found = true;
>>-			break;
>>-		}
>>-	}
>>+	unsigned long size, *pe_bitsmap;
>
>s/pe_bitsmap/pe_bitmap/
>

Yeah, will fix it up. Thanks!

>>+	unsigned long pe_no, limit;
>>+	int i;
>>
>>-	/* No M64 window found ? */
>>-	if (!found)
>>+	if (!pnv_ioda_need_m64_pe(phb, bus))
>>  		return IODA_INVALID_PE;
>>
>>-	/* Allocate bitmap */
>>+        /* Allocate bitmap */
>>  	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
>>-	pe_alloc = kzalloc(size, GFP_KERNEL);
>>-	if (!pe_alloc) {
>>-		pr_warn("%s: Out of memory !\n",
>>-			__func__);
>>+	pe_bitsmap = kzalloc(size, GFP_KERNEL);
>>+	if (!pe_bitsmap) {
>>+		pr_warn("%s: Out of memory !\n", __func__);
>>  		return IODA_INVALID_PE;
>>  	}
>>
>>-	/*
>>-	 * Figure out reserved PE numbers by the PE
>>-	 * the its child PEs.
>>-	 */
>>-	start = (r->start - phb->ioda.m64_base) / segsz;
>>-	for (i = 0; i < resource_size(r) / segsz; i++)
>>-		set_bit(start + i, pe_alloc);
>>-
>>-	if (all)
>>-		goto done;
>>-
>>-	/*
>>-	 * If the PE doesn't cover all subordinate buses,
>>-	 * we need subtract from reserved PEs for children.
>>+	/* The bridge's M64 window might be extended to PHB's M64
>>+	 * window by intention to support PCI hotplug. So we have
>>+	 * to check the M64 resources consumed by the PCI devices
>>+	 * on the PCI bus.
>>  	 */
>>  	list_for_each_entry(pdev, &bus->devices, bus_list) {
>>-		if (!pdev->subordinate)
>>-			continue;
>>+		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
>>+#ifdef CONFIG_PCI_IOV
>>+			if (i >= PCI_IOV_RESOURCES &&
>>+			    i <= PCI_IOV_RESOURCE_END)
>>+				continue;
>>+#endif
>>+			/* Don't scan bridge's window if the PE
>>+			 * doesn't contain its subordinate bus.
>>+			 */
>>+			if (!all && i >= PCI_BRIDGE_RESOURCES &&
>>+			    i <= PCI_BRIDGE_RESOURCE_END)
>>+				continue;
>>
>>-		pci_bus_for_each_resource(pdev->subordinate, r, i) {
>>-			if (!r || !r->parent ||
>>-			    !pnv_pci_is_mem_pref_64(r->flags))
>>+			r = &pdev->resource[i];
>>+			if (!r->flags || r->start >= r->end ||
>>+			    !r->parent || !pnv_pci_is_mem_pref_64(r->flags))
>>  				continue;
>>
>>-			start = (r->start - phb->ioda.m64_base) / segsz;
>>-			for (j = 0; j < resource_size(r) / segsz ; j++)
>>-				clear_bit(start + j, pe_alloc);
>>-                }
>>-        }
>>+			pe_no = (r->start - phb->ioda.m64_base) / segsz;
>>+			limit = ALIGN(r->end - phb->ioda.m64_base, segsz) / segsz;
>>+			for (; pe_no < limit; pe_no++)
>>+				set_bit(pe_no, pe_bitsmap);
>>+		}
>>+	}
>>
>>-	/*
>>-	 * the current bus might not own M64 window and that's all
>>-	 * contributed by its child buses. For the case, we needn't
>>-	 * pick M64 dependent PE#.
>>-	 */
>>-	if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) {
>>-		kfree(pe_alloc);
>>+	/* No M64 window found ? */
>>+	if (bitmap_empty(pe_bitsmap, phb->ioda.total_pe)) {
>>+		kfree(pe_bitsmap);
>>  		return IODA_INVALID_PE;
>>  	}
>>
>>-	/*
>>-	 * Figure out the master PE and put all slave PEs to master
>>-	 * PE's list to form compound PE.
>>+	/* Figure out the master PE and put all slave PEs
>>+	 * to master PE's list to form compound PE.
>>  	 */
>>-done:
>>  	master_pe = NULL;
>>  	i = -1;
>>-	while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) <
>>+	while ((i = find_next_bit(pe_bitsmap, phb->ioda.total_pe, i + 1)) <
>>  		phb->ioda.total_pe) {
>>  		pe = &phb->ioda.pe_array[i];
>>
>>@@ -408,6 +437,13 @@ done:
>>  			list_add_tail(&pe->list, &master_pe->slaves);
>>  		}
>>
>>+		/* Pick the M64 segment, which should be available. Also,
>
>test_and_set_bit() does not pick or choose, it just marks PE#pe_number used.
>

That's true. I will change "Pick the M64 segment" to "Reserve the M64 segment" in the next revision.
If that's still not what you're suggesting, please let me know :-)

>>+		 * those M64 segments consumed by slave PEs are contributed
>>+		 * to the master PE.
>>+		 */
>>+		BUG_ON(test_and_set_bit(pe->pe_number, phb->ioda.m64_segmap));
>>+		BUG_ON(test_and_set_bit(pe->pe_number, master_pe->m64_segmap));
>>+
>>  		/* P7IOC supports M64DT, which helps mapping M64 segment
>>  		 * to one particular PE#. Unfortunately, PHB3 has fixed
>>  		 * mapping between M64 segment and PE#. In order for same
>>@@ -431,7 +467,7 @@ done:
>>  		}
>>  	}
>>
>>-	kfree(pe_alloc);
>>+	kfree(pe_bitsmap);
>>  	return master_pe->pe_number;
>>  }
>>
>>@@ -1233,7 +1269,7 @@ static void pnv_pci_ioda_setup_PEs(void)
>>
>>  		/* M64 layout might affect PE allocation */
>>  		if (phb->reserve_m64_pe)
>>-			phb->reserve_m64_pe(phb);
>>+			phb->reserve_m64_pe(phb, phb->hose->bus);
>>
>>  		pnv_ioda_setup_PEs(hose->bus);
>>  	}
>>diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
>>index 070ee88..19022cf 100644
>>--- a/arch/powerpc/platforms/powernv/pci.h
>>+++ b/arch/powerpc/platforms/powernv/pci.h
>>@@ -49,6 +49,13 @@ struct pnv_ioda_pe {
>>  	/* PE number */
>>  	unsigned int		pe_number;
>>
>>+	/* IO/M32/M64 segments consumed by the PE. Each PE can
>>+	 * have one M64 segment at most, but M64 segments consumed
>>+	 * by slave PEs will be contributed to the master PE. One
>>+	 * PE can own multiple IO and M32 segments.
>>+	 */
>>+	unsigned long		m64_segmap[8];
>
>
>Why 8? 64*8 = 512 segments?  s'8'512/sizeof(unsigned long)' may be?
>

There are 128 M64 segments for P7IOC, but 256 M64 segments for PHB3.
The 512 bits (64 * 8) are enough to cover both 128 and 256, so I still prefer m64_segmap[8] :-)

>
>>+
>>  	/* "Weight" assigned to the PE for the sake of DMA resource
>>  	 * allocations
>>  	 */
>>@@ -114,7 +121,7 @@ struct pnv_phb {
>>  	u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
>>  	void (*shutdown)(struct pnv_phb *phb);
>>  	int (*init_m64)(struct pnv_phb *phb);
>>-	void (*reserve_m64_pe)(struct pnv_phb *phb);
>>+	void (*reserve_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus);
>>  	int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all);
>>  	int (*get_pe_state)(struct pnv_phb *phb, int pe_no);
>>  	void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
>>@@ -153,6 +160,7 @@ struct pnv_phb {
>>  			struct mutex		pe_alloc_mutex;
>>
>>  			/* M32 & IO segment maps */
>>+			unsigned long		m64_segmap[8];
>>  			unsigned int		*m32_segmap;
>>  			unsigned int		*io_segmap;
>>  			struct pnv_ioda_pe	*pe_array;
>>

Thanks,
Gavin



More information about the Linuxppc-dev mailing list