[PATCH v2] powerpc/pci: unmap legacy INTx interrupts when a PHB is removed
Alexey Kardashevskiy
aik at ozlabs.ru
Wed Sep 9 16:16:30 AEST 2020
On 07/08/2020 20:18, Cédric Le Goater wrote:
> When a passthrough IO adapter is removed from a pseries machine using
> hash MMU and the XIVE interrupt mode, the POWER hypervisor expects the
> guest OS to clear all page table entries related to the adapter. If
> some are still present, the RTAS call which isolates the PCI slot
> returns error 9001 "valid outstanding translations" and the removal of
> the IO adapter fails. This is because when the PHBs are scanned, Linux
> maps automatically the INTx interrupts in the Linux interrupt number
> space but these are never removed.
>
> To solve this problem, we introduce a PPC platform specific
> pcibios_remove_bus() routine which clears all interrupt mappings when
> the bus is removed. This also clears the associated page table entries
> of the ESB pages when using XIVE.
>
> For this purpose, we record the logical interrupt numbers of the
> mapped interrupt under the PHB structure and let pcibios_remove_bus()
> do the clean up.
>
> Since some PCI adapters, like GPUs, use the "interrupt-map" property
> to describe interrupt mappings other than the legacy INTx interrupts,
> we can not restrict the size of the mapping array to PCI_NUM_INTX. The
> number of interrupt mappings is computed from the "interrupt-map"
> property and the mapping array is allocated accordingly.
>
> Cc: "Oliver O'Halloran" <oohall at gmail.com>
> Cc: Alexey Kardashevskiy <aik at ozlabs.ru>
> Signed-off-by: Cédric Le Goater <clg at kaod.org>
I thought we could reuse some of the common OF code for the DT parsing
but we cannot (easily) so it is good as it is:
Reviewed-by: Alexey Kardashevskiy <aik at ozlabs.ru>
> ---
>
> Changes since v2:
>
> - merged 2 patches.
>
> arch/powerpc/include/asm/pci-bridge.h | 6 ++
> arch/powerpc/kernel/pci-common.c | 114 ++++++++++++++++++++++++++
> 2 files changed, 120 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
> index b92e81b256e5..ca75cf264ddf 100644
> --- a/arch/powerpc/include/asm/pci-bridge.h
> +++ b/arch/powerpc/include/asm/pci-bridge.h
> @@ -48,6 +48,9 @@ struct pci_controller_ops {
>
> /*
> * Structure of a PCI controller (host bridge)
> + *
> + * @irq_count: number of interrupt mappings
> + * @irq_map: interrupt mappings
> */
> struct pci_controller {
> struct pci_bus *bus;
> @@ -127,6 +130,9 @@ struct pci_controller {
>
> void *private_data;
> struct npu *npu;
> +
> + unsigned int irq_count;
> + unsigned int *irq_map;
> };
>
> /* These are used for config access before all the PCI probing
> diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
> index be108616a721..deb831f0ae13 100644
> --- a/arch/powerpc/kernel/pci-common.c
> +++ b/arch/powerpc/kernel/pci-common.c
> @@ -353,6 +353,115 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr)
> return NULL;
> }
>
> +/*
> + * Assumption is made on the interrupt parent. All interrupt-map
> + * entries are considered to have the same parent.
> + */
> +static int pcibios_irq_map_count(struct pci_controller *phb)
> +{
> + const __be32 *imap;
> + int imaplen;
> + struct device_node *parent;
> + u32 intsize, addrsize, parintsize, paraddrsize;
> +
> + if (of_property_read_u32(phb->dn, "#interrupt-cells", &intsize))
> + return 0;
> + if (of_property_read_u32(phb->dn, "#address-cells", &addrsize))
> + return 0;
> +
> + imap = of_get_property(phb->dn, "interrupt-map", &imaplen);
> + if (!imap) {
> + pr_debug("%pOF : no interrupt-map\n", phb->dn);
> + return 0;
> + }
> + imaplen /= sizeof(u32);
> + pr_debug("%pOF : imaplen=%d\n", phb->dn, imaplen);
> +
> + if (imaplen < (addrsize + intsize + 1))
> + return 0;
> +
> + imap += intsize + addrsize;
> + parent = of_find_node_by_phandle(be32_to_cpup(imap));
> + if (!parent) {
> + pr_debug("%pOF : no imap parent found !\n", phb->dn);
> + return 0;
> + }
> +
> + if (of_property_read_u32(parent, "#interrupt-cells", &parintsize)) {
> + pr_debug("%pOF : parent lacks #interrupt-cells!\n", phb->dn);
> + return 0;
> + }
> +
> + if (of_property_read_u32(parent, "#address-cells", ¶ddrsize))
> + paraddrsize = 0;
> +
> + return imaplen / (addrsize + intsize + 1 + paraddrsize + parintsize);
> +}
> +
> +static void pcibios_irq_map_init(struct pci_controller *phb)
> +{
> + phb->irq_count = pcibios_irq_map_count(phb);
> + if (phb->irq_count < PCI_NUM_INTX)
> + phb->irq_count = PCI_NUM_INTX;
> +
> + pr_debug("%pOF : interrupt map #%d\n", phb->dn, phb->irq_count);
> +
> + phb->irq_map = kcalloc(phb->irq_count, sizeof(unsigned int),
> + GFP_KERNEL);
> +}
> +
> +static void pci_irq_map_register(struct pci_dev *pdev, unsigned int virq)
> +{
> + struct pci_controller *phb = pci_bus_to_host(pdev->bus);
> + int i;
> +
> + if (!phb->irq_map)
> + return;
> +
> + for (i = 0; i < phb->irq_count; i++) {
> + /*
> + * Look for an empty or an equivalent slot, as INTx
> + * interrupts can be shared between adapters.
> + */
> + if (phb->irq_map[i] == virq || !phb->irq_map[i]) {
> + phb->irq_map[i] = virq;
> + break;
> + }
> + }
> +
> + if (i == phb->irq_count)
> + pr_err("PCI:%s all platform interrupts mapped\n",
> + pci_name(pdev));
> +}
> +
> +/*
> + * Clearing the mapped interrupts will also clear the underlying
> + * mappings of the ESB pages of the interrupts when under XIVE. It is
> + * a requirement of PowerVM to clear all memory mappings before
> + * removing a PHB.
> + */
> +static void pci_irq_map_dispose(struct pci_bus *bus)
> +{
> + struct pci_controller *phb = pci_bus_to_host(bus);
> + int i;
> +
> + if (!phb->irq_map)
> + return;
> +
> + pr_debug("PCI: Clearing interrupt mappings for PHB %04x:%02x...\n",
> + pci_domain_nr(bus), bus->number);
> + for (i = 0; i < phb->irq_count; i++)
> + irq_dispose_mapping(phb->irq_map[i]);
> +
> + kfree(phb->irq_map);
> +}
> +
> +void pcibios_remove_bus(struct pci_bus *bus)
> +{
> + pci_irq_map_dispose(bus);
> +}
> +EXPORT_SYMBOL_GPL(pcibios_remove_bus);
> +
> /*
> * Reads the interrupt pin to determine if interrupt is use by card.
> * If the interrupt is used, then gets the interrupt line from the
> @@ -401,6 +510,8 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
>
> pci_dev->irq = virq;
>
> + /* Record all interrut mappings for later removal of a PHB */
> + pci_irq_map_register(pci_dev, virq);
> return 0;
> }
>
> @@ -1554,6 +1665,9 @@ void pcibios_scan_phb(struct pci_controller *hose)
>
> pr_debug("PCI: Scanning PHB %pOF\n", node);
>
> + /* Allocate interrupt mappings array */
> + pcibios_irq_map_init(hose);
> +
> /* Get some IO space for the new PHB */
> pcibios_setup_phb_io_space(hose);
>
>
--
Alexey
More information about the Linuxppc-dev
mailing list