[PATCH v2] powerpc/xive: Do not skip CPU-less nodes when creating the IPIs
Cédric Le Goater
clg at kaod.org
Tue Aug 10 22:10:33 AEST 2021
On 8/7/21 9:20 AM, Cédric Le Goater wrote:
> On PowerVM, CPU-less nodes can be populated with hot-plugged CPUs at
> runtime. Today, the IPI is not created for such nodes, and hot-plugged
> CPUs use a bogus IPI, which leads to soft lockups.
>
> We can not directly allocate and request the IPI on demand because
> bringup_up() is called under the IRQ sparse lock. The alternative is
> to allocate the IPIs for all possible nodes at startup and to request
> the mapping on demand when the first CPU of a node is brought up.
>
> Fixes: 7dcc37b3eff9 ("powerpc/xive: Map one IPI interrupt per node")
> Cc: stable at vger.kernel.org # v5.13
> Reported-by: Geetika Moolchandani <Geetika.Moolchandani1 at ibm.com>
> Cc: Srikar Dronamraju <srikar at linux.vnet.ibm.com>
> Cc: Laurent Vivier <lvivier at redhat.com>
> Signed-off-by: Cédric Le Goater <clg at kaod.org>
> Message-Id: <20210629131542.743888-1-clg at kaod.org>
> Signed-off-by: Cédric Le Goater <clg at kaod.org>
> ---
> arch/powerpc/sysdev/xive/common.c | 35 +++++++++++++++++++++----------
> 1 file changed, 24 insertions(+), 11 deletions(-)
I forgot to add that this version does not break irqbalance anymore, since
Linux is not mapping interrupts of CPU-less nodes.
Anyhow, irqbalance is now fixed :
https://github.com/Irqbalance/irqbalance/commit/a7f81483a95a94d6a62ca7fb999a090e01c0de9b
So v1 (plus irqbalance patch above) or v2 are safe to use. I do prefer
v2.
Thanks to Laurent and Srikar for the extra tests,
C.
> diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
> index dbdbbc2f1dc5..943fd30095af 100644
> --- a/arch/powerpc/sysdev/xive/common.c
> +++ b/arch/powerpc/sysdev/xive/common.c
> @@ -67,6 +67,7 @@ static struct irq_domain *xive_irq_domain;
> static struct xive_ipi_desc {
> unsigned int irq;
> char name[16];
> + atomic_t started;
> } *xive_ipis;
>
> /*
> @@ -1120,7 +1121,7 @@ static const struct irq_domain_ops xive_ipi_irq_domain_ops = {
> .alloc = xive_ipi_irq_domain_alloc,
> };
>
> -static int __init xive_request_ipi(void)
> +static int __init xive_init_ipis(void)
> {
> struct fwnode_handle *fwnode;
> struct irq_domain *ipi_domain;
> @@ -1144,10 +1145,6 @@ static int __init xive_request_ipi(void)
> struct xive_ipi_desc *xid = &xive_ipis[node];
> struct xive_ipi_alloc_info info = { node };
>
> - /* Skip nodes without CPUs */
> - if (cpumask_empty(cpumask_of_node(node)))
> - continue;
> -
> /*
> * Map one IPI interrupt per node for all cpus of that node.
> * Since the HW interrupt number doesn't have any meaning,
> @@ -1159,11 +1156,6 @@ static int __init xive_request_ipi(void)
> xid->irq = ret;
>
> snprintf(xid->name, sizeof(xid->name), "IPI-%d", node);
> -
> - ret = request_irq(xid->irq, xive_muxed_ipi_action,
> - IRQF_PERCPU | IRQF_NO_THREAD, xid->name, NULL);
> -
> - WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
> }
>
> return ret;
> @@ -1178,6 +1170,22 @@ static int __init xive_request_ipi(void)
> return ret;
> }
>
> +static int __init xive_request_ipi(unsigned int cpu)
> +{
> + struct xive_ipi_desc *xid = &xive_ipis[early_cpu_to_node(cpu)];
> + int ret;
> +
> + if (atomic_inc_return(&xid->started) > 1)
> + return 0;
> +
> + ret = request_irq(xid->irq, xive_muxed_ipi_action,
> + IRQF_PERCPU | IRQF_NO_THREAD,
> + xid->name, NULL);
> +
> + WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
> + return ret;
> +}
> +
> static int xive_setup_cpu_ipi(unsigned int cpu)
> {
> unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
> @@ -1192,6 +1200,9 @@ static int xive_setup_cpu_ipi(unsigned int cpu)
> if (xc->hw_ipi != XIVE_BAD_IRQ)
> return 0;
>
> + /* Register the IPI */
> + xive_request_ipi(cpu);
> +
> /* Grab an IPI from the backend, this will populate xc->hw_ipi */
> if (xive_ops->get_ipi(cpu, xc))
> return -EIO;
> @@ -1231,6 +1242,8 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc)
> if (xc->hw_ipi == XIVE_BAD_IRQ)
> return;
>
> + /* TODO: clear IPI mapping */
> +
> /* Mask the IPI */
> xive_do_source_set_mask(&xc->ipi_data, true);
>
> @@ -1253,7 +1266,7 @@ void __init xive_smp_probe(void)
> smp_ops->cause_ipi = xive_cause_ipi;
>
> /* Register the IPI */
> - xive_request_ipi();
> + xive_init_ipis();
>
> /* Allocate and setup IPI for the boot CPU */
> xive_setup_cpu_ipi(smp_processor_id());
>
More information about the Linuxppc-dev
mailing list