[PATCH 2/7] powerpc/85xx: add HOTPLUG_CPU support
Scott Wood
scottwood at freescale.com
Sat Nov 5 05:35:08 EST 2011
On 11/04/2011 07:31 AM, Zhao Chenhui wrote:
> From: Li Yang <leoli at freescale.com>
>
> Add support to disable and re-enable individual cores at runtime
> on MPC85xx/QorIQ SMP machines. Currently support e500 core.
>
> MPC85xx machines use ePAPR spin-table in boot page for CPU kick-off.
> This patch uses the boot page from bootloader to boot core at runtime.
> It supports 32-bit and 36-bit physical address.
Note that there is no guarantee that the bootloader can handle you
resetting a core. In ePAPR the spin table is a one-time release
mechanism, not a core reset mechanism. If this has a U-Boot dependency,
document that.
> #ifdef CONFIG_SMP
> /* When we get here, r24 needs to hold the CPU # */
> .globl __secondary_start
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 7bf2187..12a54f0 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -381,8 +381,14 @@ void generic_cpu_die(unsigned int cpu)
>
> for (i = 0; i < 100; i++) {
> smp_rmb();
> - if (per_cpu(cpu_state, cpu) == CPU_DEAD)
> + if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
> + /*
> + * After another core sets cpu_state to CPU_DEAD,
> + * it needs some time to die.
> + */
> + msleep(10);
> return;
> + }
> msleep(100);
It would be better to do this as a call into platform-specific code that
can check registers to determine whether the core has checked out (in
our case, whether it has entered nap) -- or to do a suitable delay for
that platform if this isn't possible.
> diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
> index 9b0de9c..5a54fc1 100644
> --- a/arch/powerpc/platforms/85xx/smp.c
> +++ b/arch/powerpc/platforms/85xx/smp.c
> @@ -17,6 +17,7 @@
> #include <linux/of.h>
> #include <linux/kexec.h>
> #include <linux/highmem.h>
> +#include <linux/cpu.h>
>
> #include <asm/machdep.h>
> #include <asm/pgtable.h>
> @@ -30,26 +31,141 @@
>
> extern void __early_start(void);
>
> -#define BOOT_ENTRY_ADDR_UPPER 0
> -#define BOOT_ENTRY_ADDR_LOWER 1
> -#define BOOT_ENTRY_R3_UPPER 2
> -#define BOOT_ENTRY_R3_LOWER 3
> -#define BOOT_ENTRY_RESV 4
> -#define BOOT_ENTRY_PIR 5
> -#define BOOT_ENTRY_R6_UPPER 6
> -#define BOOT_ENTRY_R6_LOWER 7
> -#define NUM_BOOT_ENTRY 8
> -#define SIZE_BOOT_ENTRY (NUM_BOOT_ENTRY * sizeof(u32))
> -
> -static int __init
> -smp_85xx_kick_cpu(int nr)
> +#define MPC85xx_BPTR_OFF 0x00020
> +#define MPC85xx_BPTR_EN 0x80000000
> +#define MPC85xx_BPTR_BOOT_PAGE_MASK 0x00ffffff
> +#define MPC85xx_BRR_OFF 0xe0e4
> +#define MPC85xx_ECM_EEBPCR_OFF 0x01010
> +#define MPC85xx_PIC_PIR_OFF 0x41090
> +
> +struct epapr_entry {
ePAPR is more than just the spin table. Call it something like
epapr_spin_table.
> + u32 addr_h;
> + u32 addr_l;
> + u32 r3_h;
> + u32 r3_l;
> + u32 reserved;
> + u32 pir;
> + u32 r6_h;
> + u32 r6_l;
> +};
Get rid of r6, it is not part of the ePAPR spin table.
> +static int is_corenet;
> +static void __cpuinit smp_85xx_setup_cpu(int cpu_nr);
> +
> +#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PPC32)
Why PPC32?
> +extern void flush_disable_L1(void);
If this isn't already in a header file, put it in one.
> +static void __cpuinit smp_85xx_mach_cpu_die(void)
> +{
> + unsigned int cpu = smp_processor_id();
> + register u32 tmp;
> +
> + local_irq_disable();
> + idle_task_exit();
> + generic_set_cpu_dead(cpu);
> + smp_wmb();
> +
> + mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
> + mtspr(SPRN_TCR, 0);
If clearing TSR matters at all (I'm not sure that it does), first clear
TCR, then TSR.
> + flush_disable_L1();
You'll also need to take down L2 on e500mc.
> + tmp = 0;
> + if (cpu_has_feature(CPU_FTR_CAN_NAP))
> + tmp = HID0_NAP;
> + else if (cpu_has_feature(CPU_FTR_CAN_DOZE))
> + tmp = HID0_DOZE;
Those FTR bits are for what we can do in idle, and can be cleared if the
user sets CONFIG_BDI_SWITCH.
On 85xx we always want to nap here, and at least on e500mc it seems to
be mandatory. From the p5020 RM description of PIR:
> For proper system operation, a core should be reset in this way only if the core is already in nap or sleep
> state. Because a core in either state cannot perform the necessary write to cause a hard reset, a core cannot
> put itself into hard reset.
Note that on e500mc we don't use HID0/MSR_WE to enter nap, we need to
hit the CCSR register. And unless you can somehow guarantee that only
one core at a time is doing this, we'll need some other core to actually
place us in nap (since once we enter nap we're not running so can't
release a lock).
> + if (tmp) {
> + tmp |= mfspr(SPRN_HID0) & ~(HID0_DOZE|HID0_NAP|HID0_SLEEP);
> +
> + smp_mb();
smp_mb()? This is always SMP... It looks like you meant some specific
sync instruction as part of an architected sequence, so just use that.
> + isync();
> + mtspr(SPRN_HID0, tmp);
> + isync();
> +
> + tmp = mfmsr();
> + tmp |= MSR_WE;
> + smp_mb();
> + mtmsr(tmp);
> + isync();
> + }
> +
> + for (;;);
> +}
> +
> +static void __cpuinit smp_85xx_reset_core(int nr)
> +{
> + __iomem u32 *vaddr, *pir_vaddr;
> + u32 val, cpu_mask;
> +
> + /* If CoreNet platform, use BRR as release register. */
> + if (is_corenet) {
> + cpu_mask = 1 << nr;
> + vaddr = ioremap(get_immrbase() + MPC85xx_BRR_OFF, 4);
> + } else {
> + cpu_mask = 1 << (24 + nr);
> + vaddr = ioremap(get_immrbase() + MPC85xx_ECM_EEBPCR_OFF, 4);
> + }
Please use the device tree node, not get_immrbase().
> + val = in_be32(vaddr);
> + if (!(val & cpu_mask)) {
> + out_be32(vaddr, val | cpu_mask);
> + } else {
> + /* reset core */
> + pir_vaddr = ioremap(get_immrbase() + MPC85xx_PIC_PIR_OFF, 4);
> + val = in_be32(pir_vaddr);
> + /* reset assert */
> + val |= (1 << nr);
> + out_be32(pir_vaddr, val);
Use setbits32().
> + val = in_be32(pir_vaddr);
> + val &= ~(1 << nr);
> + /* reset negate */
> + out_be32(pir_vaddr, val);
clrbits32().
Is there any amount of time we need to keep the reset pin asserted?
> + iounmap(pir_vaddr);
> + }
> + iounmap(vaddr);
> +}
> +
> +static int __cpuinit smp_85xx_map_bootpg(u32 page)
> +{
> + __iomem u32 *bootpg_ptr;
> + u32 bptr;
> +
> + /* Get the BPTR */
> + bootpg_ptr = ioremap(get_immrbase() + MPC85xx_BPTR_OFF, 4);
> +
> + /* Set the BPTR to the secondary boot page */
> + bptr = MPC85xx_BPTR_EN | (page & MPC85xx_BPTR_BOOT_PAGE_MASK);
> + out_be32(bootpg_ptr, bptr);
> +
> + iounmap(bootpg_ptr);
> + return 0;
> +}
Shouldn't the boot page already be set by U-Boot?
> +static int __cpuinit smp_85xx_kick_cpu(int nr)
> {
> unsigned long flags;
> const u64 *cpu_rel_addr;
> - __iomem u32 *bptr_vaddr;
> + __iomem struct epapr_entry *epapr;
> struct device_node *np;
> - int n = 0;
> + int n = 0, hw_cpu = get_hard_smp_processor_id(nr);
> int ioremappable;
> + int ret = 0;
>
> WARN_ON (nr < 0 || nr >= NR_CPUS);
>
> @@ -73,46 +189,79 @@ smp_85xx_kick_cpu(int nr)
>
> /* Map the spin table */
> if (ioremappable)
> - bptr_vaddr = ioremap(*cpu_rel_addr, SIZE_BOOT_ENTRY);
> + epapr = ioremap(*cpu_rel_addr, sizeof(struct epapr_entry));
> else
> - bptr_vaddr = phys_to_virt(*cpu_rel_addr);
> + epapr = phys_to_virt(*cpu_rel_addr);
>
> local_irq_save(flags);
>
> - out_be32(bptr_vaddr + BOOT_ENTRY_PIR, nr);
> + out_be32(&epapr->pir, hw_cpu);
> #ifdef CONFIG_PPC32
> - out_be32(bptr_vaddr + BOOT_ENTRY_ADDR_LOWER, __pa(__early_start));
> +#ifdef CONFIG_HOTPLUG_CPU
> + if (system_state == SYSTEM_RUNNING) {
> + out_be32(&epapr->addr_l, 0);
> + smp_85xx_map_bootpg((u32)(*cpu_rel_addr >> PAGE_SHIFT));
Why is this inside PPC32?
> + smp_85xx_reset_core(hw_cpu);
> +
> + /* wait until core is ready... */
> + n = 0;
> + while ((in_be32(&epapr->addr_l) != 1) && (++n < 1000))
> + udelay(100);
> + if (n > 1000) {
if (n == 1000)
or
if (in_be32(&epapr->addr_l) != 1)
> + pr_err("timeout waiting for core%d to reset\n", nr);
> + ret = -ENOENT;
> + goto out;
> + }
> + /* clear the acknowledge status */
> + __secondary_hold_acknowledge = -1;
> +
> + smp_85xx_unmap_bootpg();
> + }
> +#endif
> + out_be32(&epapr->addr_l, __pa(__early_start));
>
> if (!ioremappable)
> - flush_dcache_range((ulong)bptr_vaddr,
> - (ulong)(bptr_vaddr + SIZE_BOOT_ENTRY));
> + flush_dcache_range((ulong)epapr,
> + (ulong)epapr + sizeof(struct epapr_entry));
>
> /* Wait a bit for the CPU to ack. */
> - while ((__secondary_hold_acknowledge != nr) && (++n < 1000))
> + n = 0;
> + while ((__secondary_hold_acknowledge != hw_cpu) && (++n < 1000))
> mdelay(1);
> + if (n > 1000) {
if (n == 1000)
or
if (__secondary_hold_acknowledge != hw_cpu)
> + pr_err("timeout waiting for core%d to ack\n", nr);
pr_err("%s: timeout waiting for core %d to ack\n", __func__, nr);
Likewise elsewhere. Maybe also/instead mention hw_cpu.
> + ret = -ENOENT;
> + goto out;
> + }
> +out:
> #else
> smp_generic_kick_cpu(nr);
>
> - out_be64((u64 *)(bptr_vaddr + BOOT_ENTRY_ADDR_UPPER),
> + out_be64((u64 *)(&epapr->addr_h),
> __pa((u64)*((unsigned long long *) generic_secondary_smp_init)));
>
> if (!ioremappable)
> - flush_dcache_range((ulong)bptr_vaddr,
> - (ulong)(bptr_vaddr + SIZE_BOOT_ENTRY));
> + flush_dcache_range((ulong)epapr,
> + (ulong)epapr + sizeof(struct epapr_entry));
We don't wait for the core to come up on 64-bit?
> @@ -228,14 +376,18 @@ void __init mpc85xx_smp_init(void)
> {
> struct device_node *np;
>
> - smp_85xx_ops.setup_cpu = smp_85xx_setup_cpu;
> -
> np = of_find_node_by_type(NULL, "open-pic");
> if (np) {
> smp_85xx_ops.probe = smp_mpic_probe;
> smp_85xx_ops.message_pass = smp_mpic_message_pass;
> }
>
> + /* Check if the chip is based on CoreNet platform. */
> + is_corenet = 0;
> + np = of_find_compatible_node(NULL, NULL, "fsl,qoriq-device-config-1.0");
> + if (np)
> + is_corenet = 1;
Please also check for the non-corenet guts node. If you don't find
either, disable the mechanism -- you're probably running under a hypervisor.
-Scott
More information about the Linuxppc-dev
mailing list