[PATCH 1/2] powerpc/powernv: process interrupts from system reset wakeup

Michael Ellerman mpe at ellerman.id.au
Wed Mar 22 21:45:13 AEDT 2017


Nicholas Piggin <npiggin at gmail.com> writes:

> When the CPU wakes from low power state, it begins at the system reset
> interrupt with the exception that caused the wakeup encoded in SRR1.
>
> Today, powernv idle wakeup ignores the wakeup reason (except a special
> case for HMI), and the regular interrupt corresponding to the exception
> will fire after the idle wakeup exits.
>
> Change this to replay the interrupt from the idle wakeup before
> interrupts are hard-enabled.
>
> Test on POWER8 of context_switch selftests benchmark with polling idle
> disabled (e.g., always nap) gives the following results:
>
>                                 original         wakeup direct
> Different threads, same core:   315k/s           264k/s
> Different cores:                235k/s           242k/s
>
> There is a slowdown for doorbell IPI (same core) case because system
> reset wakeup does not clear the message and the doorbell interrupt fires
> again needlessly.

Seems like a win.

> diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
> index 5011b69107a7..c0d9fd2e8c04 100644
> --- a/arch/powerpc/include/asm/machdep.h
> +++ b/arch/powerpc/include/asm/machdep.h
> @@ -223,7 +223,6 @@ struct machdep_calls {
>  
>  extern void e500_idle(void);
>  extern void power4_idle(void);
> -extern void power7_idle(void);
>  extern void ppc6xx_idle(void);
>  extern void book3e_idle(void);
>  
> diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
> index e0fecbcea2a2..8190943d2619 100644
> --- a/arch/powerpc/include/asm/processor.h
> +++ b/arch/powerpc/include/asm/processor.h
> @@ -454,6 +454,7 @@ extern int powersave_nap;	/* set if nap mode can be used in idle loop */
>  extern unsigned long power7_nap(int check_irq);
>  extern unsigned long power7_sleep(void);
>  extern unsigned long power7_winkle(void);
> +extern unsigned long power7_idle(void);
>  extern unsigned long power9_idle_stop(unsigned long stop_psscr_val,
>  				      unsigned long stop_psscr_mask);
>  
> diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
> index 54546b632026..6b28a4f9c1fd 100644
> --- a/arch/powerpc/platforms/powernv/idle.c
> +++ b/arch/powerpc/platforms/powernv/idle.c
> @@ -234,11 +234,16 @@ u64 pnv_default_stop_mask;
>  /*
>   * Used for ppc_md.power_save which needs a function with no parameters
>   */
> -static void power9_idle(void)
> +static void power9_power_save(void)
>  {
>  	power9_idle_stop(pnv_default_stop_val, pnv_default_stop_mask);
>  }
>  
> +static void power7_power_save(void)
> +{
> +	power7_idle();
> +}

Erk. This makes me wonder if we can just mandate using cpuidle for
powernv and drop ppc_md.power_save. I wonder who if anyone has ever
tested powernv without cpuidle.

I notice we already have:

config CPU_IDLE
	bool "CPU idle PM support"
	default y if ACPI || PPC_PSERIES


But not your problem for this patch.

> @@ -534,9 +539,9 @@ static int __init pnv_init_idle_states(void)
>  	}
>  
>  	if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
> -		ppc_md.power_save = power7_idle;
> +		ppc_md.power_save = power7_power_save;
>  	else if (supported_cpuidle_states & OPAL_PM_STOP_INST_FAST)
> -		ppc_md.power_save = power9_idle;
> +		ppc_md.power_save = power9_power_save;
>  
>  out:
>  	return 0;
> diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
> index 370593006f5f..e7e080c0790c 100644
> --- a/drivers/cpuidle/cpuidle-powernv.c
> +++ b/drivers/cpuidle/cpuidle-powernv.c
> @@ -70,13 +70,37 @@ static int snooze_loop(struct cpuidle_device *dev,
>  	return index;
>  }
>  
> +/*
> + * Table to convert shifted SRR1 wakeup reason for POWER7, POWER8, POWER9
> + * to __replay_interrupt vector.
> + */
> +static const unsigned short srr1_wakeup_to_replay_table[0x10] =
> +{ 0, 0, 0, 0xe80,	/* 0x3 = hv doorbell */
> +  0, 0xa00,		/* 0x5 = doorbell */
> +  0x900,		/* 0x6 = decrementer */
> +  0, 0x500,		/* 0x8 = external */
> +  0xea0,		/* 0x9 = hv virt (POWER9) */
> +  0xe60,		/* 0xa = hmi */
> +  0, 0, 0, 0, 0, };
> +
> +/* Shift SRR1_WAKEMASK_P8 down and convert to __replay_interrupt vector */
> +#define SRR1_TO_REPLAY(srr1) \
> +	((unsigned int)srr1_wakeup_to_replay_table[((srr1) >> 18) & 0xf])

This is a bit hairy, I'd just use a switch, but I guess this generates
vastly better code?

> +
>  static int nap_loop(struct cpuidle_device *dev,
>  			struct cpuidle_driver *drv,
>  			int index)
>  {
> +	u64 srr1;
> +	unsigned int reason;
> +
>  	ppc64_runlatch_off();
> -	power7_idle();
> +	srr1 = power7_idle();
>  	ppc64_runlatch_on();
> +
> +	reason = SRR1_TO_REPLAY(srr1);
> +	__replay_interrupt(reason);
> +
>  	return index;
>  }
>  
> @@ -111,10 +135,17 @@ static int stop_loop(struct cpuidle_device *dev,
>  		     struct cpuidle_driver *drv,
>  		     int index)
>  {
> +	u64 srr1;
> +	unsigned int reason;
> +
>  	ppc64_runlatch_off();
> -	power9_idle_stop(stop_psscr_table[index].val,
> +	srr1 = power9_idle_stop(stop_psscr_table[index].val,
>  			 stop_psscr_table[index].mask);
>  	ppc64_runlatch_on();
> +
> +	reason = SRR1_TO_REPLAY(srr1);
> +	__replay_interrupt(reason);
> +
>  	return index;
>  }

We end up with a bit of duplicated code there, but I guess it's not
worth worrying about.

cheers


More information about the Linuxppc-dev mailing list