[RFC PATCH] powerpc/powernv: Provide a way to force a core into SMT4 mode

Ram Pai linuxram at us.ibm.com
Sat Jan 27 12:06:10 AEDT 2018


On Thu, Jan 25, 2018 at 04:05:12PM +1100, Paul Mackerras wrote:
> POWER9 processors up to and including "Nimbus" v2.2 have hardware
> bugs relating to transactional memory and thread reconfiguration.
> One of these bugs has a workaround which is to get the core into
> SMT4 state temporarily.  This workaround is only needed when
> running bare-metal.

..snip..

> 
> Signed-off-by: Paul Mackerras <paulus at ozlabs.org>
> ---
>  arch/powerpc/include/asm/paca.h       |  3 ++
>  arch/powerpc/kernel/asm-offsets.c     |  1 +
>  arch/powerpc/kernel/idle_book3s.S     | 15 +++++++++
>  arch/powerpc/platforms/powernv/idle.c | 62 +++++++++++++++++++++++++++++++++++
>  4 files changed, 81 insertions(+)
> 
..snip..
>  	STOP_SPR(STOP_PID, pid);
>  	STOP_SPR(STOP_LDBAR, ldbar);
> diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> index 01e1c19..4a7f88c 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -430,10 +430,23 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);		\
>   */
>  _GLOBAL(power9_idle_stop)
>  	std	r3, PACA_REQ_PSSCR(r13)
	
this instruction can go a little later and save a few cycles, in the
case it need not have to stop ?

> +	sync
> +	lwz	r5, PACA_DONT_STOP(r13)
> +	cmpwi	r5, 0
> +	bne	1f

I mean the 'std r3, ...' instruction could be moved down to here.

>  	mtspr 	SPRN_PSSCR,r3
>  	LOAD_REG_ADDR(r4,power_enter_stop)
>  	b	pnv_powersave_common
>  	/* No return */
> +1:
> +	/*
> +	 * We get here when TM / thread reconfiguration bug workaround
> +	 * code wants to get the CPU into SMT4 mode, and therefore
> +	 * we are being asked not to stop.
> +	 */
> +	li	r3, 0
> +	std	r3, PACA_REQ_PSSCR(r13)
> +	blr		/* return 0 for wakeup cause / SRR1 value */
> 
>  /*
>   * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1,
> @@ -584,6 +597,8 @@ FTR_SECTION_ELSE_NESTED(71)
>  	mfspr	r5, SPRN_PSSCR
>  	rldicl  r5,r5,4,60
>  ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71)
> +	li	r0, 0		/* clear requested_psscr to say we're awake */
> +	std	r0, PACA_REQ_PSSCR(r13)
>  	cmpd	cr4,r5,r4
>  	bge	cr4,pnv_wakeup_tb_loss /* returns to caller */
> 
> diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
> index 443d5ca..72d5a85 100644
> --- a/arch/powerpc/platforms/powernv/idle.c
> +++ b/arch/powerpc/platforms/powernv/idle.c
> @@ -24,6 +24,7 @@
>  #include <asm/code-patching.h>
>  #include <asm/smp.h>
>  #include <asm/runlatch.h>
> +#include <asm/dbell.h>
> 
>  #include "powernv.h"
>  #include "subcore.h"
> @@ -387,6 +388,67 @@ void power9_idle(void)
>  	power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
>  }
> 
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +/*
> + * This is used in working around bugs in thread reconfiguration
> + * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
> + * memory and the way that XER[SO] is checkpointed.
> + * This function forces the core into SMT4 in order by asking
> + * all other threads not to stop, and sending a message to any
> + * that are in a stop state.
> + * Must be called with preemption disabled.
> + */
> +void pnv_power9_force_smt4(void)
> +{
> +	int cpu, cpu0, thr;
> +	struct paca_struct *tpaca;
> +	int awake_threads = 1;		/* this thread is awake */
> +	int poke_threads = 0;
> +
> +	cpu = smp_processor_id();
> +	cpu0 = cpu & ~(threads_per_core - 1);
> +	tpaca = &paca[cpu0];
> +	for (thr = 0; thr < threads_per_core; ++thr) {
> +		if (cpu != cpu0 + thr)
> +			atomic_inc(&tpaca[thr].dont_stop);
> +	}
> +	/* order setting dont_stop vs testing requested_psscr */
> +	mb();
> +	for (thr = 0; thr < threads_per_core; ++thr) {
> +		if (!tpaca[thr].requested_psscr)
> +			++awake_threads;
> +		else
> +			poke_threads |= (1 << thr);

			Could ppc_msgsnd(...) be called right here, in the else branch?
	
> +	}
> +
> +	/* If at least 3 threads are awake, the core is in SMT4 already */

Small nitpick -- this comment mentions SMT4 and 3 threads, but the code
is generically applicable to SMTn with (n-1) threads.

> +	if (awake_threads < threads_per_core - 1) {


> +		/* We have to wake some threads; we'll use msgsnd */
> +		for (thr = 0; thr < threads_per_core; ++thr) {
> +			if (poke_threads & (1 << thr))
> +				ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
> +					   tpaca[thr].hw_cpu_id);
> +		}

and then this loop could be deleted, which in turn could lead to further optimizations.


> +		/* now spin until at least 3 threads are awake */
> +		do {
> +			for (thr = 0; thr < threads_per_core; ++thr) {
> +				if ((poke_threads & (1 << thr)) &&
> +				    !tpaca[thr].requested_psscr) {
> +					++awake_threads;
> +					poke_threads &= ~(1 << thr);
> +				}
> +			}
> +		} while (awake_threads < threads_per_core - 1);
> +	}
> +	/* clear all the dont_stop flags */
> +	for (thr = 0; thr < threads_per_core; ++thr) {
> +		if (cpu != cpu0 + thr)
> +			atomic_dec(&tpaca[thr].dont_stop);
> +	}
> +}
> +EXPORT_SYMBOL_GPL(pnv_power9_force_smt4);
> +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
> +
>  #ifdef CONFIG_HOTPLUG_CPU
>  static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
>  {
> -- 
> 2.7.4

-- 
Ram Pai



More information about the Linuxppc-dev mailing list