[RFC PATCH] powerpc/powernv: Provide a way to force a core into SMT4 mode
Ram Pai
linuxram at us.ibm.com
Sat Jan 27 12:06:10 AEDT 2018
On Thu, Jan 25, 2018 at 04:05:12PM +1100, Paul Mackerras wrote:
> POWER9 processors up to and including "Nimbus" v2.2 have hardware
> bugs relating to transactional memory and thread reconfiguration.
> One of these bugs has a workaround which is to get the core into
> SMT4 state temporarily. This workaround is only needed when
> running bare-metal.
..snip..
>
> Signed-off-by: Paul Mackerras <paulus at ozlabs.org>
> ---
> arch/powerpc/include/asm/paca.h | 3 ++
> arch/powerpc/kernel/asm-offsets.c | 1 +
> arch/powerpc/kernel/idle_book3s.S | 15 +++++++++
> arch/powerpc/platforms/powernv/idle.c | 62 +++++++++++++++++++++++++++++++++++
> 4 files changed, 81 insertions(+)
>
..snip..
> STOP_SPR(STOP_PID, pid);
> STOP_SPR(STOP_LDBAR, ldbar);
> diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> index 01e1c19..4a7f88c 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -430,10 +430,23 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
> */
> _GLOBAL(power9_idle_stop)
> std r3, PACA_REQ_PSSCR(r13)
Couldn't this instruction go a little later, saving a few cycles in the
case where it turns out we don't have to stop?
> + sync
> + lwz r5, PACA_DONT_STOP(r13)
> + cmpwi r5, 0
> + bne 1f
I mean, the 'std r3, ...' could move down to here.
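Something like this, perhaps (an untested sketch that just reorders the
instructions quoted here; one thing to double-check is that the sync
appears to pair with the mb() in pnv_power9_force_smt4 to order the
requested_psscr store against the dont_stop test, and moving the store
below the test would change that ordering):

	sync
	lwz	r5, PACA_DONT_STOP(r13)
	cmpwi	r5, 0
	bne	1f
	std	r3, PACA_REQ_PSSCR(r13)	/* record psscr only once we know we will stop */
	mtspr	SPRN_PSSCR,r3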
> mtspr SPRN_PSSCR,r3
> LOAD_REG_ADDR(r4,power_enter_stop)
> b pnv_powersave_common
> /* No return */
> +1:
> + /*
> + * We get here when TM / thread reconfiguration bug workaround
> + * code wants to get the CPU into SMT4 mode, and therefore
> + * we are being asked not to stop.
> + */
> + li r3, 0
> + std r3, PACA_REQ_PSSCR(r13)
> + blr /* return 0 for wakeup cause / SRR1 value */
>
> /*
> * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1,
> @@ -584,6 +597,8 @@ FTR_SECTION_ELSE_NESTED(71)
> mfspr r5, SPRN_PSSCR
> rldicl r5,r5,4,60
> ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71)
> + li r0, 0 /* clear requested_psscr to say we're awake */
> + std r0, PACA_REQ_PSSCR(r13)
> cmpd cr4,r5,r4
> bge cr4,pnv_wakeup_tb_loss /* returns to caller */
>
> diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
> index 443d5ca..72d5a85 100644
> --- a/arch/powerpc/platforms/powernv/idle.c
> +++ b/arch/powerpc/platforms/powernv/idle.c
> @@ -24,6 +24,7 @@
> #include <asm/code-patching.h>
> #include <asm/smp.h>
> #include <asm/runlatch.h>
> +#include <asm/dbell.h>
>
> #include "powernv.h"
> #include "subcore.h"
> @@ -387,6 +388,67 @@ void power9_idle(void)
> power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
> }
>
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +/*
> + * This is used in working around bugs in thread reconfiguration
> + * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
> + * memory and the way that XER[SO] is checkpointed.
> + * This function forces the core into SMT4 by asking
> + * all other threads not to stop, and sending a message to any
> + * that are in a stop state.
> + * Must be called with preemption disabled.
> + */
> +void pnv_power9_force_smt4(void)
> +{
> + int cpu, cpu0, thr;
> + struct paca_struct *tpaca;
> + int awake_threads = 1; /* this thread is awake */
> + int poke_threads = 0;
> +
> + cpu = smp_processor_id();
> + cpu0 = cpu & ~(threads_per_core - 1);
> + tpaca = &paca[cpu0];
> + for (thr = 0; thr < threads_per_core; ++thr) {
> + if (cpu != cpu0 + thr)
> + atomic_inc(&tpaca[thr].dont_stop);
> + }
> + /* order setting dont_stop vs testing requested_psscr */
> + mb();
> + for (thr = 0; thr < threads_per_core; ++thr) {
> + if (!tpaca[thr].requested_psscr)
> + ++awake_threads;
> + else
> + poke_threads |= (1 << thr);
Could ppc_msgsnd(...) be called here, in the else part? (See the sketch
a bit further down.)
> + }
> +
> + /* If at least 3 threads are awake, the core is in SMT4 already */
Small nitpick: this comment mentions SMT4 and 3 threads, but the code is
generically applicable to SMTn and (n - 1) threads.
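For instance, the comment could be worded in terms of threads_per_core
(just a suggested rewording, no functional change):

	/* If at least (threads_per_core - 1) threads are awake, nothing more to do */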
> + if (awake_threads < threads_per_core - 1) {
> + /* We have to wake some threads; we'll use msgsnd */
> + for (thr = 0; thr < threads_per_core; ++thr) {
> + if (poke_threads & (1 << thr))
> + ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
> + tpaca[thr].hw_cpu_id);
> + }
And with that change, this loop could be deleted, which in turn could
lead to further optimizations.
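Roughly like the sketch below (untested, showing only the middle of
pnv_power9_force_smt4 and reusing the same paca fields and helpers as in
the patch; one trade-off is that it would send a doorbell to stopped
threads even when the core is already in SMT4):

	for (thr = 0; thr < threads_per_core; ++thr) {
		if (!tpaca[thr].requested_psscr) {
			++awake_threads;
		} else {
			poke_threads |= (1 << thr);
			/* poke the stopped thread right away */
			ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
				   tpaca[thr].hw_cpu_id);
		}
	}

	/* spin until at least (threads_per_core - 1) threads are awake */
	while (awake_threads < threads_per_core - 1) {
		for (thr = 0; thr < threads_per_core; ++thr) {
			if ((poke_threads & (1 << thr)) &&
			    !tpaca[thr].requested_psscr) {
				++awake_threads;
				poke_threads &= ~(1 << thr);
			}
		}
	}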
> + /* now spin until at least 3 threads are awake */
> + do {
> + for (thr = 0; thr < threads_per_core; ++thr) {
> + if ((poke_threads & (1 << thr)) &&
> + !tpaca[thr].requested_psscr) {
> + ++awake_threads;
> + poke_threads &= ~(1 << thr);
> + }
> + }
> + } while (awake_threads < threads_per_core - 1);
> + }
> + /* clear all the dont_stop flags */
> + for (thr = 0; thr < threads_per_core; ++thr) {
> + if (cpu != cpu0 + thr)
> + atomic_dec(&tpaca[thr].dont_stop);
> + }
> +}
> +EXPORT_SYMBOL_GPL(pnv_power9_force_smt4);
> +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
> +
> #ifdef CONFIG_HOTPLUG_CPU
> static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
> {
> --
> 2.7.4
--
Ram Pai