[PATCH v9 2/2] powerpc/64s: KVM update for reimplement book3s idle code in C
Nicholas Piggin
npiggin@gmail.com
Sat Apr 13 00:30:53 AEST 2019
This is the KVM update to the new idle code. A few improvements:
- Idle sleepers now always return to their caller rather than branching
  out to KVM first.
- This allows optimisations such as a very fast return to the caller
  when no state has been lost.
- KVM no longer requires nap_state_lost because it controls NVGPR
  save/restore itself on the way in and out.
- The heavy idle-wakeup KVM request check is moved out of the normal
  host idle code and into the non-performance-critical offline code
  (a sketch of the resulting offline path follows below).
- The KVM nap code now returns to where it was called from, which makes
  the flow a bit easier to follow.
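
To illustrate the last two points, after this patch the offline wakeup
path in powernv/idle.c performs the KVM request check itself once the
idle instruction returns. This is a simplified sketch of the
power7_offline() change, not the literal hunk; the stop/ISA 3.00
variant follows the same pattern:

	srr1 = power7_idle_insn(power7_offline_type);
	__ppc64_runlatch_on();

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	/* Tell KVM this thread is back in the kernel before testing its request */
	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
	/* Order setting hwthread_state vs. testing hwthread_req */
	smp_mb();
	if (local_paca->kvm_hstate.hwthread_req)
		srr1 = idle_kvm_start_guest(srr1);
#endif

	mtmsr(MSR_KERNEL);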
---
arch/powerpc/include/asm/paca.h | 1 -
arch/powerpc/kernel/asm-offsets.c | 1 -
arch/powerpc/kernel/exceptions-64s.S | 14 ++-
arch/powerpc/kernel/idle_book3s.S | 22 -----
arch/powerpc/kvm/book3s_hv_rmhandlers.S | 119 ++++++++++++++----------
arch/powerpc/platforms/powernv/idle.c | 15 +++
arch/powerpc/xmon/xmon.c | 3 -
7 files changed, 93 insertions(+), 82 deletions(-)
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index e55dedd7ee3e..245d11a71784 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -174,7 +174,6 @@ struct paca_struct {
u8 io_sync; /* writel() needs spin_unlock sync */
u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- u8 nap_state_lost; /* NV GPR values lost in power7_idle */
u8 pmcregs_in_use; /* pseries puts this in lppaca */
#endif
u64 sprg_vdso; /* Saved user-visible sprg */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 167a59fda12e..83ad99f9f05d 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -268,7 +268,6 @@ int main(void)
OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime);
OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime);
OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
- OFFSET(PACA_NAPSTATELOST, paca_struct, nap_state_lost);
OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso);
#else /* CONFIG_PPC64 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index c4c50bca12c7..6247b5bbfa5c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -121,6 +121,8 @@ EXC_VIRT_NONE(0x4000, 0x100)
rlwinm. r10,r10,47-31,30,31 ; \
beq- 1f ; \
cmpwi cr1,r10,2 ; \
+ mfspr r3,SPRN_SRR1 ; \
+ bltlr cr1 ; /* no state loss, return to idle caller */ \
BRANCH_TO_C000(r10, system_reset_idle_common) ; \
1: \
KVMTEST_PR(n) ; \
@@ -144,12 +146,10 @@ TRAMP_KVM(PACA_EXNMI, 0x100)
#ifdef CONFIG_PPC_P7_NAP
EXC_COMMON_BEGIN(system_reset_idle_common)
- mfspr r3,SPRN_SRR1
-#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- /* this bltlr could be moved before the branch_to, and the
- * branch_to could maybe go straight to idle_return */
- bltlr cr1 /* no state loss, return to idle caller */
-#endif
+ /*
+ * This must be a direct branch (without linker branch stub) because
+ * we can not use TOC at this point as r2 may not be restored yet.
+ */
b idle_return_gpr_loss
#endif
@@ -441,9 +441,7 @@ EXC_COMMON_BEGIN(machine_check_idle_common)
mtlr r4
rlwinm r10,r3,47-31,30,31
cmpwi cr1,r10,2
-#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
bltlr cr1 /* no state loss, return to idle caller */
-#endif
b idle_return_gpr_loss
#endif
/*
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 0fb2eb731a29..2dfbd5d5b932 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -19,9 +19,6 @@
#include <asm/asm-offsets.h>
#include <asm/ppc-opcode.h>
#include <asm/cpuidle.h>
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-#include <asm/kvm_book3s_asm.h>
-#endif
/*
* Desired PSSCR in r3
@@ -93,25 +90,6 @@ _GLOBAL(isa300_idle_stop_mayloss)
* a simple blr instead).
*/
_GLOBAL(idle_return_gpr_loss)
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- lbz r0,HSTATE_HWTHREAD_STATE(r13)
- cmpwi r0,KVM_HWTHREAD_IN_KERNEL
- beq 0f
- li r0,KVM_HWTHREAD_IN_KERNEL
- stb r0,HSTATE_HWTHREAD_STATE(r13)
- /* Order setting hwthread_state vs. testing hwthread_req */
- sync
-0: lbz r0,HSTATE_HWTHREAD_REQ(r13)
- cmpwi r0,0
- beq 1f
- b kvm_start_guest
-1:
- lbz r0,PACA_NAPSTATELOST(r13)
- cmpwi r0,0
- bne 2f
- bltlr cr1
-2:
-#endif
ld r1,PACAR1(r13)
ld r4,-8*19(r1)
ld r5,-8*20(r1)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 58d0f1ba845d..e9c69089080a 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -35,6 +35,7 @@
#include <asm/thread_info.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
+#include <asm/cpuidle.h>
/* Sign-extend HDEC if not on POWER9 */
#define EXTEND_HDEC(reg) \
@@ -45,6 +46,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/* Values in HSTATE_NAPPING(r13) */
#define NAPPING_CEDE 1
#define NAPPING_NOVCPU 2
+#define NAPPING_UNSPLIT 3
/* Stack frame offsets for kvmppc_hv_entry */
#define SFS 208
@@ -290,17 +292,20 @@ kvm_novcpu_exit:
b kvmhv_switch_to_host
/*
- * We come in here when wakened from nap mode.
- * Relocation is off and most register values are lost.
- * r13 points to the PACA.
+ * We come in here when wakened from Linux offline idle code.
+ * Relocation is off
* r3 contains the SRR1 wakeup value, SRR1 is trashed.
*/
- .globl kvm_start_guest
-kvm_start_guest:
- /* Set runlatch bit the minute you wake up from nap */
- mfspr r0, SPRN_CTRLF
- ori r0, r0, 1
- mtspr SPRN_CTRLT, r0
+ .globl idle_kvm_start_guest
+idle_kvm_start_guest:
+ ld r4,PACAEMERGSP(r13)
+ mfcr r5
+ mflr r0
+ std r1,0(r4)
+ std r5,8(r4)
+ std r0,16(r4)
+ subi r1,r4,STACK_FRAME_OVERHEAD
+ SAVE_NVGPRS(r1)
/*
* Could avoid this and pass it through in r3. For now,
@@ -308,27 +313,23 @@ kvm_start_guest:
*/
mtspr SPRN_SRR1,r3
- ld r2,PACATOC(r13)
-
li r0,0
stb r0,PACA_FTRACE_ENABLED(r13)
li r0,KVM_HWTHREAD_IN_KVM
stb r0,HSTATE_HWTHREAD_STATE(r13)
- /* NV GPR values from power7_idle() will no longer be valid */
- li r0,1
- stb r0,PACA_NAPSTATELOST(r13)
-
- /* were we napping due to cede? */
+ /* kvm cede / napping does not come through here */
lbz r0,HSTATE_NAPPING(r13)
- cmpwi r0,NAPPING_CEDE
- beq kvm_end_cede
- cmpwi r0,NAPPING_NOVCPU
- beq kvm_novcpu_wakeup
+ twnei r0,0
+
+ b 1f
- ld r1,PACAEMERGSP(r13)
- subi r1,r1,STACK_FRAME_OVERHEAD
+kvm_unsplit_wakeup:
+ li r0, 0
+ stb r0, HSTATE_NAPPING(r13)
+
+1:
/*
* We weren't napping due to cede, so this must be a secondary
@@ -437,21 +438,25 @@ kvm_no_guest:
lbz r3, HSTATE_HWTHREAD_REQ(r13)
cmpwi r3, 0
bne 54f
-/*
- * We jump to pnv_wakeup_loss, which will return to the caller
- * of power7_nap in the powernv cpu offline loop. The value we
- * put in r3 becomes the return value for power7_nap. pnv_wakeup_loss
- * requires SRR1 in r12.
- */
+
+ /*
+ * Jump to idle_return_gpr_loss, which returns to the
+ * idle_kvm_start_guest caller.
+ */
li r3, LPCR_PECE0
mfspr r4, SPRN_LPCR
rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
mtspr SPRN_LPCR, r4
- li r3, 0
- /* set up cr3 and r3 for return */
- cmpdi cr3, r3, 0
+ /* set up r3 for return */
mfspr r3,SPRN_SRR1
- b idle_return_gpr_loss
+ REST_NVGPRS(r1)
+ addi r1, r1, STACK_FRAME_OVERHEAD
+ ld r0, 16(r1)
+ ld r5, 8(r1)
+ ld r1, 0(r1)
+ mtlr r0
+ mtcr r5
+ blr
53: HMT_LOW
ld r5, HSTATE_KVM_VCORE(r13)
@@ -536,6 +541,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
lbz r0, KVM_SPLIT_DO_NAP(r3)
cmpwi r0, 0
beq 57f
+ li r3, NAPPING_UNSPLIT
+ stb r3, HSTATE_NAPPING(r13)
li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
mfspr r5, SPRN_LPCR
rlwimi r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
@@ -2656,6 +2663,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lis r3, LPCR_PECEDP@h /* Do wake on privileged doorbell */
+ /* Go back to host stack */
+ ld r1, HSTATE_HOST_R1(r13)
+
/*
* Take a nap until a decrementer or external or doobell interrupt
* occurs, with PECE1 and PECE0 set in LPCR.
@@ -2684,26 +2694,42 @@ BEGIN_FTR_SECTION
* requested level = 0 (just stop dispatching)
*/
lis r3, (PSSCR_EC | PSSCR_ESL)@h
- mtspr SPRN_PSSCR, r3
/* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
li r4, LPCR_PECE_HVEE@higher
sldi r4, r4, 32
or r5, r5, r4
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+FTR_SECTION_ELSE
+ li r3, PNV_THREAD_NAP
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mtspr SPRN_LPCR,r5
isync
- li r0, 0
- std r0, HSTATE_SCRATCH0(r13)
- ptesync
- ld r0, HSTATE_SCRATCH0(r13)
-1: cmpd r0, r0
- bne 1b
+
BEGIN_FTR_SECTION
- nap
+ bl isa300_idle_stop_mayloss
FTR_SECTION_ELSE
- PPC_STOP
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
- b .
+ bl isa206_idle_insn_mayloss
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
+
+ mfspr r0, SPRN_CTRLF
+ ori r0, r0, 1
+ mtspr SPRN_CTRLT, r0
+
+ mtspr SPRN_SRR1, r3
+
+ li r0, 0
+ stb r0, PACA_FTRACE_ENABLED(r13)
+
+ li r0, KVM_HWTHREAD_IN_KVM
+ stb r0, HSTATE_HWTHREAD_STATE(r13)
+
+ lbz r0, HSTATE_NAPPING(r13)
+ cmpwi r0, NAPPING_CEDE
+ beq kvm_end_cede
+ cmpwi r0, NAPPING_NOVCPU
+ beq kvm_novcpu_wakeup
+ cmpwi r0, NAPPING_UNSPLIT
+ beq kvm_unsplit_wakeup
+ twi 31,0,0 /* Nap state must not be zero */
33: mr r4, r3
li r3, 0
@@ -2711,12 +2737,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
b 34f
kvm_end_cede:
+ /* Woken by external or decrementer interrupt */
+
/* get vcpu pointer */
ld r4, HSTATE_KVM_VCPU(r13)
- /* Woken by external or decrementer interrupt */
- ld r1, HSTATE_HOST_R1(r13)
-
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
addi r3, r4, VCPU_TB_RMINTR
bl kvmhv_accumulate_time
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 36bafeaa5514..7f7e7cdffcf5 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -482,6 +482,8 @@ static unsigned long power7_idle_insn(unsigned long type)
return srr1;
}
+extern unsigned long idle_kvm_start_guest(unsigned long srr1);
+
#ifdef CONFIG_HOTPLUG_CPU
static unsigned long power7_offline(void)
{
@@ -513,6 +515,14 @@ static unsigned long power7_offline(void)
srr1 = power7_idle_insn(power7_offline_type);
__ppc64_runlatch_on();
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ smp_mb();
+ if (local_paca->kvm_hstate.hwthread_req)
+ srr1 = idle_kvm_start_guest(srr1);
+#endif
+
mtmsr(MSR_KERNEL);
return srr1;
@@ -784,6 +794,11 @@ static unsigned long power9_offline_stop(unsigned long psscr)
srr1 = power9_idle_stop(psscr, false);
__ppc64_runlatch_on();
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ smp_mb();
+ if (local_paca->kvm_hstate.hwthread_req)
+ srr1 = idle_kvm_start_guest(srr1);
mtmsr(MSR_KERNEL);
#endif
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 77197110e900..e583ed3f6b93 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2431,9 +2431,6 @@ static void dump_one_paca(int cpu)
DUMP(p, irq_happened, "%#-*x");
DUMP(p, io_sync, "%#-*x");
DUMP(p, irq_work_pending, "%#-*x");
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- DUMP(p, nap_state_lost, "%#-*x");
-#endif
DUMP(p, sprg_vdso, "%#-*llx");
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
--
2.20.1