[PATCH v1 17/55] KVM: PPC: Book3S HV P9: Implement PMU save/restore in C
Nicholas Piggin
npiggin at gmail.com
Mon Jul 26 13:49:58 AEST 2021
Implement the P9 path PMU save/restore code in C, and remove the
POWER9/10 code from the P7/8 path assembly.
-449 cycles (8533) POWER9 virt-mode NULL hcall
Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
arch/powerpc/include/asm/asm-prototypes.h | 5 -
arch/powerpc/kvm/book3s_hv.c | 205 ++++++++++++++++++++--
arch/powerpc/kvm/book3s_hv_interrupts.S | 13 +-
arch/powerpc/kvm/book3s_hv_rmhandlers.S | 43 +----
4 files changed, 200 insertions(+), 66 deletions(-)
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 222823861a67..41b8a1e1144a 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -141,11 +141,6 @@ static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
bool preserve_nv) { }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-void kvmhv_save_host_pmu(void);
-void kvmhv_load_host_pmu(void);
-void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
-void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
-
void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2eef708c4354..d20b579ddcdf 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3735,6 +3735,188 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
trace_kvmppc_run_core(vc, 1);
}
+/*
+ * Privileged (non-hypervisor) host registers to save.
+ */
+struct p9_host_os_sprs {
+ unsigned long dscr;
+ unsigned long tidr;
+ unsigned long iamr;
+ unsigned long amr;
+ unsigned long fscr;
+
+ unsigned int pmc1;
+ unsigned int pmc2;
+ unsigned int pmc3;
+ unsigned int pmc4;
+ unsigned int pmc5;
+ unsigned int pmc6;
+ unsigned long mmcr0;
+ unsigned long mmcr1;
+ unsigned long mmcr2;
+ unsigned long mmcr3;
+ unsigned long mmcra;
+ unsigned long siar;
+ unsigned long sier1;
+ unsigned long sier2;
+ unsigned long sier3;
+ unsigned long sdar;
+};
+
+static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
+{
+ if (!(mmcr0 & MMCR0_FC))
+ goto do_freeze;
+ if (mmcra & MMCRA_SAMPLE_ENABLE)
+ goto do_freeze;
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ if (!(mmcr0 & MMCR0_PMCCEXT))
+ goto do_freeze;
+ if (!(mmcra & MMCRA_BHRB_DISABLE))
+ goto do_freeze;
+ }
+ return;
+
+do_freeze:
+ mmcr0 = MMCR0_FC;
+ mmcra = 0;
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ mmcr0 |= MMCR0_PMCCEXT;
+ mmcra = MMCRA_BHRB_DISABLE;
+ }
+
+ mtspr(SPRN_MMCR0, mmcr0);
+ mtspr(SPRN_MMCRA, mmcra);
+ isync();
+}
+
+static void save_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
+{
+ if (ppc_get_pmu_inuse()) {
+ /*
+ * It might be better to put PMU handling (at least for the
+ * host) in the perf subsystem because it knows more about what
+ * is being used.
+ */
+
+ /* POWER9, POWER10 do not implement HPMC or SPMC */
+
+ host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0);
+ host_os_sprs->mmcra = mfspr(SPRN_MMCRA);
+
+ freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra);
+
+ host_os_sprs->pmc1 = mfspr(SPRN_PMC1);
+ host_os_sprs->pmc2 = mfspr(SPRN_PMC2);
+ host_os_sprs->pmc3 = mfspr(SPRN_PMC3);
+ host_os_sprs->pmc4 = mfspr(SPRN_PMC4);
+ host_os_sprs->pmc5 = mfspr(SPRN_PMC5);
+ host_os_sprs->pmc6 = mfspr(SPRN_PMC6);
+ host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1);
+ host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2);
+ host_os_sprs->sdar = mfspr(SPRN_SDAR);
+ host_os_sprs->siar = mfspr(SPRN_SIAR);
+ host_os_sprs->sier1 = mfspr(SPRN_SIER);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3);
+ host_os_sprs->sier2 = mfspr(SPRN_SIER2);
+ host_os_sprs->sier3 = mfspr(SPRN_SIER3);
+ }
+ }
+}
+
+static void load_p9_guest_pmu(struct kvm_vcpu *vcpu)
+{
+ mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
+ mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
+ mtspr(SPRN_PMC3, vcpu->arch.pmc[2]);
+ mtspr(SPRN_PMC4, vcpu->arch.pmc[3]);
+ mtspr(SPRN_PMC5, vcpu->arch.pmc[4]);
+ mtspr(SPRN_PMC6, vcpu->arch.pmc[5]);
+ mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]);
+ mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]);
+ mtspr(SPRN_SDAR, vcpu->arch.sdar);
+ mtspr(SPRN_SIAR, vcpu->arch.siar);
+ mtspr(SPRN_SIER, vcpu->arch.sier[0]);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ mtspr(SPRN_MMCR3, vcpu->arch.mmcr[3]);
+ mtspr(SPRN_SIER2, vcpu->arch.sier[1]);
+ mtspr(SPRN_SIER3, vcpu->arch.sier[2]);
+ }
+
+ /* Set MMCRA then MMCR0 last */
+ mtspr(SPRN_MMCRA, vcpu->arch.mmcra);
+ mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]);
+ /* No isync necessary because we're starting counters */
+}
+
+static void save_p9_guest_pmu(struct kvm_vcpu *vcpu)
+{
+ struct lppaca *lp;
+ int save_pmu = 1;
+
+ lp = vcpu->arch.vpa.pinned_addr;
+ if (lp)
+ save_pmu = lp->pmcregs_in_use;
+
+ if (save_pmu) {
+ vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0);
+ vcpu->arch.mmcra = mfspr(SPRN_MMCRA);
+
+ freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra);
+
+ vcpu->arch.pmc[0] = mfspr(SPRN_PMC1);
+ vcpu->arch.pmc[1] = mfspr(SPRN_PMC2);
+ vcpu->arch.pmc[2] = mfspr(SPRN_PMC3);
+ vcpu->arch.pmc[3] = mfspr(SPRN_PMC4);
+ vcpu->arch.pmc[4] = mfspr(SPRN_PMC5);
+ vcpu->arch.pmc[5] = mfspr(SPRN_PMC6);
+ vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1);
+ vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2);
+ vcpu->arch.sdar = mfspr(SPRN_SDAR);
+ vcpu->arch.siar = mfspr(SPRN_SIAR);
+ vcpu->arch.sier[0] = mfspr(SPRN_SIER);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3);
+ vcpu->arch.sier[1] = mfspr(SPRN_SIER2);
+ vcpu->arch.sier[2] = mfspr(SPRN_SIER3);
+ }
+ } else {
+ freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA));
+ }
+}
+
+static void load_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
+{
+ if (ppc_get_pmu_inuse()) {
+ mtspr(SPRN_PMC1, host_os_sprs->pmc1);
+ mtspr(SPRN_PMC2, host_os_sprs->pmc2);
+ mtspr(SPRN_PMC3, host_os_sprs->pmc3);
+ mtspr(SPRN_PMC4, host_os_sprs->pmc4);
+ mtspr(SPRN_PMC5, host_os_sprs->pmc5);
+ mtspr(SPRN_PMC6, host_os_sprs->pmc6);
+ mtspr(SPRN_MMCR1, host_os_sprs->mmcr1);
+ mtspr(SPRN_MMCR2, host_os_sprs->mmcr2);
+ mtspr(SPRN_SDAR, host_os_sprs->sdar);
+ mtspr(SPRN_SIAR, host_os_sprs->siar);
+ mtspr(SPRN_SIER, host_os_sprs->sier1);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ mtspr(SPRN_MMCR3, host_os_sprs->mmcr3);
+ mtspr(SPRN_SIER2, host_os_sprs->sier2);
+ mtspr(SPRN_SIER3, host_os_sprs->sier3);
+ }
+
+ /* Set MMCRA then MMCR0 last */
+ mtspr(SPRN_MMCRA, host_os_sprs->mmcra);
+ mtspr(SPRN_MMCR0, host_os_sprs->mmcr0);
+ isync();
+ }
+}
+
static void load_spr_state(struct kvm_vcpu *vcpu)
{
mtspr(SPRN_DSCR, vcpu->arch.dscr);
@@ -3777,17 +3959,6 @@ static void store_spr_state(struct kvm_vcpu *vcpu)
vcpu->arch.dscr = mfspr(SPRN_DSCR);
}
-/*
- * Privileged (non-hypervisor) host registers to save.
- */
-struct p9_host_os_sprs {
- unsigned long dscr;
- unsigned long tidr;
- unsigned long iamr;
- unsigned long amr;
- unsigned long fscr;
-};
-
static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
{
host_os_sprs->dscr = mfspr(SPRN_DSCR);
@@ -3835,7 +4006,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
struct p9_host_os_sprs host_os_sprs;
s64 dec;
u64 tb, next_timer;
- int trap, save_pmu;
+ int trap;
WARN_ON_ONCE(vcpu->arch.ceded);
@@ -3848,7 +4019,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
save_p9_host_os_sprs(&host_os_sprs);
- kvmhv_save_host_pmu(); /* saves it to PACA kvm_hstate */
+ save_p9_host_pmu(&host_os_sprs);
kvmppc_subcore_enter_guest();
@@ -3878,7 +4049,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
barrier();
}
#endif
- kvmhv_load_guest_pmu(vcpu);
+ load_p9_guest_pmu(vcpu);
msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
load_fp_state(&vcpu->arch.fp);
@@ -4000,16 +4171,14 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
- save_pmu = 1;
if (vcpu->arch.vpa.pinned_addr) {
struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
lp->yield_count = cpu_to_be32(yield_count);
vcpu->arch.vpa.dirty = 1;
- save_pmu = lp->pmcregs_in_use;
}
- kvmhv_save_guest_pmu(vcpu, save_pmu);
+ save_p9_guest_pmu(vcpu);
#ifdef CONFIG_PPC_PSERIES
if (kvmhv_on_pseries()) {
barrier();
@@ -4025,7 +4194,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
- kvmhv_load_host_pmu();
+ load_p9_host_pmu(&host_os_sprs);
kvmppc_subcore_exit_guest();
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 4444f83cb133..59d89e4b154a 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -104,7 +104,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtlr r0
blr
-_GLOBAL(kvmhv_save_host_pmu)
+/*
+ * void kvmhv_save_host_pmu(void)
+ */
+kvmhv_save_host_pmu:
BEGIN_FTR_SECTION
/* Work around P8 PMAE bug */
li r3, -1
@@ -138,14 +141,6 @@ BEGIN_FTR_SECTION
std r8, HSTATE_MMCR2(r13)
std r9, HSTATE_SIER(r13)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
- mfspr r5, SPRN_MMCR3
- mfspr r6, SPRN_SIER2
- mfspr r7, SPRN_SIER3
- std r5, HSTATE_MMCR3(r13)
- std r6, HSTATE_SIER2(r13)
- std r7, HSTATE_SIER3(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
mfspr r3, SPRN_PMC1
mfspr r5, SPRN_PMC2
mfspr r6, SPRN_PMC3
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 9021052f1579..551ce223b40c 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2738,10 +2738,11 @@ kvmppc_msr_interrupt:
blr
/*
+ * void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu)
+ *
* Load up guest PMU state. R3 points to the vcpu struct.
*/
-_GLOBAL(kvmhv_load_guest_pmu)
-EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu)
+kvmhv_load_guest_pmu:
mr r4, r3
mflr r0
li r3, 1
@@ -2775,27 +2776,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
mtspr SPRN_MMCRA, r6
mtspr SPRN_SIAR, r7
mtspr SPRN_SDAR, r8
-BEGIN_FTR_SECTION
- ld r5, VCPU_MMCR + 24(r4)
- ld r6, VCPU_SIER + 8(r4)
- ld r7, VCPU_SIER + 16(r4)
- mtspr SPRN_MMCR3, r5
- mtspr SPRN_SIER2, r6
- mtspr SPRN_SIER3, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
BEGIN_FTR_SECTION
ld r5, VCPU_MMCR + 16(r4)
ld r6, VCPU_SIER(r4)
mtspr SPRN_MMCR2, r5
mtspr SPRN_SIER, r6
-BEGIN_FTR_SECTION_NESTED(96)
lwz r7, VCPU_PMC + 24(r4)
lwz r8, VCPU_PMC + 28(r4)
ld r9, VCPU_MMCRS(r4)
mtspr SPRN_SPMC1, r7
mtspr SPRN_SPMC2, r8
mtspr SPRN_MMCRS, r9
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mtspr SPRN_MMCR0, r3
isync
@@ -2803,10 +2794,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
blr
/*
+ * void kvmhv_load_host_pmu(void)
+ *
* Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
*/
-_GLOBAL(kvmhv_load_host_pmu)
-EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu)
+kvmhv_load_host_pmu:
mflr r0
lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
cmpwi r4, 0
@@ -2844,25 +2836,18 @@ BEGIN_FTR_SECTION
mtspr SPRN_MMCR2, r8
mtspr SPRN_SIER, r9
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
- ld r5, HSTATE_MMCR3(r13)
- ld r6, HSTATE_SIER2(r13)
- ld r7, HSTATE_SIER3(r13)
- mtspr SPRN_MMCR3, r5
- mtspr SPRN_SIER2, r6
- mtspr SPRN_SIER3, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
mtspr SPRN_MMCR0, r3
isync
mtlr r0
23: blr
/*
+ * void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use)
+ *
* Save guest PMU state into the vcpu struct.
* r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
*/
-_GLOBAL(kvmhv_save_guest_pmu)
-EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu)
+kvmhv_save_guest_pmu:
mr r9, r3
mr r8, r4
BEGIN_FTR_SECTION
@@ -2911,14 +2896,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
std r10, VCPU_MMCR + 16(r9)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
- mfspr r5, SPRN_MMCR3
- mfspr r6, SPRN_SIER2
- mfspr r7, SPRN_SIER3
- std r5, VCPU_MMCR + 24(r9)
- std r6, VCPU_SIER + 8(r9)
- std r7, VCPU_SIER + 16(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
std r7, VCPU_SIAR(r9)
std r8, VCPU_SDAR(r9)
mfspr r3, SPRN_PMC1
@@ -2936,7 +2913,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
BEGIN_FTR_SECTION
mfspr r5, SPRN_SIER
std r5, VCPU_SIER(r9)
-BEGIN_FTR_SECTION_NESTED(96)
mfspr r6, SPRN_SPMC1
mfspr r7, SPRN_SPMC2
mfspr r8, SPRN_MMCRS
@@ -2945,7 +2921,6 @@ BEGIN_FTR_SECTION_NESTED(96)
std r8, VCPU_MMCRS(r9)
lis r4, 0x8000
mtspr SPRN_MMCRS, r4
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
22: blr
--
2.23.0
More information about the Linuxppc-dev
mailing list