[RFC PATCH 11/43] KVM: PPC: Book3S HV P9: Implement PMU save/restore in C
Athira Rajeev
atrajeev at linux.vnet.ibm.com
Sat Jul 10 12:47:17 AEST 2021
> On 22-Jun-2021, at 4:27 PM, Nicholas Piggin <npiggin at gmail.com> wrote:
>
> Implement the P9 path PMU save/restore code in C, and remove the
> POWER9/10 code from the P7/8 path assembly.
>
> -449 cycles (8533) POWER9 virt-mode NULL hcall
>
> Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
> ---
> arch/powerpc/include/asm/asm-prototypes.h | 5 -
> arch/powerpc/kvm/book3s_hv.c | 205 ++++++++++++++++++++--
> arch/powerpc/kvm/book3s_hv_interrupts.S | 13 +-
> arch/powerpc/kvm/book3s_hv_rmhandlers.S | 43 +----
> 4 files changed, 200 insertions(+), 66 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
> index 02ee6f5ac9fe..928db8ef9a5a 100644
> --- a/arch/powerpc/include/asm/asm-prototypes.h
> +++ b/arch/powerpc/include/asm/asm-prototypes.h
> @@ -136,11 +136,6 @@ static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
> bool preserve_nv) { }
> #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
>
> -void kvmhv_save_host_pmu(void);
> -void kvmhv_load_host_pmu(void);
> -void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
> -void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
> -
> void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
>
> long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index f7349d150828..b1b94b3563b7 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -3635,6 +3635,188 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
> trace_kvmppc_run_core(vc, 1);
> }
>
> +/*
> + * Privileged (non-hypervisor) host registers to save.
> + */
> +struct p9_host_os_sprs {
> + unsigned long dscr;
> + unsigned long tidr;
> + unsigned long iamr;
> + unsigned long amr;
> + unsigned long fscr;
> +
> + unsigned int pmc1;
> + unsigned int pmc2;
> + unsigned int pmc3;
> + unsigned int pmc4;
> + unsigned int pmc5;
> + unsigned int pmc6;
> + unsigned long mmcr0;
> + unsigned long mmcr1;
> + unsigned long mmcr2;
> + unsigned long mmcr3;
> + unsigned long mmcra;
> + unsigned long siar;
> + unsigned long sier1;
> + unsigned long sier2;
> + unsigned long sier3;
> + unsigned long sdar;
> +};
> +
> +static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
> +{
> + if (!(mmcr0 & MMCR0_FC))
> + goto do_freeze;
> + if (mmcra & MMCRA_SAMPLE_ENABLE)
> + goto do_freeze;
> + if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> + if (!(mmcr0 & MMCR0_PMCCEXT))
> + goto do_freeze;
> + if (!(mmcra & MMCRA_BHRB_DISABLE))
> + goto do_freeze;
> + }
> + return;
Hi Nick
When freezing the PMU, do we need to also set pmcregs_in_use to zero ?
Also, why we need these above conditions like MMCRA_SAMPLE_ENABLE, MMCR0_PMCCEXT checks also before freezing ?
> +
> +do_freeze:
> + mmcr0 = MMCR0_FC;
> + mmcra = 0;
> + if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> + mmcr0 |= MMCR0_PMCCEXT;
> + mmcra = MMCRA_BHRB_DISABLE;
> + }
> +
> + mtspr(SPRN_MMCR0, mmcr0);
> + mtspr(SPRN_MMCRA, mmcra);
> + isync();
> +}
> +
> +static void save_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
> +{
> + if (ppc_get_pmu_inuse()) {
> + /*
> + * It might be better to put PMU handling (at least for the
> + * host) in the perf subsystem because it knows more about what
> + * is being used.
> + */
> +
> + /* POWER9, POWER10 do not implement HPMC or SPMC */
> +
> + host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0);
> + host_os_sprs->mmcra = mfspr(SPRN_MMCRA);
> +
> + freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra);
> +
> + host_os_sprs->pmc1 = mfspr(SPRN_PMC1);
> + host_os_sprs->pmc2 = mfspr(SPRN_PMC2);
> + host_os_sprs->pmc3 = mfspr(SPRN_PMC3);
> + host_os_sprs->pmc4 = mfspr(SPRN_PMC4);
> + host_os_sprs->pmc5 = mfspr(SPRN_PMC5);
> + host_os_sprs->pmc6 = mfspr(SPRN_PMC6);
> + host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1);
> + host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2);
> + host_os_sprs->sdar = mfspr(SPRN_SDAR);
> + host_os_sprs->siar = mfspr(SPRN_SIAR);
> + host_os_sprs->sier1 = mfspr(SPRN_SIER);
> +
> + if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> + host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3);
> + host_os_sprs->sier2 = mfspr(SPRN_SIER2);
> + host_os_sprs->sier3 = mfspr(SPRN_SIER3);
> + }
> + }
> +}
> +
> +static void load_p9_guest_pmu(struct kvm_vcpu *vcpu)
> +{
> + mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
> + mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
> + mtspr(SPRN_PMC3, vcpu->arch.pmc[2]);
> + mtspr(SPRN_PMC4, vcpu->arch.pmc[3]);
> + mtspr(SPRN_PMC5, vcpu->arch.pmc[4]);
> + mtspr(SPRN_PMC6, vcpu->arch.pmc[5]);
> + mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]);
> + mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]);
> + mtspr(SPRN_SDAR, vcpu->arch.sdar);
> + mtspr(SPRN_SIAR, vcpu->arch.siar);
> + mtspr(SPRN_SIER, vcpu->arch.sier[0]);
> +
> + if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> + mtspr(SPRN_MMCR3, vcpu->arch.mmcr[4]);
> + mtspr(SPRN_SIER2, vcpu->arch.sier[1]);
> + mtspr(SPRN_SIER3, vcpu->arch.sier[2]);
> + }
> +
> + /* Set MMCRA then MMCR0 last */
> + mtspr(SPRN_MMCRA, vcpu->arch.mmcra);
> + mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]);
> + /* No isync necessary because we're starting counters */
> +}
> +
> +static void save_p9_guest_pmu(struct kvm_vcpu *vcpu)
> +{
> + struct lppaca *lp;
> + int save_pmu = 1;
> +
> + lp = vcpu->arch.vpa.pinned_addr;
> + if (lp)
> + save_pmu = lp->pmcregs_in_use;
> +
> + if (save_pmu) {
> + vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0);
> + vcpu->arch.mmcra = mfspr(SPRN_MMCRA);
> +
> + freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra);
> +
> + vcpu->arch.pmc[0] = mfspr(SPRN_PMC1);
> + vcpu->arch.pmc[1] = mfspr(SPRN_PMC2);
> + vcpu->arch.pmc[2] = mfspr(SPRN_PMC3);
> + vcpu->arch.pmc[3] = mfspr(SPRN_PMC4);
> + vcpu->arch.pmc[4] = mfspr(SPRN_PMC5);
> + vcpu->arch.pmc[5] = mfspr(SPRN_PMC6);
> + vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1);
> + vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2);
> + vcpu->arch.sdar = mfspr(SPRN_SDAR);
> + vcpu->arch.siar = mfspr(SPRN_SIAR);
> + vcpu->arch.sier[0] = mfspr(SPRN_SIER);
> +
> + if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> + vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3);
> + vcpu->arch.sier[1] = mfspr(SPRN_SIER2);
> + vcpu->arch.sier[2] = mfspr(SPRN_SIER3);
> + }
> + } else {
> + freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA));
> + }
> +}
> +
> +static void load_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
> +{
> + if (ppc_get_pmu_inuse()) {
> + mtspr(SPRN_PMC1, host_os_sprs->pmc1);
> + mtspr(SPRN_PMC2, host_os_sprs->pmc2);
> + mtspr(SPRN_PMC3, host_os_sprs->pmc3);
> + mtspr(SPRN_PMC4, host_os_sprs->pmc4);
> + mtspr(SPRN_PMC5, host_os_sprs->pmc5);
> + mtspr(SPRN_PMC6, host_os_sprs->pmc6);
> + mtspr(SPRN_MMCR1, host_os_sprs->mmcr1);
> + mtspr(SPRN_MMCR2, host_os_sprs->mmcr2);
> + mtspr(SPRN_SDAR, host_os_sprs->sdar);
> + mtspr(SPRN_SIAR, host_os_sprs->siar);
> + mtspr(SPRN_SIER, host_os_sprs->sier1);
> +
> + if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> + mtspr(SPRN_MMCR3, host_os_sprs->mmcr3);
> + mtspr(SPRN_SIER2, host_os_sprs->sier2);
> + mtspr(SPRN_SIER3, host_os_sprs->sier3);
> + }
> +
> + /* Set MMCRA then MMCR0 last */
> + mtspr(SPRN_MMCRA, host_os_sprs->mmcra);
> + mtspr(SPRN_MMCR0, host_os_sprs->mmcr0);
> + isync();
> + }
> +}
> +
> static void load_spr_state(struct kvm_vcpu *vcpu)
> {
> mtspr(SPRN_DSCR, vcpu->arch.dscr);
> @@ -3677,17 +3859,6 @@ static void store_spr_state(struct kvm_vcpu *vcpu)
> vcpu->arch.dscr = mfspr(SPRN_DSCR);
> }
>
> -/*
> - * Privileged (non-hypervisor) host registers to save.
> - */
> -struct p9_host_os_sprs {
> - unsigned long dscr;
> - unsigned long tidr;
> - unsigned long iamr;
> - unsigned long amr;
> - unsigned long fscr;
> -};
> -
> static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
> {
> host_os_sprs->dscr = mfspr(SPRN_DSCR);
> @@ -3735,7 +3906,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
> struct p9_host_os_sprs host_os_sprs;
> s64 dec;
> u64 tb, next_timer;
> - int trap, save_pmu;
> + int trap;
>
> WARN_ON_ONCE(vcpu->arch.ceded);
>
> @@ -3748,7 +3919,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
>
> save_p9_host_os_sprs(&host_os_sprs);
>
> - kvmhv_save_host_pmu(); /* saves it to PACA kvm_hstate */
> + save_p9_host_pmu(&host_os_sprs);
>
> kvmppc_subcore_enter_guest();
>
> @@ -3776,7 +3947,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
> }
> }
> #endif
> - kvmhv_load_guest_pmu(vcpu);
> + load_p9_guest_pmu(vcpu);
>
> msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
> load_fp_state(&vcpu->arch.fp);
> @@ -3898,16 +4069,14 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
> cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
> kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
>
> - save_pmu = 1;
> if (vcpu->arch.vpa.pinned_addr) {
> struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
> u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
> lp->yield_count = cpu_to_be32(yield_count);
> vcpu->arch.vpa.dirty = 1;
> - save_pmu = lp->pmcregs_in_use;
> }
>
> - kvmhv_save_guest_pmu(vcpu, save_pmu);
> + save_p9_guest_pmu(vcpu);
> #ifdef CONFIG_PPC_PSERIES
> if (kvmhv_on_pseries())
> get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
> @@ -3920,7 +4089,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
>
> mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
>
> - kvmhv_load_host_pmu();
> + load_p9_host_pmu(&host_os_sprs);
>
> kvmppc_subcore_exit_guest();
>
> diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
> index 4444f83cb133..59d89e4b154a 100644
> --- a/arch/powerpc/kvm/book3s_hv_interrupts.S
> +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
> @@ -104,7 +104,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
> mtlr r0
> blr
>
> -_GLOBAL(kvmhv_save_host_pmu)
> +/*
> + * void kvmhv_save_host_pmu(void)
> + */
> +kvmhv_save_host_pmu:
> BEGIN_FTR_SECTION
> /* Work around P8 PMAE bug */
> li r3, -1
> @@ -138,14 +141,6 @@ BEGIN_FTR_SECTION
> std r8, HSTATE_MMCR2(r13)
> std r9, HSTATE_SIER(r13)
> END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> -BEGIN_FTR_SECTION
> - mfspr r5, SPRN_MMCR3
> - mfspr r6, SPRN_SIER2
> - mfspr r7, SPRN_SIER3
> - std r5, HSTATE_MMCR3(r13)
> - std r6, HSTATE_SIER2(r13)
> - std r7, HSTATE_SIER3(r13)
> -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
> mfspr r3, SPRN_PMC1
> mfspr r5, SPRN_PMC2
> mfspr r6, SPRN_PMC3
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 007f87b97184..0eb06734bc26 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -2780,10 +2780,11 @@ kvmppc_msr_interrupt:
> blr
>
> /*
> + * void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu)
> + *
> * Load up guest PMU state. R3 points to the vcpu struct.
> */
> -_GLOBAL(kvmhv_load_guest_pmu)
> -EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu)
> +kvmhv_load_guest_pmu:
> mr r4, r3
> mflr r0
> li r3, 1
> @@ -2817,27 +2818,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
> mtspr SPRN_MMCRA, r6
> mtspr SPRN_SIAR, r7
> mtspr SPRN_SDAR, r8
> -BEGIN_FTR_SECTION
> - ld r5, VCPU_MMCR + 24(r4)
> - ld r6, VCPU_SIER + 8(r4)
> - ld r7, VCPU_SIER + 16(r4)
> - mtspr SPRN_MMCR3, r5
> - mtspr SPRN_SIER2, r6
> - mtspr SPRN_SIER3, r7
> -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
> BEGIN_FTR_SECTION
> ld r5, VCPU_MMCR + 16(r4)
> ld r6, VCPU_SIER(r4)
> mtspr SPRN_MMCR2, r5
> mtspr SPRN_SIER, r6
> -BEGIN_FTR_SECTION_NESTED(96)
> lwz r7, VCPU_PMC + 24(r4)
> lwz r8, VCPU_PMC + 28(r4)
> ld r9, VCPU_MMCRS(r4)
> mtspr SPRN_SPMC1, r7
> mtspr SPRN_SPMC2, r8
> mtspr SPRN_MMCRS, r9
> -END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
> END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> mtspr SPRN_MMCR0, r3
> isync
> @@ -2845,10 +2836,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> blr
>
> /*
> + * void kvmhv_load_host_pmu(void)
> + *
> * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
> */
> -_GLOBAL(kvmhv_load_host_pmu)
> -EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu)
> +kvmhv_load_host_pmu:
> mflr r0
> lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
> cmpwi r4, 0
> @@ -2886,25 +2878,18 @@ BEGIN_FTR_SECTION
> mtspr SPRN_MMCR2, r8
> mtspr SPRN_SIER, r9
> END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> -BEGIN_FTR_SECTION
> - ld r5, HSTATE_MMCR3(r13)
> - ld r6, HSTATE_SIER2(r13)
> - ld r7, HSTATE_SIER3(r13)
> - mtspr SPRN_MMCR3, r5
> - mtspr SPRN_SIER2, r6
> - mtspr SPRN_SIER3, r7
> -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
> mtspr SPRN_MMCR0, r3
> isync
> mtlr r0
> 23: blr
>
> /*
> + * void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use)
> + *
> * Save guest PMU state into the vcpu struct.
> * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
> */
> -_GLOBAL(kvmhv_save_guest_pmu)
> -EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu)
> +kvmhv_save_guest_pmu:
> mr r9, r3
> mr r8, r4
> BEGIN_FTR_SECTION
> @@ -2953,14 +2938,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> BEGIN_FTR_SECTION
> std r10, VCPU_MMCR + 16(r9)
> END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> -BEGIN_FTR_SECTION
> - mfspr r5, SPRN_MMCR3
> - mfspr r6, SPRN_SIER2
> - mfspr r7, SPRN_SIER3
> - std r5, VCPU_MMCR + 24(r9)
> - std r6, VCPU_SIER + 8(r9)
> - std r7, VCPU_SIER + 16(r9)
> -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
> std r7, VCPU_SIAR(r9)
> std r8, VCPU_SDAR(r9)
> mfspr r3, SPRN_PMC1
> @@ -2978,7 +2955,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
> BEGIN_FTR_SECTION
> mfspr r5, SPRN_SIER
> std r5, VCPU_SIER(r9)
> -BEGIN_FTR_SECTION_NESTED(96)
> mfspr r6, SPRN_SPMC1
> mfspr r7, SPRN_SPMC2
> mfspr r8, SPRN_MMCRS
> @@ -2987,7 +2963,6 @@ BEGIN_FTR_SECTION_NESTED(96)
> std r8, VCPU_MMCRS(r9)
> lis r4, 0x8000
> mtspr SPRN_MMCRS, r4
> -END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
> END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> 22: blr
>
> --
> 2.23.0
>
More information about the Linuxppc-dev
mailing list