[PATCH 2/2] KVM: PPC: hypervisor large decrementer support
Balbir Singh
bsingharora at gmail.com
Tue Apr 12 17:35:02 AEST 2016
On 12/04/16 14:38, Oliver O'Halloran wrote:
> Power ISAv3 extends the width of the decrementer register from 32 bits.
> The enlarged register width is implementation dependent, but reads from
> these registers are automatically sign extended to produce a 64 bit output
> when operating in large mode. The HDEC always operates in large mode
> while the DEC register can be operated in 32bit mode or large mode
> depending on the setting of the LPCR.LD bit.
>
> Currently the hypervisor assumes that reads from the DEC and HDEC register
> produce a 32 bit result which it sign extends to 64 bits using the extsw
> instruction. This behaviour can result in the guest DEC register value
> being corrupted by the hypervisor when the guest is operating in LD mode since
> the result of the extsw instruction depends only on the value of bit
> 31 in the register to be sign extended.
>
> This patch adds the GET_DEC() and GET_HDEC() assembly macros for reading
> from the decrementer registers. These macros will return the current
> decrementer value as a 64 bit quantity regardless of the Host CPU or
> guest decrementer operating mode. Additionally this patch corrects several
> uses of decrementer values that assume a 32 bit register width.
>
> Signed-off-by: Oliver O'Halloran <oohall at gmail.com>
> Cc: Paul Mackerras <paulus at samba.org>
> ---
> arch/powerpc/include/asm/exception-64s.h | 22 ++++++++++++++++++
> arch/powerpc/include/asm/kvm_host.h | 2 +-
> arch/powerpc/include/asm/kvm_ppc.h | 2 +-
> arch/powerpc/include/uapi/asm/kvm.h | 2 +-
> arch/powerpc/kernel/exceptions-64s.S | 9 +++++++-
> arch/powerpc/kvm/book3s_hv_interrupts.S | 3 +--
> arch/powerpc/kvm/book3s_hv_rmhandlers.S | 38 ++++++++++++++++++--------------
> arch/powerpc/kvm/emulate.c | 4 ++--
> 8 files changed, 57 insertions(+), 25 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
> index 93ae809fe5ea..d922f76c682d 100644
> --- a/arch/powerpc/include/asm/exception-64s.h
> +++ b/arch/powerpc/include/asm/exception-64s.h
> @@ -545,4 +545,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
> #define FINISH_NAP
> #endif
>
> +/* these ensure that we always get a 64bit value from the
> + * decrementer register. */
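The comment style does not match the recommended kernel style for multi-line comments (the opening /* and the closing */ each go on their own line).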
> +
> +#define IS_LD_ENABLED(reg) \
> + mfspr reg,SPRN_LPCR; \
> + andis. reg,reg,(LPCR_LD >> 16);
> +
> +#define GET_DEC(reg) \
> + IS_LD_ENABLED(reg); \
> + mfspr reg, SPRN_DEC; \
> + bne 99f; \
> + extsw reg, reg; \
> +99:
> +
> +/* For CPUs that support it the Hypervisor LD is
> + * always enabled, so this needs to be feature gated */
> +#define GET_HDEC(reg) \
> + mfspr reg, SPRN_HDEC; \
> +BEGIN_FTR_SECTION \
> + extsw reg, reg; \
> +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
> +
> #endif /* _ASM_POWERPC_EXCEPTION_H */
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index d7b343170453..6330d3fca083 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -516,7 +516,7 @@ struct kvm_vcpu_arch {
> ulong mcsrr0;
> ulong mcsrr1;
> ulong mcsr;
> - u32 dec;
> + u64 dec;
> #ifdef CONFIG_BOOKE
> u32 decar;
> #endif
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index 2544edabe7f3..4de0102930e9 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -94,7 +94,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
> extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu);
> extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
> extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
> -extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
> +extern u64 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
> extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu);
> extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
> extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> index c93cf35ce379..2dd92e841127 100644
> --- a/arch/powerpc/include/uapi/asm/kvm.h
> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> @@ -215,7 +215,7 @@ struct kvm_sregs {
> __u32 tsr; /* KVM_SREGS_E_UPDATE_TSR */
> __u32 tcr;
> __u32 decar;
> - __u32 dec; /* KVM_SREGS_E_UPDATE_DEC */
> + __u64 dec; /* KVM_SREGS_E_UPDATE_DEC */
>
> /*
> * Userspace can read TB directly, but the
> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> index 7716cebf4b8e..984ae894e758 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -641,7 +641,14 @@ masked_##_H##interrupt: \
> stb r11,PACAIRQHAPPENED(r13); \
> cmpwi r10,PACA_IRQ_DEC; \
> bne 1f; \
> - lis r10,0x7fff; \
> + mfspr r10,SPRN_LPCR; \
> + andis. r10,r10,(LPCR_LD >> 16); \
> + beq 3f; \
> + LOAD_REG_ADDR(r10, decrementer_max); \
I presume this works because all of this is relative to the kernel TOC
and is not necessarily in the KVM module.
> + ld r10,0(r10); \
> + mtspr SPRN_DEC,r10; \
> + b 2f; \
> +3: lis r10,0x7fff; \
> ori r10,r10,0xffff; \
> mtspr SPRN_DEC,r10; \
> b 2f; \
> diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
> index 0fdc4a28970b..b408f72385e4 100644
> --- a/arch/powerpc/kvm/book3s_hv_interrupts.S
> +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
> @@ -121,10 +121,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> * Put whatever is in the decrementer into the
> * hypervisor decrementer.
> */
> - mfspr r8,SPRN_DEC
> + GET_DEC(r8)
> mftb r7
> mtspr SPRN_HDEC,r8
> - extsw r8,r8
> add r8,r8,r7
> std r8,HSTATE_DECEXP(r13)
>
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index e571ad277398..718e5581494e 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -183,6 +183,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> kvmppc_primary_no_guest:
> /* We handle this much like a ceded vcpu */
> /* put the HDEC into the DEC, since HDEC interrupts don't wake us */
> +
> + /* XXX: ISA v3 only says the HDEC is at least as large as the DEC, but
> + * this code assumes we can fit HDEC in DEC. This is probably
> + * not an issue in practice, but... */
> mfspr r3, SPRN_HDEC
> mtspr SPRN_DEC, r3
> /*
> @@ -249,9 +253,9 @@ kvm_novcpu_wakeup:
> bge kvm_novcpu_exit
>
> /* See if our timeslice has expired (HDEC is negative) */
> - mfspr r0, SPRN_HDEC
> + GET_HDEC(r0);
> li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
> - cmpwi r0, 0
> + cmpdi r0, 0
> blt kvm_novcpu_exit
>
> /* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
> @@ -340,8 +344,9 @@ kvm_secondary_got_guest:
> lbz r4, HSTATE_PTID(r13)
> cmpwi r4, 0
> bne 63f
> - lis r6, 0x7fff
> - ori r6, r6, 0xffff
> +
> + LOAD_REG_ADDR(r6, decrementer_max);
> + ld r6,0(r6);
> mtspr SPRN_HDEC, r6
> /* and set per-LPAR registers, if doing dynamic micro-threading */
> ld r6, HSTATE_SPLIT_MODE(r13)
> @@ -897,7 +902,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
> mftb r7
> subf r3,r7,r8
> mtspr SPRN_DEC,r3
> - stw r3,VCPU_DEC(r4)
> + std r3,VCPU_DEC(r4)
>
> ld r5, VCPU_SPRG0(r4)
> ld r6, VCPU_SPRG1(r4)
> @@ -953,8 +958,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
> isync
>
> /* Check if HDEC expires soon */
> - mfspr r3, SPRN_HDEC
> - cmpwi r3, 512 /* 1 microsecond */
> + GET_HDEC(r3)
> + cmpdi r3, 512 /* 1 microsecond */
> blt hdec_soon
>
> ld r6, VCPU_CTR(r4)
> @@ -990,8 +995,9 @@ deliver_guest_interrupt:
> beq 5f
> li r0, BOOK3S_INTERRUPT_EXTERNAL
> bne cr1, 12f
> - mfspr r0, SPRN_DEC
> - cmpwi r0, 0
> +
> + GET_DEC(r0)
> + cmpdi r0, 0
> li r0, BOOK3S_INTERRUPT_DECREMENTER
> bge 5f
>
> @@ -1206,8 +1212,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
> /* See if this is a leftover HDEC interrupt */
> cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
> bne 2f
> - mfspr r3,SPRN_HDEC
> - cmpwi r3,0
> + GET_HDEC(r3);
> + cmpdi r3,0
> mr r4,r9
> bge fast_guest_return
> 2:
> @@ -1326,9 +1332,8 @@ mc_cont:
> mtspr SPRN_SPURR,r4
>
> /* Save DEC */
> - mfspr r5,SPRN_DEC
> + GET_DEC(r5);
> mftb r6
> - extsw r5,r5
> add r5,r5,r6
> /* r5 is a guest timebase value here, convert to host TB */
> ld r3,HSTATE_KVM_VCORE(r13)
> @@ -2250,15 +2255,14 @@ _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */
> * no later than the end of our timeslice (HDEC interrupts
> * don't wake us from nap).
> */
> - mfspr r3, SPRN_DEC
> - mfspr r4, SPRN_HDEC
> + GET_DEC(r3);
> + GET_HDEC(r4);
> mftb r5
> - cmpw r3, r4
> + cmpd r3, r4
> ble 67f
> mtspr SPRN_DEC, r4
> 67:
> /* save expiry time of guest decrementer */
> - extsw r3, r3
> add r3, r3, r5
> ld r4, HSTATE_KVM_VCPU(r13)
> ld r5, HSTATE_KVM_VCORE(r13)
> diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
> index 5cc2e7af3a7b..4d26252162b0 100644
> --- a/arch/powerpc/kvm/emulate.c
> +++ b/arch/powerpc/kvm/emulate.c
> @@ -47,7 +47,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
> kvmppc_core_dequeue_dec(vcpu);
>
> /* POWER4+ triggers a dec interrupt if the value is < 0 */
> - if (vcpu->arch.dec & 0x80000000) {
> + if ((s64) vcpu->arch.dec < 0) {
> kvmppc_core_queue_dec(vcpu);
> return;
> }
> @@ -78,7 +78,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
> vcpu->arch.dec_jiffies = get_tb();
> }
>
> -u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
> +u64 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
> {
> u64 jd = tb - vcpu->arch.dec_jiffies;
>
>
More information about the Linuxppc-dev
mailing list