[PATCH 2/2] KVM: PPC: hypervisor large decrementer support

Balbir Singh bsingharora at gmail.com
Tue Apr 12 17:35:02 AEST 2016



On 12/04/16 14:38, Oliver O'Halloran wrote:
> Power ISAv3 extends the width of the decrementer register from 32 bits.
> The enlarged register width is implementation dependent, but reads from
> these registers are automatically sign extended to produce a 64 bit output
> when operating in large mode. The HDEC always operates in large mode
> while the DEC register can be operated in 32bit mode or large mode
> depending on the setting of the LPCR.LD bit.
> 
> Currently the hypervisor assumes that reads from the DEC and HDEC registers
> produce a 32 bit result which it sign extends to 64 bits using the extsw
> instruction. This behaviour can result in the guest DEC register value
> being corrupted by the hypervisor when the guest is operating in LD mode, since
> the result of the extsw instruction depends only on the value of bit
> 31 in the register to be sign extended.
> 
> This patch adds the GET_DEC() and GET_HDEC() assembly macros for reading
> from the decrementer registers. These macros will return the current
> decrementer value as a 64 bit quantity regardless of the Host CPU or
> guest decrementer operating mode. Additionally this patch corrects several
> uses of decrementer values that assume a 32 bit register width.
> 
> Signed-off-by: Oliver O'Halloran <oohall at gmail.com>
> Cc: Paul Mackerras <paulus at samba.org>
> ---
>  arch/powerpc/include/asm/exception-64s.h | 22 ++++++++++++++++++
>  arch/powerpc/include/asm/kvm_host.h      |  2 +-
>  arch/powerpc/include/asm/kvm_ppc.h       |  2 +-
>  arch/powerpc/include/uapi/asm/kvm.h      |  2 +-
>  arch/powerpc/kernel/exceptions-64s.S     |  9 +++++++-
>  arch/powerpc/kvm/book3s_hv_interrupts.S  |  3 +--
>  arch/powerpc/kvm/book3s_hv_rmhandlers.S  | 38 ++++++++++++++++++--------------
>  arch/powerpc/kvm/emulate.c               |  4 ++--
>  8 files changed, 57 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
> index 93ae809fe5ea..d922f76c682d 100644
> --- a/arch/powerpc/include/asm/exception-64s.h
> +++ b/arch/powerpc/include/asm/exception-64s.h
> @@ -545,4 +545,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
>  #define FINISH_NAP
>  #endif
>  
> +/* these ensure that we always get a 64bit value from the
> + * decrementer register. */
This comment does not match the recommended multi-line comment style.

> +
> +#define IS_LD_ENABLED(reg)                 \
> +	mfspr  reg,SPRN_LPCR;              \
> +	andis. reg,reg,(LPCR_LD >> 16);
> +
> +#define GET_DEC(reg)                       \
> +	IS_LD_ENABLED(reg);                \
> +	mfspr reg, SPRN_DEC;               \
> +	bne 99f;                           \
> +	extsw reg, reg;                    \
> +99:
> +
> +/* For CPUs that support it the Hypervisor LD is
> + * always enabled, so this needs to be feature gated */
> +#define GET_HDEC(reg) \
> +	mfspr reg, SPRN_HDEC;           \
> +BEGIN_FTR_SECTION                       \
> +	extsw reg, reg;                 \
> +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
> +
>  #endif	/* _ASM_POWERPC_EXCEPTION_H */
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index d7b343170453..6330d3fca083 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -516,7 +516,7 @@ struct kvm_vcpu_arch {
>  	ulong mcsrr0;
>  	ulong mcsrr1;
>  	ulong mcsr;
> -	u32 dec;
> +	u64 dec;
>  #ifdef CONFIG_BOOKE
>  	u32 decar;
>  #endif
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index 2544edabe7f3..4de0102930e9 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -94,7 +94,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
>  extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu);
>  extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
>  extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
> -extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
> +extern u64 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
>  extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu);
>  extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
>  extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> index c93cf35ce379..2dd92e841127 100644
> --- a/arch/powerpc/include/uapi/asm/kvm.h
> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> @@ -215,7 +215,7 @@ struct kvm_sregs {
>  			__u32 tsr;	/* KVM_SREGS_E_UPDATE_TSR */
>  			__u32 tcr;
>  			__u32 decar;
> -			__u32 dec;	/* KVM_SREGS_E_UPDATE_DEC */
> +			__u64 dec;	/* KVM_SREGS_E_UPDATE_DEC */
>  
>  			/*
>  			 * Userspace can read TB directly, but the
> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> index 7716cebf4b8e..984ae894e758 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -641,7 +641,14 @@ masked_##_H##interrupt:					\
>  	stb	r11,PACAIRQHAPPENED(r13);		\
>  	cmpwi	r10,PACA_IRQ_DEC;			\
>  	bne	1f;					\
> -	lis	r10,0x7fff;				\
> +	mfspr	r10,SPRN_LPCR;				\
> +	andis.	r10,r10,(LPCR_LD >> 16);		\
> +	beq	3f;					\
> +	LOAD_REG_ADDR(r10, decrementer_max);		\

I presume this works because all of this is relative to the kernel TOC
and is not necessarily in the KVM module.

> +	ld r10,0(r10);					\
> +	mtspr	SPRN_DEC,r10;				\
> +	b	2f;					\
> +3:	lis	r10,0x7fff;				\
>  	ori	r10,r10,0xffff;				\
>  	mtspr	SPRN_DEC,r10;				\
>  	b	2f;					\
> diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
> index 0fdc4a28970b..b408f72385e4 100644
> --- a/arch/powerpc/kvm/book3s_hv_interrupts.S
> +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
> @@ -121,10 +121,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
>  	 * Put whatever is in the decrementer into the
>  	 * hypervisor decrementer.
>  	 */
> -	mfspr	r8,SPRN_DEC
> +	GET_DEC(r8)
>  	mftb	r7
>  	mtspr	SPRN_HDEC,r8
> -	extsw	r8,r8
>  	add	r8,r8,r7
>  	std	r8,HSTATE_DECEXP(r13)
>  
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index e571ad277398..718e5581494e 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -183,6 +183,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
>  kvmppc_primary_no_guest:
>  	/* We handle this much like a ceded vcpu */
>  	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
> +
> +	/* XXX: ISA v3 only says the HDEC is at least as large as the DEC, but
> +	 *      this code assumes we can fit HDEC in DEC. This is probably
> +	 *      not an issue in practice, but... */
>  	mfspr	r3, SPRN_HDEC
>  	mtspr	SPRN_DEC, r3
>  	/*
> @@ -249,9 +253,9 @@ kvm_novcpu_wakeup:
>  	bge	kvm_novcpu_exit
>  
>  	/* See if our timeslice has expired (HDEC is negative) */
> -	mfspr	r0, SPRN_HDEC
> +	GET_HDEC(r0);
>  	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
> -	cmpwi	r0, 0
> +	cmpdi	r0, 0
>  	blt	kvm_novcpu_exit
>  
>  	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
> @@ -340,8 +344,9 @@ kvm_secondary_got_guest:
>  	lbz	r4, HSTATE_PTID(r13)
>  	cmpwi	r4, 0
>  	bne	63f
> -	lis	r6, 0x7fff
> -	ori	r6, r6, 0xffff
> +
> +	LOAD_REG_ADDR(r6, decrementer_max);
> +	ld	r6,0(r6);
>  	mtspr	SPRN_HDEC, r6
>  	/* and set per-LPAR registers, if doing dynamic micro-threading */
>  	ld	r6, HSTATE_SPLIT_MODE(r13)
> @@ -897,7 +902,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
>  	mftb	r7
>  	subf	r3,r7,r8
>  	mtspr	SPRN_DEC,r3
> -	stw	r3,VCPU_DEC(r4)
> +	std	r3,VCPU_DEC(r4)
>  
>  	ld	r5, VCPU_SPRG0(r4)
>  	ld	r6, VCPU_SPRG1(r4)
> @@ -953,8 +958,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
>  	isync
>  
>  	/* Check if HDEC expires soon */
> -	mfspr	r3, SPRN_HDEC
> -	cmpwi	r3, 512		/* 1 microsecond */
> +	GET_HDEC(r3)
> +	cmpdi	r3, 512		/* 1 microsecond */
>  	blt	hdec_soon
>  
>  	ld	r6, VCPU_CTR(r4)
> @@ -990,8 +995,9 @@ deliver_guest_interrupt:
>  	beq	5f
>  	li	r0, BOOK3S_INTERRUPT_EXTERNAL
>  	bne	cr1, 12f
> -	mfspr	r0, SPRN_DEC
> -	cmpwi	r0, 0
> +
> +	GET_DEC(r0)
> +	cmpdi	r0, 0
>  	li	r0, BOOK3S_INTERRUPT_DECREMENTER
>  	bge	5f
>  
> @@ -1206,8 +1212,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>  	/* See if this is a leftover HDEC interrupt */
>  	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
>  	bne	2f
> -	mfspr	r3,SPRN_HDEC
> -	cmpwi	r3,0
> +	GET_HDEC(r3);
> +	cmpdi	r3,0
>  	mr	r4,r9
>  	bge	fast_guest_return
>  2:
> @@ -1326,9 +1332,8 @@ mc_cont:
>  	mtspr	SPRN_SPURR,r4
>  
>  	/* Save DEC */
> -	mfspr	r5,SPRN_DEC
> +	GET_DEC(r5);
>  	mftb	r6
> -	extsw	r5,r5
>  	add	r5,r5,r6
>  	/* r5 is a guest timebase value here, convert to host TB */
>  	ld	r3,HSTATE_KVM_VCORE(r13)
> @@ -2250,15 +2255,14 @@ _GLOBAL(kvmppc_h_cede)		/* r3 = vcpu pointer, r11 = msr, r13 = paca */
>  	 * no later than the end of our timeslice (HDEC interrupts
>  	 * don't wake us from nap).
>  	 */
> -	mfspr	r3, SPRN_DEC
> -	mfspr	r4, SPRN_HDEC
> +	GET_DEC(r3);
> +	GET_HDEC(r4);
>  	mftb	r5
> -	cmpw	r3, r4
> +	cmpd	r3, r4
>  	ble	67f
>  	mtspr	SPRN_DEC, r4
>  67:
>  	/* save expiry time of guest decrementer */
> -	extsw	r3, r3
>  	add	r3, r3, r5
>  	ld	r4, HSTATE_KVM_VCPU(r13)
>  	ld	r5, HSTATE_KVM_VCORE(r13)
> diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
> index 5cc2e7af3a7b..4d26252162b0 100644
> --- a/arch/powerpc/kvm/emulate.c
> +++ b/arch/powerpc/kvm/emulate.c
> @@ -47,7 +47,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
>  	kvmppc_core_dequeue_dec(vcpu);
>  
>  	/* POWER4+ triggers a dec interrupt if the value is < 0 */
> -	if (vcpu->arch.dec & 0x80000000) {
> +	if ((s64) vcpu->arch.dec < 0) {
>  		kvmppc_core_queue_dec(vcpu);
>  		return;
>  	}
> @@ -78,7 +78,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
>  	vcpu->arch.dec_jiffies = get_tb();
>  }
>  
> -u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
> +u64 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
>  {
>  	u64 jd = tb - vcpu->arch.dec_jiffies;
>  
> 


More information about the Linuxppc-dev mailing list