[PATCH 1/3] KVM: PPC: Book3S: Change interrupt call to reduce scratch space use on HV

Paul Mackerras paulus at ozlabs.org
Tue Dec 6 17:09:07 AEDT 2016


On Thu, Dec 01, 2016 at 06:18:10PM +1100, Nicholas Piggin wrote:
> Change the calling convention to put the trap number together with
> CR in two halves of r12, which frees up HSTATE_SCRATCH2 in the HV
> handler and leaves r9 free.

Cute idea!  Some comments below...

> The 64-bit PR handler entry translates the calling convention back
> to match the previous call convention (i.e., shared with 32-bit), for
> simplicity.
> 
> Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
> ---
>  arch/powerpc/include/asm/exception-64s.h | 28 +++++++++++++++-------------
>  arch/powerpc/kvm/book3s_hv_rmhandlers.S  | 15 +++++++--------
>  arch/powerpc/kvm/book3s_segment.S        | 27 ++++++++++++++++++++-------
>  3 files changed, 42 insertions(+), 28 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
> index 9a3eee6..bc8fc45 100644
> --- a/arch/powerpc/include/asm/exception-64s.h
> +++ b/arch/powerpc/include/asm/exception-64s.h
> @@ -233,7 +233,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
>  
>  #endif
>  
> -#define __KVM_HANDLER_PROLOG(area, n)					\
> +#define __KVM_HANDLER(area, h, n)					\
>  	BEGIN_FTR_SECTION_NESTED(947)					\
>  	ld	r10,area+EX_CFAR(r13);					\
>  	std	r10,HSTATE_CFAR(r13);					\
> @@ -243,30 +243,32 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
>  	std	r10,HSTATE_PPR(r13);					\
>  	END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948);	\
>  	ld	r10,area+EX_R10(r13);					\
> -	stw	r9,HSTATE_SCRATCH1(r13);				\
> -	ld	r9,area+EX_R9(r13);					\
>  	std	r12,HSTATE_SCRATCH0(r13);				\
> -
> -#define __KVM_HANDLER(area, h, n)					\
> -	__KVM_HANDLER_PROLOG(area, n)					\
> -	li	r12,n;							\
> +	li	r12,(n);						\
> +	sldi	r12,r12,32;						\
> +	or	r12,r12,r9;						\

Did you consider doing it the other way around, i.e. with r12
containing (cr << 32) | trap?  That would save 1 instruction in each
handler:

+	sldi	r12,r9,32;						\
+	ori	r12,r12,(n);						\

> +	ld	r9,area+EX_R9(r13);					\
> +	std	r9,HSTATE_SCRATCH1(r13);				\

Why not put this std in kvmppc_interrupt[_hv] rather than in each
handler?

>  	b	kvmppc_interrupt
>  
>  #define __KVM_HANDLER_SKIP(area, h, n)					\
>  	cmpwi	r10,KVM_GUEST_MODE_SKIP;				\
> -	ld	r10,area+EX_R10(r13);					\
>  	beq	89f;							\
> -	stw	r9,HSTATE_SCRATCH1(r13);				\
>  	BEGIN_FTR_SECTION_NESTED(948)					\
> -	ld	r9,area+EX_PPR(r13);					\
> -	std	r9,HSTATE_PPR(r13);					\
> +	ld	r10,area+EX_PPR(r13);					\
> +	std	r10,HSTATE_PPR(r13);					\
>  	END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948);	\
> -	ld	r9,area+EX_R9(r13);					\
> +	ld	r10,area+EX_R10(r13);					\
>  	std	r12,HSTATE_SCRATCH0(r13);				\
> -	li	r12,n;							\
> +	li	r12,(n);						\
> +	sldi	r12,r12,32;						\
> +	or	r12,r12,r9;						\
> +	ld	r9,area+EX_R9(r13);					\
> +	std	r9,HSTATE_SCRATCH1(r13);				\

Same comment again, of course.

>  	b	kvmppc_interrupt;					\
>  89:	mtocrf	0x80,r9;						\
>  	ld	r9,area+EX_R9(r13);					\
> +	ld	r10,area+EX_R10(r13);					\
>  	b	kvmppc_skip_##h##interrupt
>  
>  #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index c3c1d1b..0536c73 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -1043,19 +1043,18 @@ hdec_soon:
>  kvmppc_interrupt_hv:
>  	/*
>  	 * Register contents:
> -	 * R12		= interrupt vector
> +	 * R12		= (interrupt vector << 32) | guest CR
>  	 * R13		= PACA
> -	 * guest CR, R12 saved in shadow VCPU SCRATCH1/0
> +	 * R9		= unused
> +	 * guest R12, R9 saved in shadow VCPU SCRATCH0/1 respectively
>  	 * guest R13 saved in SPRN_SCRATCH0
>  	 */
> -	std	r9, HSTATE_SCRATCH2(r13)
> -
>  	lbz	r9, HSTATE_IN_GUEST(r13)
>  	cmpwi	r9, KVM_GUEST_MODE_HOST_HV
>  	beq	kvmppc_bad_host_intr
>  #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
>  	cmpwi	r9, KVM_GUEST_MODE_GUEST
> -	ld	r9, HSTATE_SCRATCH2(r13)
> +	ld	r9, HSTATE_SCRATCH1(r13)
>  	beq	kvmppc_interrupt_pr
>  #endif
>  	/* We're now back in the host but in guest MMU context */
> @@ -1075,14 +1074,13 @@ kvmppc_interrupt_hv:
>  	std	r6, VCPU_GPR(R6)(r9)
>  	std	r7, VCPU_GPR(R7)(r9)
>  	std	r8, VCPU_GPR(R8)(r9)
> -	ld	r0, HSTATE_SCRATCH2(r13)
> +	ld	r0, HSTATE_SCRATCH1(r13)
>  	std	r0, VCPU_GPR(R9)(r9)
>  	std	r10, VCPU_GPR(R10)(r9)
>  	std	r11, VCPU_GPR(R11)(r9)
>  	ld	r3, HSTATE_SCRATCH0(r13)
> -	lwz	r4, HSTATE_SCRATCH1(r13)
>  	std	r3, VCPU_GPR(R12)(r9)
> -	stw	r4, VCPU_CR(r9)
> +	stw	r12, VCPU_CR(r9)	/* CR is in the low half of r12 */

With CR in the high half of r12 instead, this would then need to be
srdi r4, r12, 32; stw r4, VCPU_CR(r9).

>  BEGIN_FTR_SECTION
>  	ld	r3, HSTATE_CFAR(r13)
>  	std	r3, VCPU_CFAR(r9)
> @@ -1100,6 +1098,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>  	mfspr	r11, SPRN_SRR1
>  	std	r10, VCPU_SRR0(r9)
>  	std	r11, VCPU_SRR1(r9)
> +	srdi	r12, r12, 32		/* trap is in the high half of r12 */

and this would become clrldi r12,r12,32, though arguably that's not
totally necessary since we always do cmpwi/stw/lwz on r12 (but I'd
feel safer with the clrldi in place).

Cheers,
Paul.


More information about the Linuxppc-dev mailing list