[RFC PATCH 2/2] arch/powerpc/kvm: Fix doorbells for nested KVM guests on PowerNV

Nicholas Piggin npiggin at gmail.com
Thu Jul 4 22:10:05 AEST 2024


On Fri Jun 28, 2024 at 4:03 AM AEST, Gautam Menghani wrote:
> commit 6398326b9ba1("KVM: PPC: Book3S HV P9: Stop using vc->dpdes")
> introduced an optimization to use only vcpu->doorbell_request for SMT
> emulation for Power9 and above guests, but the code for nested guests 
> still relies on the old way of handling doorbells, due to which an L2
> guest cannot be booted with XICS with SMT>1. The command to repro
> this issue is:
>
> qemu-system-ppc64 \
> 	-drive file=rhel.qcow2,format=qcow2 \
> 	-m 20G \
> 	-smp 8,cores=1,threads=8 \
> 	-cpu  host \
> 	-nographic \
> 	-machine pseries,ic-mode=xics -accel kvm
>
> Fix the plumbing to utilize vcpu->doorbell_request instead of vcore->dpdes 
> on P9 and above.
>
> Fixes: 6398326b9ba1 ("KVM: PPC: Book3S HV P9: Stop using vc->dpdes")
> Signed-off-by: Gautam Menghani <gautam at linux.ibm.com>
> ---
>  arch/powerpc/kvm/book3s_hv.c        |  9 ++++++++-
>  arch/powerpc/kvm/book3s_hv_nested.c | 20 ++++++++++++++++----
>  2 files changed, 24 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index cea28ac05923..0586fa636707 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -4178,6 +4178,9 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, uns
>  	}
>  	hvregs.hdec_expiry = time_limit;
>  
> +	// clear doorbell bit as hvregs already has the info
> +	vcpu->arch.doorbell_request = 0;
> +
>  	/*
>  	 * When setting DEC, we must always deal with irq_work_raise
>  	 * via NMI vs setting DEC. The problem occurs right as we
> @@ -4694,6 +4697,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
>  	struct kvm_nested_guest *nested = vcpu->arch.nested;
>  	unsigned long flags;
>  	u64 tb;
> +	bool doorbell_pending;
>  
>  	trace_kvmppc_run_vcpu_enter(vcpu);
>  
> @@ -4752,6 +4756,9 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
>  	 */
>  	smp_mb();
>  
> +	doorbell_pending = !cpu_has_feature(CPU_FTR_ARCH_300) &&
> +				vcpu->arch.doorbell_request;

Hmm... is the feature test flipped here?

> +
>  	if (!nested) {
>  		kvmppc_core_prepare_to_enter(vcpu);
>  		if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
> @@ -4769,7 +4776,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
>  				lpcr |= LPCR_MER;
>  		}
>  	} else if (vcpu->arch.pending_exceptions ||
> -		   vcpu->arch.doorbell_request ||
> +		   doorbell_pending ||
>  		   xive_interrupt_pending(vcpu)) {
>  		vcpu->arch.ret = RESUME_HOST;
>  		goto out;
> diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
> index 05f5220960c6..b34eefa6b268 100644
> --- a/arch/powerpc/kvm/book3s_hv_nested.c
> +++ b/arch/powerpc/kvm/book3s_hv_nested.c
> @@ -32,7 +32,10 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
>  	struct kvmppc_vcore *vc = vcpu->arch.vcore;
>  
>  	hr->pcr = vc->pcr | PCR_MASK;
> -	hr->dpdes = vc->dpdes;
> +	if (cpu_has_feature(CPU_FTR_ARCH_300))
> +		hr->dpdes = vcpu->arch.doorbell_request;
> +	else
> +		hr->dpdes = vc->dpdes;
>  	hr->hfscr = vcpu->arch.hfscr;
>  	hr->tb_offset = vc->tb_offset;
>  	hr->dawr0 = vcpu->arch.dawr0;

Great find.

Nested is all POWER9 and later only, so I think you can just
change to using doorbell_request always.

And probably don't have to do anything for book3s_hv.c unless
I'm mistaken about the feature test.

Thanks,
Nick

> @@ -105,7 +108,10 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu,
>  {
>  	struct kvmppc_vcore *vc = vcpu->arch.vcore;
>  
> -	hr->dpdes = vc->dpdes;
> +	if (cpu_has_feature(CPU_FTR_ARCH_300))
> +		hr->dpdes = vcpu->arch.doorbell_request;
> +	else
> +		hr->dpdes = vc->dpdes;
>  	hr->purr = vcpu->arch.purr;
>  	hr->spurr = vcpu->arch.spurr;
>  	hr->ic = vcpu->arch.ic;
> @@ -143,7 +149,10 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, const struct hv_guest_state *
>  	struct kvmppc_vcore *vc = vcpu->arch.vcore;
>  
>  	vc->pcr = hr->pcr | PCR_MASK;
> -	vc->dpdes = hr->dpdes;
> +	if (cpu_has_feature(CPU_FTR_ARCH_300))
> +		vcpu->arch.doorbell_request = hr->dpdes;
> +	else
> +		vc->dpdes = hr->dpdes;
>  	vcpu->arch.hfscr = hr->hfscr;
>  	vcpu->arch.dawr0 = hr->dawr0;
>  	vcpu->arch.dawrx0 = hr->dawrx0;
> @@ -170,7 +179,10 @@ void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
>  {
>  	struct kvmppc_vcore *vc = vcpu->arch.vcore;
>  
> -	vc->dpdes = hr->dpdes;
> +	if (cpu_has_feature(CPU_FTR_ARCH_300) && !vcpu->arch.doorbell_request)
> +		vcpu->arch.doorbell_request = hr->dpdes;
> +	else
> +		vc->dpdes = hr->dpdes;
>  	vcpu->arch.hfscr = hr->hfscr;
>  	vcpu->arch.purr = hr->purr;
>  	vcpu->arch.spurr = hr->spurr;



More information about the Linuxppc-dev mailing list