[PATCH v4 22/46] KVM: PPC: Book3S HV P9: Stop handling hcalls in real-mode in the P9 path
Fabiano Rosas
farosas at linux.ibm.com
Wed Mar 24 09:57:11 AEDT 2021
Nicholas Piggin <npiggin at gmail.com> writes:
> In the interest of minimising the amount of code that is run in
> "real-mode", don't handle hcalls in real mode in the P9 path.
>
> POWER8 and earlier are much more expensive to exit from HV real mode
> and switch to host mode, because on those processors HV interrupts get
> to the hypervisor with the MMU off, and the other threads in the core
> need to be pulled out of the guest, and SLBs all need to be saved,
> ERATs invalidated, and host SLB reloaded before the MMU is re-enabled
> in host mode. Hash guests also require a lot of hcalls to run. The
> XICS interrupt controller requires hcalls to run.
>
> By contrast, POWER9 has independent thread switching, and in radix mode
> the hypervisor is already in a host virtual memory mode when the HV
> interrupt is taken. Radix + xive guests don't need hcalls to handle
> interrupts or manage translations.
>
> So it's much less important to handle hcalls in real mode in P9.
>
> Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
> ---
<snip>
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index fa7614c37e08..17739aaee3d8 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1142,12 +1142,13 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
> }
>
> /*
> - * Handle H_CEDE in the nested virtualization case where we haven't
> - * called the real-mode hcall handlers in book3s_hv_rmhandlers.S.
> + * Handle H_CEDE in the P9 path where we don't call the real-mode hcall
> + * handlers in book3s_hv_rmhandlers.S.
> + *
> * This has to be done early, not in kvmppc_pseries_do_hcall(), so
> * that the cede logic in kvmppc_run_single_vcpu() works properly.
> */
> -static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
> +static void kvmppc_cede(struct kvm_vcpu *vcpu)
> {
> vcpu->arch.shregs.msr |= MSR_EE;
> vcpu->arch.ceded = 1;
> @@ -1403,9 +1404,15 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
> /* hcall - punt to userspace */
> int i;
>
> - /* hypercall with MSR_PR has already been handled in rmode,
> - * and never reaches here.
> - */
> + if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
> + /*
> + * Guest userspace executed sc 1, reflect it back as a
> + * privileged program check interrupt.
> + */
> + kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
> + r = RESUME_GUEST;
> + break;
> + }
This patch bypasses sc_1_fast_return, so it breaks KVM-PR: an L1 guest
running KVM-PR loops with the following output:
[ 9.503929][ T3443] Couldn't emulate instruction 0x4e800020 (op 19 xop 16)
[ 9.503990][ T3443] kvmppc_exit_pr_progint: emulation at 48f4 failed (4e800020)
[ 9.504080][ T3443] Couldn't emulate instruction 0x4e800020 (op 19 xop 16)
[ 9.504170][ T3443] kvmppc_exit_pr_progint: emulation at 48f4 failed (4e800020)
0x4e800020 is the blr following the sc 1 in SLOF.

For KVM-PR we need to inject a 0xc00 (system call interrupt) into the
guest at some point, either here or before branching to no_try_real in
book3s_hv_rmhandlers.S.
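Something along these lines is what I have in mind (an untested sketch;
kvmppc_core_queue_syscall() is a hypothetical helper name I'm making up
here, the point being simply to deliver a 0xc00 instead of the 0x700
queued above):

	/*
	 * Hypothetical helper, alongside kvmppc_core_queue_program()
	 * in arch/powerpc/kvm/book3s.c, reusing kvmppc_inject_interrupt().
	 */
	void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu)
	{
		/* Deliver a system call (0xc00) interrupt to the guest */
		kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_SYSCALL, 0);
	}

	/* Then, in the MSR_PR case of kvmppc_handle_exit_hv(): */
	if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
		/*
		 * sc 1 from guest userspace. If L1 is running KVM-PR,
		 * this is a PR hcall and has to reach the L1 kernel as
		 * a 0xc00, not as a privileged program check.
		 */
		kvmppc_core_queue_syscall(vcpu);
		r = RESUME_GUEST;
		break;
	}

That would let SLOF's sc 1 be handled by KVM-PR in L1 instead of
looping on the emulation failure above.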
>
> run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
> for (i = 0; i < 9; ++i)
> @@ -3663,6 +3670,12 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
> return trap;
> }
>
> +static inline bool hcall_is_xics(unsigned long req)
> +{
> + return (req == H_EOI || req == H_CPPR || req == H_IPI ||
> + req == H_IPOLL || req == H_XIRR || req == H_XIRR_X);
> +}
> +
> /*
> * Virtual-mode guest entry for POWER9 and later when the host and
> * guest are both using the radix MMU. The LPIDR has already been set.
> @@ -3774,15 +3787,36 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
> /* H_CEDE has to be handled now, not later */
> if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
> kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
> - kvmppc_nested_cede(vcpu);
> + kvmppc_cede(vcpu);
> kvmppc_set_gpr(vcpu, 3, 0);
> trap = 0;
> }
> } else {
> kvmppc_xive_push_vcpu(vcpu);
> trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
> + if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
> + !(vcpu->arch.shregs.msr & MSR_PR)) {
> + unsigned long req = kvmppc_get_gpr(vcpu, 3);
> +
> + /* H_CEDE has to be handled now, not later */
> + if (req == H_CEDE) {
> + kvmppc_cede(vcpu);
> + kvmppc_xive_cede_vcpu(vcpu); /* may un-cede */
> + kvmppc_set_gpr(vcpu, 3, 0);
> + trap = 0;
> +
> + /* XICS hcalls must be handled before xive is pulled */
> + } else if (hcall_is_xics(req)) {
> + int ret;
> +
> + ret = kvmppc_xive_xics_hcall(vcpu, req);
> + if (ret != H_TOO_HARD) {
> + kvmppc_set_gpr(vcpu, 3, ret);
> + trap = 0;
> + }
> + }
> + }
> kvmppc_xive_pull_vcpu(vcpu);
> -
> }
>
> vcpu->arch.slb_max = 0;
> @@ -4442,8 +4476,11 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
> else
> r = kvmppc_run_vcpu(vcpu);
>
> - if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
> - !(vcpu->arch.shregs.msr & MSR_PR)) {
> + if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
> + if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) {
> + r = RESUME_GUEST;
> + continue;
> + }
Note that this hunk might need to be dropped.
> trace_kvm_hcall_enter(vcpu);
> r = kvmppc_pseries_do_hcall(vcpu);
> trace_kvm_hcall_exit(vcpu, r);
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index c11597f815e4..2d0d14ed1d92 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -1397,9 +1397,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
> mr r4,r9
> bge fast_guest_return
> 2:
> + /* If we came in through the P9 short path, no real mode hcalls */
> + lwz r0, STACK_SLOT_SHORT_PATH(r1)
> + cmpwi r0, 0
> + bne no_try_real
> /* See if this is an hcall we can handle in real mode */
> cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
> beq hcall_try_real_mode
> +no_try_real:
>
> /* Hypervisor doorbell - exit only if host IPI flag set */
> cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL