[PATCH 1/2] KVM: PPC: Book3S HV: Simplify machine check handling
Aravinda Prasad
aravinda at linux.vnet.ibm.com
Wed Feb 20 20:20:24 AEDT 2019
On Wednesday 20 February 2019 06:35 AM, Paul Mackerras wrote:
> This makes the handling of machine check interrupts that occur inside
> a guest simpler and more robust, with less done in assembler code and
> in real mode.
>
> Now, when a machine check occurs inside a guest, we always get the
> machine check event struct and put a copy in the vcpu struct for the
> vcpu where the machine check occurred. We no longer call
> machine_check_queue_event() from kvmppc_realmode_mc_power7(), because
> on POWER8, when a vcpu is running on an offline secondary thread and
> we call machine_check_queue_event(), that calls irq_work_queue(),
> which doesn't work because the CPU is offline, but instead triggers
> the WARN_ON(lazy_irq_pending()) in pnv_smp_cpu_kill_self() (which
> fires again and again because nothing clears the condition).
>
> All that machine_check_queue_event() actually does is to cause the
> event to be printed to the console. For a machine check occurring in
> the guest, we now print the event in kvmppc_handle_exit_hv()
> instead.
>
> The assembly code at label machine_check_realmode now just calls C
> code and then continues exiting the guest. We no longer either
> synthesize a machine check for the guest in assembly code or return
> to the guest without a machine check.
>
> The code in kvmppc_handle_exit_hv() is extended to handle the case
> where the guest is not FWNMI-capable. In that case we now always
> synthesize a machine check interrupt for the guest. Previously, if
> the host thinks it has recovered the machine check fully, it would
> return to the guest without any notification that the machine check
> had occurred. If the machine check was caused by some action of the
> guest (such as creating duplicate SLB entries), it is much better to
> tell the guest that it has caused a problem. Therefore we now always
> generate a machine check interrupt for guests that are not
> FWNMI-capable.
>
> Signed-off-by: Paul Mackerras <paulus at ozlabs.org>
> ---
> arch/powerpc/include/asm/kvm_ppc.h | 3 +-
> arch/powerpc/kvm/book3s.c | 7 +++++
> arch/powerpc/kvm/book3s_hv.c | 18 +++++++++--
> arch/powerpc/kvm/book3s_hv_ras.c | 56 +++++++++------------------------
> arch/powerpc/kvm/book3s_hv_rmhandlers.S | 40 ++---------------------
> 5 files changed, 42 insertions(+), 82 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index b3bf4f6..d283d31 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -143,6 +143,7 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
>
> extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
> extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
> +extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags);
> extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
> extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu);
> extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu);
> @@ -646,7 +647,7 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
> unsigned int yield_count);
> long kvmppc_h_random(struct kvm_vcpu *vcpu);
> void kvmhv_commence_exit(int trap);
> -long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
> +void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
> void kvmppc_subcore_enter_guest(void);
> void kvmppc_subcore_exit_guest(void);
> long kvmppc_realmode_hmi_handler(void);
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index 22a46c6..10c5579 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -195,6 +195,13 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
> }
> EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio);
>
> +void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags)
> +{
> + /* might as well deliver this straight away */
> + kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_MACHINE_CHECK, flags);
> +}
> +EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check);
> +
> void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
> {
> /* might as well deliver this straight away */
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 1860c0b..d8bf05a 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1215,6 +1215,22 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
> r = RESUME_GUEST;
> break;
> case BOOK3S_INTERRUPT_MACHINE_CHECK:
> + /* Print the MCE event to host console. */
> + machine_check_print_event_info(&vcpu->arch.mce_evt, false);
> +
> + /*
> + * If the guest can do FWNMI, exit to userspace so it can
> + * deliver a FWNMI to the guest.
> + * Otherwise we synthesize a machine check for the guest
> + * so that it knows that the machine check occurred.
> + */
> + if (!vcpu->kvm->arch.fwnmi_enabled) {
> + ulong flags = vcpu->arch.shregs.msr & 0x083c0000;
> + kvmppc_core_queue_machine_check(vcpu, flags);
> + r = RESUME_GUEST;
> + break;
> + }
> +
> /* Exit to guest with KVM_EXIT_NMI as exit reason */
> run->exit_reason = KVM_EXIT_NMI;
> run->hw.hardware_exit_reason = vcpu->arch.trap;
> @@ -1227,8 +1243,6 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
> run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
>
> r = RESUME_HOST;
> - /* Print the MCE event to host console. */
> - machine_check_print_event_info(&vcpu->arch.mce_evt, false);
> break;
> case BOOK3S_INTERRUPT_PROGRAM:
> {
> diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
> index 0787f12..9aa10b1 100644
> --- a/arch/powerpc/kvm/book3s_hv_ras.c
> +++ b/arch/powerpc/kvm/book3s_hv_ras.c
> @@ -69,7 +69,7 @@ static void reload_slb(struct kvm_vcpu *vcpu)
> *
> * Returns: 0 => exit guest, 1 => deliver machine check to guest
You have to remove the above comment as the function now returns nothing.
Reviewed-by: Aravinda Prasad <aravinda at linux.vnet.ibm.com>
Regards,
Aravinda
> */
> -static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
> +static void kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
> {
> unsigned long srr1 = vcpu->arch.shregs.msr;
> struct machine_check_event mce_evt;
> @@ -111,52 +111,24 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
> }
>
> /*
> - * See if we have already handled the condition in the linux host.
> - * We assume that if the condition is recovered then linux host
> - * will have generated an error log event that we will pick
> - * up and log later.
> - * Don't release mce event now. We will queue up the event so that
> - * we can log the MCE event info on host console.
> + * Now get the event and stash it in the vcpu struct so it can
> + * be handled by the primary thread in virtual mode. We can't
> + * call machine_check_queue_event() here if we are running on
> + * an offline secondary thread.
> */
> - if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE))
> - goto out;
> -
> - if (mce_evt.version == MCE_V1 &&
> - (mce_evt.severity == MCE_SEV_NO_ERROR ||
> - mce_evt.disposition == MCE_DISPOSITION_RECOVERED))
> - handled = 1;
> -
> -out:
> - /*
> - * For guest that supports FWNMI capability, hook the MCE event into
> - * vcpu structure. We are going to exit the guest with KVM_EXIT_NMI
> - * exit reason. On our way to exit we will pull this event from vcpu
> - * structure and print it from thread 0 of the core/subcore.
> - *
> - * For guest that does not support FWNMI capability (old QEMU):
> - * We are now going enter guest either through machine check
> - * interrupt (for unhandled errors) or will continue from
> - * current HSRR0 (for handled errors) in guest. Hence
> - * queue up the event so that we can log it from host console later.
> - */
> - if (vcpu->kvm->arch.fwnmi_enabled) {
> - /*
> - * Hook up the mce event on to vcpu structure.
> - * First clear the old event.
> - */
> - memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt));
> - if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
> - vcpu->arch.mce_evt = mce_evt;
> - }
> - } else
> - machine_check_queue_event();
> + if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
> + if (handled && mce_evt.version == MCE_V1)
> + mce_evt.disposition = MCE_DISPOSITION_RECOVERED;
> + } else {
> + memset(&mce_evt, 0, sizeof(mce_evt));
> + }
>
> - return handled;
> + vcpu->arch.mce_evt = mce_evt;
> }
>
> -long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
> +void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
> {
> - return kvmppc_realmode_mc_power7(vcpu);
> + kvmppc_realmode_mc_power7(vcpu);
> }
>
> /* Check if dynamic split is in force and return subcore size accordingly. */
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 9b8d50a..f24f6a2 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -2826,49 +2826,15 @@ kvm_cede_exit:
> #endif /* CONFIG_KVM_XICS */
> 3: b guest_exit_cont
>
> - /* Try to handle a machine check in real mode */
> + /* Try to do machine check recovery in real mode */
> machine_check_realmode:
> mr r3, r9 /* get vcpu pointer */
> bl kvmppc_realmode_machine_check
> nop
> + /* all machine checks go to virtual mode for further handling */
> ld r9, HSTATE_KVM_VCPU(r13)
> li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
> - /*
> - * For the guest that is FWNMI capable, deliver all the MCE errors
> - * (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit
> - * reason. This new approach injects machine check errors in guest
> - * address space to guest with additional information in the form
> - * of RTAS event, thus enabling guest kernel to suitably handle
> - * such errors.
> - *
> - * For the guest that is not FWNMI capable (old QEMU) fallback
> - * to old behaviour for backward compatibility:
> - * Deliver unhandled/fatal (e.g. UE) MCE errors to guest either
> - * through machine check interrupt (set HSRR0 to 0x200).
> - * For handled errors (no-fatal), just go back to guest execution
> - * with current HSRR0.
> - * if we receive machine check with MSR(RI=0) then deliver it to
> - * guest as machine check causing guest to crash.
> - */
> - ld r11, VCPU_MSR(r9)
> - rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
> - bne guest_exit_cont /* if so, exit to host */
> - /* Check if guest is capable of handling NMI exit */
> - ld r10, VCPU_KVM(r9)
> - lbz r10, KVM_FWNMI(r10)
> - cmpdi r10, 1 /* FWNMI capable? */
> - beq guest_exit_cont /* if so, exit with KVM_EXIT_NMI. */
> -
> - /* if not, fall through for backward compatibility. */
> - andi. r10, r11, MSR_RI /* check for unrecoverable exception */
> - beq 1f /* Deliver a machine check to guest */
> - ld r10, VCPU_PC(r9)
> - cmpdi r3, 0 /* Did we handle MCE ? */
> - bne 2f /* Continue guest execution. */
> - /* If not, deliver a machine check. SRR0/1 are already set */
> -1: li r10, BOOK3S_INTERRUPT_MACHINE_CHECK
> - bl kvmppc_msr_interrupt
> -2: b fast_interrupt_c_return
> + b guest_exit_cont
>
> /*
> * Call C code to handle a HMI in real mode.
>
--
Regards,
Aravinda
More information about the Linuxppc-dev
mailing list