[PATCH v2 23/37] KVM: PPC: Book3S HV P9: Implement the rest of the P9 path in C
Fabiano Rosas
farosas at linux.ibm.com
Sat Feb 27 09:37:28 AEDT 2021
Nicholas Piggin <npiggin at gmail.com> writes:
Hi, thanks for this. It helped me clarify a bunch of details that I
hadn't understood while reading the asm code.
Some comments below:
> Almost all logic is moved to C, by introducing a new in_guest mode that
> selects and branches very early in the interrupt handler to the P9 exit
> code.
>
> The remaining assembly is only about 160 lines of low level stack setup,
> with VCPU vs host register save and restore, plus a small shim to the
> legacy paths in the interrupt handler.
>
> There are two motivations for this, the first is just make the code more
> maintainable being in C. The second is to reduce the amount of code
> running in a special KVM mode, "realmode". I put that in quotes because
> with radix it is no longer necessarily real-mode in the MMU, but it
> still has to be treated specially because it may be in real-mode, and
> has various important registers like PID, DEC, TB, etc set to guest.
> This is hostile to the rest of Linux and can't use arbitrary kernel
> functionality or be instrumented well.
>
> This initial patch is a reasonably faithful conversion of the asm code.
> It does lack any loop to return quickly back into the guest without
> switching out of realmode in the case of unimportant or easily handled
> interrupts, as explained in the previous change, handling HV interrupts
> in real mode is not so important for P9.
>
> Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
> ---
<snip>
> diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
> index f826c8dc2e19..cc7b76865a16 100644
> --- a/arch/powerpc/kvm/book3s_64_entry.S
> +++ b/arch/powerpc/kvm/book3s_64_entry.S
> @@ -1,10 +1,14 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> #include <asm/asm-offsets.h>
> #include <asm/cache.h>
> +#include <asm/code-patching-asm.h>
> #include <asm/exception-64s.h>
> +#include <asm/export.h>
> #include <asm/kvm_asm.h>
> #include <asm/kvm_book3s_asm.h>
> #include <asm/ppc_asm.h>
> #include <asm/reg.h>
> +#include <asm/ultravisor-api.h>
>
> /*
> * These are branched to from interrupt handlers in exception-64s.S which set
> @@ -13,13 +17,24 @@
> .global kvmppc_hcall
> .balign IFETCH_ALIGN_BYTES
> kvmppc_hcall:
> + lbz r10,HSTATE_IN_GUEST(r13)
> + cmpwi r10,KVM_GUEST_MODE_GUEST_HV_FAST
> + beq kvmppc_p9_exit_hcall
> ld r10,PACA_EXGEN+EX_R13(r13)
> SET_SCRATCH0(r10)
> li r10,0xc00
> + li r11,PACA_EXGEN
> + b 1f
>
> .global kvmppc_interrupt
> .balign IFETCH_ALIGN_BYTES
> kvmppc_interrupt:
> + std r10,HSTATE_SCRATCH0(r13)
> + lbz r10,HSTATE_IN_GUEST(r13)
> + cmpwi r10,KVM_GUEST_MODE_GUEST_HV_FAST
> + beq kvmppc_p9_exit_interrupt
> + ld r10,HSTATE_SCRATCH0(r13)
> + lbz r11,HSTATE_IN_GUEST(r13)
> li r11,PACA_EXGEN
> cmpdi r10,0x200
> bgt+ 1f
> @@ -114,3 +129,169 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
> GET_SCRATCH0(r13)
> HRFI_TO_KERNEL
> #endif
> +
> +/* Stack frame offsets for kvmppc_hv_entry */
> +#define SFS 208
> +#define STACK_SLOT_VCPU (SFS-8)
> +#define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */
> +
> +/*
> + * void kvmppc_p9_enter_guest(struct vcpu *vcpu);
> + *
> + * Enter the guest on a ISAv3.0 or later system where we have exactly
> + * one vcpu per vcore, and both the host and guest are radix, and threads
> + * are set to "indepdent mode".
> + */
> +.balign IFETCH_ALIGN_BYTES
> +_GLOBAL(kvmppc_p9_enter_guest)
> +EXPORT_SYMBOL_GPL(kvmppc_p9_enter_guest)
> + mflr r0
> + std r0,PPC_LR_STKOFF(r1)
> + stdu r1,-SFS(r1)
> +
> + std r1,HSTATE_HOST_R1(r13)
> + std r3,STACK_SLOT_VCPU(r1)
The vcpu was previously stored in the paca. I don't get the reason for
the change, could you explain?
> +
> + mfcr r4
> + stw r4,SFS+8(r1)
> +
> + reg = 14
> + .rept 18
> + std reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
> + reg = reg + 1
> + .endr
> +
> + ld r4,VCPU_LR(r3)
> + mtlr r4
> + ld r4,VCPU_CTR(r3)
> + mtctr r4
> + ld r4,VCPU_XER(r3)
> + mtspr SPRN_XER,r4
> +
> + ld r1,VCPU_CR(r3)
> +
> +BEGIN_FTR_SECTION
> + ld r4,VCPU_CFAR(r3)
> + mtspr SPRN_CFAR,r4
> +END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
> +BEGIN_FTR_SECTION
> + ld r4,VCPU_PPR(r3)
> + mtspr SPRN_PPR,r4
> +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
> +
> + reg = 4
> + .rept 28
> + ld reg,__VCPU_GPR(reg)(r3)
> + reg = reg + 1
> + .endr
> +
> + ld r4,VCPU_KVM(r3)
> + lbz r4,KVM_SECURE_GUEST(r4)
> + cmpdi r4,0
> + ld r4,VCPU_GPR(R4)(r3)
> + bne .Lret_to_ultra
> +
> + mtcr r1
> +
> + ld r0,VCPU_GPR(R0)(r3)
> + ld r1,VCPU_GPR(R1)(r3)
> + ld r2,VCPU_GPR(R2)(r3)
> + ld r3,VCPU_GPR(R3)(r3)
> +
> + HRFI_TO_GUEST
> + b .
> +
> + /*
> + * Use UV_RETURN ultracall to return control back to the Ultravisor
> + * after processing an hypercall or interrupt that was forwarded
> + * (a.k.a. reflected) to the Hypervisor.
> + *
> + * All registers have already been reloaded except the ucall requires:
> + * R0 = hcall result
> + * R2 = SRR1, so UV can detect a synthesized interrupt (if any)
> + * R3 = UV_RETURN
> + */
> +.Lret_to_ultra:
> + mtcr r1
> + ld r1,VCPU_GPR(R1)(r3)
> +
> + ld r0,VCPU_GPR(R3)(r3)
> + mfspr r2,SPRN_SRR1
> + LOAD_REG_IMMEDIATE(r3, UV_RETURN)
> + sc 2
> +
> +/*
> + * kvmppc_p9_exit_hcall and kvmppc_p9_exit_interrupt are branched to from
> + * above if the interrupt was taken for a guest that was entered via
> + * kvmppc_p9_enter_guest().
> + *
> + * This code recovers the host stack and vcpu pointer, saves all GPRs and
> + * CR, LR, CTR, XER as well as guest MSR and NIA into the VCPU, then re-
> + * establishes the host stack and registers to return from the
> + * kvmppc_p9_enter_guest() function.
> + */
> +.balign IFETCH_ALIGN_BYTES
> +kvmppc_p9_exit_hcall:
> + mfspr r11,SPRN_SRR0
> + mfspr r12,SPRN_SRR1
> + li r10,0xc00
> + std r10,HSTATE_SCRATCH0(r13)
> +
> +.balign IFETCH_ALIGN_BYTES
> +kvmppc_p9_exit_interrupt:
> + std r1,HSTATE_SCRATCH1(r13)
> + std r3,HSTATE_SCRATCH2(r13)
> + ld r1,HSTATE_HOST_R1(r13)
> + ld r3,STACK_SLOT_VCPU(r1)
> +
> + std r9,VCPU_CR(r3)
> +
> +1:
> + std r11,VCPU_PC(r3)
> + std r12,VCPU_MSR(r3)
> +
> + reg = 14
> + .rept 18
> + std reg,__VCPU_GPR(reg)(r3)
> + reg = reg + 1
> + .endr
> +
> + /* r1, r3, r9-r13 are saved to vcpu by C code */
If we just saved r1 and r3, why don't we put them in the vcpu right now?
That would avoid having the C code know about the scratch areas.
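Something like this is what I have in mind (just a rough, untested
sketch; it would go after the existing r0/r2/r4-r8 stores, reusing r4 as
a temporary once its guest value has been saved):

	/* untested sketch: move guest r1/r3 from the HSTATE scratch
	 * slots into the vcpu here, so the C code never needs to know
	 * about the scratch areas
	 */
	ld	r4,HSTATE_SCRATCH1(r13)
	std	r4,VCPU_GPR(R1)(r3)
	ld	r4,HSTATE_SCRATCH2(r13)
	std	r4,VCPU_GPR(R3)(r3)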
> + std r0,VCPU_GPR(R0)(r3)
> + std r2,VCPU_GPR(R2)(r3)
> + reg = 4
> + .rept 5
> + std reg,__VCPU_GPR(reg)(r3)
> + reg = reg + 1
> + .endr
> +
> + ld r2,PACATOC(r13)
> +
> + mflr r4
> + std r4,VCPU_LR(r3)
> + mfspr r4,SPRN_XER
> + std r4,VCPU_XER(r3)
> +
> + reg = 14
> + .rept 18
> + ld reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
> + reg = reg + 1
> + .endr
> +
> + lwz r4,SFS+8(r1)
> + mtcr r4
> +
> + /*
> + * Flush the link stack here, before executing the first blr on the
> + * way out of the guest.
> + *
> + * The link stack won't match coming out of the guest anyway so the
> + * only cost is the flush itself. The call clobbers r0.
> + */
> +1: nop
> + patch_site 1b patch__call_kvm_flush_link_stack_2
> +
> + addi r1,r1,SFS
> + ld r0,PPC_LR_STKOFF(r1)
> + mtlr r0
> + blr
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 1997cf347d3e..28a2761515e3 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1421,6 +1421,8 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
> */
> case BOOK3S_INTERRUPT_H_DATA_STORAGE:
> r = RESUME_PAGE_FAULT;
> + if (vcpu->arch.fault_dsisr == HDSISR_CANARY)
> + r = RESUME_GUEST; /* Just retry if it's the canary */
> break;
> case BOOK3S_INTERRUPT_H_INST_STORAGE:
> vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
> @@ -3736,14 +3738,14 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
> vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
> vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
> mtspr(SPRN_PSSCR_PR, host_psscr);
> -
> /* H_CEDE has to be handled now, not later */
> - if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
> + if (trap == BOOK3S_INTERRUPT_SYSCALL &&
> kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
> kvmppc_cede(vcpu);
> kvmppc_set_gpr(vcpu, 3, 0);
> trap = 0;
> }
> +
> } else {
> kvmppc_xive_push_vcpu(vcpu);
> trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
> @@ -3768,9 +3770,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
> }
> }
> kvmppc_xive_pull_vcpu(vcpu);
> +
> + vcpu->arch.slb_max = 0;
> }
>
> - vcpu->arch.slb_max = 0;
> dec = mfspr(SPRN_DEC);
> if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
> dec = (s32) dec;
> @@ -4429,11 +4432,19 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
> else
> r = kvmppc_run_vcpu(vcpu);
>
> - if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
> - !(vcpu->arch.shregs.msr & MSR_PR)) {
> - trace_kvm_hcall_enter(vcpu);
> - r = kvmppc_pseries_do_hcall(vcpu);
> - trace_kvm_hcall_exit(vcpu, r);
> + if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
> + if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
> + /*
> + * Guest userspace executed sc 1, reflect it
> + * back as a privileged program check interrupt.
> + */
> + kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
> + r = RESUME_GUEST;
This is in conflict with this snippet in kvmppc_handle_exit_hv:
	case BOOK3S_INTERRUPT_SYSCALL:
	{
		/* hcall - punt to userspace */
		int i;

		/* hypercall with MSR_PR has already been handled in rmode,
		 * and never reaches here.
		 */
That function already queues some 0x700s, so maybe we could move this
one in there as well.
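Roughly something like this (untested sketch; the "handled in rmode"
comment above would then need updating too):

	case BOOK3S_INTERRUPT_SYSCALL:
	{
		/* hcall - punt to userspace */
		int i;

		if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
			/*
			 * Guest userspace executed sc 1, reflect it back
			 * as a privileged program check interrupt.
			 */
			kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
			r = RESUME_GUEST;
			break;
		}

		/* existing hcall-to-userspace handling continues here */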
> + } else {
> + trace_kvm_hcall_enter(vcpu);
> + r = kvmppc_pseries_do_hcall(vcpu);
> + trace_kvm_hcall_exit(vcpu, r);
> + }
> kvmppc_core_prepare_to_enter(vcpu);
> } else if (r == RESUME_PAGE_FAULT) {
> srcu_idx = srcu_read_lock(&kvm->srcu);
> diff --git a/arch/powerpc/kvm/book3s_hv_interrupt.c b/arch/powerpc/kvm/book3s_hv_interrupt.c
> new file mode 100644
> index 000000000000..5a7b036c447f
> --- /dev/null
> +++ b/arch/powerpc/kvm/book3s_hv_interrupt.c
> @@ -0,0 +1,221 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +#include <linux/kernel.h>
> +#include <linux/kvm_host.h>
> +#include <asm/asm-prototypes.h>
> +#include <asm/dbell.h>
> +#include <asm/kvm_ppc.h>
> +
> +#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
> +static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
> +{
> + struct kvmppc_vcore *vc = vcpu->arch.vcore;
> + u64 tb = mftb() - vc->tb_offset_applied;
> +
> + vcpu->arch.cur_activity = next;
> + vcpu->arch.cur_tb_start = tb;
> +}
> +
> +static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
> +{
> + struct kvmppc_vcore *vc = vcpu->arch.vcore;
> + struct kvmhv_tb_accumulator *curr;
> + u64 tb = mftb() - vc->tb_offset_applied;
> + u64 prev_tb;
> + u64 delta;
> + u64 seq;
> +
> + curr = vcpu->arch.cur_activity;
> + vcpu->arch.cur_activity = next;
> + prev_tb = vcpu->arch.cur_tb_start;
> + vcpu->arch.cur_tb_start = tb;
> +
> + if (!curr)
> + return;
> +
> + delta = tb - prev_tb;
> +
> + seq = curr->seqcount;
> + curr->seqcount = seq + 1;
> + smp_wmb();
> + curr->tb_total += delta;
> + if (seq == 0 || delta < curr->tb_min)
> + curr->tb_min = delta;
> + if (delta > curr->tb_max)
> + curr->tb_max = delta;
> + smp_wmb();
> + curr->seqcount = seq + 2;
> +}
> +
> +#define start_timing(vcpu, next) __start_timing(vcpu, next)
> +#define end_timing(vcpu) __start_timing(vcpu, NULL)
> +#define accumulate_time(vcpu, next) __accumulate_time(vcpu, next)
> +#else
> +#define start_timing(vcpu, next) do {} while (0)
> +#define end_timing(vcpu) do {} while (0)
> +#define accumulate_time(vcpu, next) do {} while (0)
> +#endif
> +
> +static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev)
> +{
> + asm volatile("slbmfev %0,%1" : "=r" (*slbev) : "r" (idx));
> + asm volatile("slbmfee %0,%1" : "=r" (*slbee) : "r" (idx));
> +}
> +
> +static inline void mtslb(unsigned int idx, u64 slbee, u64 slbev)
> +{
> + BUG_ON((slbee & 0xfff) != idx);
> +
> + asm volatile("slbmte %0,%1" :: "r" (slbev), "r" (slbee));
> +}
> +
> +static inline void slb_invalidate(unsigned int ih)
> +{
> + asm volatile("slbia %0" :: "i"(ih));
> +}
> +
> +/*
> + * Malicious or buggy radix guests may have inserted SLB entries
> + * (only 0..3 because radix always runs with UPRT=1), so these must
> + * be cleared here to avoid side-channels. slbmte is used rather
> + * than slbia, as it won't clear cached translations.
> + */
> +static void radix_clear_slb(void)
> +{
> + u64 slbee, slbev;
> + int i;
> +
> + for (i = 0; i < 4; i++) {
> + mfslb(i, &slbee, &slbev);
> + if (unlikely(slbee || slbev)) {
> + slbee = i;
> + slbev = 0;
> + mtslb(i, slbee, slbev);
> + }
> + }
> +}
> +
> +int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu)
> +{
> + u64 *exsave;
> + unsigned long msr = mfmsr();
> + int trap;
> +
> + start_timing(vcpu, &vcpu->arch.rm_entry);
> +
> + vcpu->arch.ceded = 0;
> +
> + WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
> + WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME));
> +
> + mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
> + mtspr(SPRN_HSRR1, (vcpu->arch.shregs.msr & ~MSR_HV) | MSR_ME);
> +
> + /*
> + * On POWER9 DD2.1 and below, sometimes on a Hypervisor Data Storage
> + * Interrupt (HDSI) the HDSISR is not be updated at all.
s/be //
> + *
> + * To work around this we put a canary value into the HDSISR before
> + * returning to a guest and then check for this canary when we take a
> + * HDSI. If we find the canary on a HDSI, we know the hardware didn't
> + * update the HDSISR. In this case we return to the guest to retake the
> + * HDSI which should correctly update the HDSISR the second time HDSI
> + * entry.
> + *
> + * Just do this on all p9 processors for now.
> + */
> + mtspr(SPRN_HDSISR, HDSISR_CANARY);
> +
> + accumulate_time(vcpu, &vcpu->arch.guest_time);
> +
> + local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST_HV_FAST;
> + kvmppc_p9_enter_guest(vcpu);
> + // Radix host and guest means host never runs with guest MMU state
> + local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;
> +
> + accumulate_time(vcpu, &vcpu->arch.rm_intr);
> +
> + /* Get these from r11/12 and paca exsave */
> + vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0);
> + vcpu->arch.shregs.srr1 = mfspr(SPRN_SRR1);
> + vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
> + vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
> +
> + trap = local_paca->kvm_hstate.scratch0 & ~0x2;
Couldn't we return the trap from kvmppc_p9_enter_guest? It seems like a
nice pattern to have and it would avoid exposing the IVEC+0x2 magic,
which is already hidden in the asm.
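i.e. something like this on the C side (sketch only; the asm would have
to move the masked trap number into r3 before returning):

	local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST_HV_FAST;
	trap = kvmppc_p9_enter_guest(vcpu);
	/* Radix host and guest means host never runs with guest MMU state */
	local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;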
> + if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) {
> + exsave = local_paca->exgen;
> + } else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) {
> + exsave = local_paca->exnmi;
> + } else { /* trap == 0x200 */
> + exsave = local_paca->exmc;
> + }
> +
> + vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1;
> + vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2;
> + vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)];
> + vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)];
> + vcpu->arch.regs.gpr[11] = exsave[EX_R11/sizeof(u64)];
> + vcpu->arch.regs.gpr[12] = exsave[EX_R12/sizeof(u64)];
> + vcpu->arch.regs.gpr[13] = exsave[EX_R13/sizeof(u64)];
> + vcpu->arch.ppr = exsave[EX_PPR/sizeof(u64)];
> + vcpu->arch.cfar = exsave[EX_CFAR/sizeof(u64)];
> + vcpu->arch.regs.ctr = exsave[EX_CTR/sizeof(u64)];
> +
> + vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
> +
> + if (unlikely(trap == BOOK3S_INTERRUPT_MACHINE_CHECK)) {
> + vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
> + vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
> + kvmppc_realmode_machine_check(vcpu);
> +
> + } else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) {
> + kvmppc_realmode_hmi_handler();
> +
> + } else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) {
> + vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
> +
> + } else if (trap == BOOK3S_INTERRUPT_H_DATA_STORAGE) {
> + vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
> + vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
> + vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
> +
> + } else if (trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
> + vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
> +
> + } else if (trap == BOOK3S_INTERRUPT_H_FAC_UNAVAIL) {
> + vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
> +
> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> + /*
> + * Softpatch interrupt for transactional memory emulation cases
> + * on POWER9 DD2.2. This is early in the guest exit path - we
> + * haven't saved registers or done a treclaim yet.
> + */
> + } else if (trap == BOOK3S_INTERRUPT_HV_SOFTPATCH) {
> + vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
> +
> + /*
> + * The cases we want to handle here are those where the guest
> + * is in real suspend mode and is trying to transition to
> + * transactional mode.
> + */
> + if (local_paca->kvm_hstate.fake_suspend &&
> + (vcpu->arch.shregs.msr & MSR_TS_S)) {
> + if (kvmhv_p9_tm_emulation_early(vcpu)) {
> + /* Prevent it being handled again. */
> + trap = 0;
> + }
> + }
> +#endif
> + }
> +
> + radix_clear_slb();
> +
> + __mtmsrd(msr, 0);
Isn't this the same as mtmsr(msr)?
> +
> + accumulate_time(vcpu, &vcpu->arch.rm_exit);
> +
> + end_timing(vcpu);
> +
> + return trap;
> +}
> +EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9);
<snip>