[RFC PATCH v3 04/12] Validate r1 value before going to host kernel in virtual mode.

Tue Aug 27 05:31:48 EST 2013

From: Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>

We can get machine checks from any context. We need to make sure that
we handle all of them correctly. Once we decode MCE reason and generate
MCE event, we continue in host kernel in virtual mode so that we can
log/display it later. But before going to virtual mode we need to make
sure that r1 points to host kernel stack. But machine check can occur
in any context and r1 may not always point to host kernel stack. In cases
where we can not trust r1 value, we should queue up the MCE event and return
from interrupt. This patch implements the additional checks that helps to
decide whether to deleiver machine check event to host kernel right away
or queue it up and return.

Signed-off-by: Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>
---
 arch/powerpc/kernel/exceptions-64s.S |   72 ++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 651a213..d82ebac 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -785,8 +785,78 @@ BEGIN_FTR_SECTION
 	bl	.save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.machine_check_early
+
+	/*
+	 * We are now going to host kernel in V mode. We need to make sure
+	 * that r1 points to host kernel stack.
+	 *
+	 * If we are coming from userspace then we can continue in host kernel
+	 * in V mode.
+	 * But if we are coming from kernel and r1 does not point to kernel
+	 * stack then we can not continue, instead we return from here.
+	 */
+
+	ld	r12,_MSR(r1)
+	andi.	r11,r12,MSR_PR		/* See if coming from user. */
+	bne	3f			/* continue if we are. */
+
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+	/*
+	 * We are coming from kernel context. Check if we are coming from
+	 * guest. if yes, then we can continue. We will fall through
+	 * do_kvm_200->kvmppc_interrupt which will setup r1 correctly.
+	 */
+	lbz	r11,HSTATE_IN_GUEST(r13)
+	cmpwi	r11,0			/* Check if coming from guest */
+	bne	3f			/* continue if we are. */
+
+	/*
+	 * So, we did not come from guest. That leaves three possibilities:
+	 * a. We come from secondary thread which just came out of nap and
+	 *    about to call kvm_start_guest.
+	 * b. We come from secondary thread which is about to go to nap
+	 *    state (see kvm_no_guest()).
+	 * c. We come from opal context and r1 may be pointing to opal
+	 *    kernel stack.
+	 */
+
+	lbz	r11,HSTATE_HWTHREAD_STATE(r13)
+	cmpwi	r11,KVM_HWTHREAD_IN_NAP	/* Was it nap-ing? or about to */
+	beq	0f		/* Queue up event and return from interrupt */
+#endif
+
+	/*
+	 * So far we checked all possible situations where we can not
+	 * trust r1. Now we can trust r1.
+	 *	r1 < 0		r1 points to host kernel stack
+	 *	r1 > 0		r1 points to opal stack
+	 */
+	ld	r11,GPR1(r1)
+	cmpdi	r11,0			/* check if r1 is in kernel. */
+	blt+	3f			/* Continue if yes. */
+
+	/*
+	 * r1 points to opal stack. Queue up the MCE event and return
+	 * from the interrupt. But before that, check if this is an
+	 * un-recoverable exception. If yes, then stay on emergency
+	 * stack and panic.
+	 */
+0:	andi.	r11,r12,MSR_RI
+	bne	2f
+
+1:	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	.unrecoverable_exception
+	b	1b
+
+	/*
+	 * Return from MC interrupt.
+	 * TODO: Queue up the MCE event so that we can log it later.
+	 */
+2:	MACHINE_CHECK_HANDLER_WINDUP
+	rfid
+
 	/* Deliver the machine check to host kernel in V mode. */
-	MACHINE_CHECK_HANDLER_WINDUP
+3:	MACHINE_CHECK_HANDLER_WINDUP
 	b	machine_check_pSeries
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)