[PATCH] KVM: PPC: Book3S HV: Fix kvmhv_copy_tofrom_guest_radix

Fabiano Rosas farosas at linux.ibm.com
Tue Aug 3 09:49:41 AEST 2021


This function was introduced along with nested HV guest support. It
uses the platform's Radix MMU quadrants[1] to provide a nested
hypervisor with fast access to its nested guests memory
(H_COPY_TOFROM_GUEST hypercall). It has also since been added as a
fast path for the kvmppc_ld/st routines which are used during
instruction emulation.

The commit def0bfdbd603 ("powerpc: use probe_user_read() and
probe_user_write()") changed the low level copy function from
raw_copy_from_user to probe_user_read, which adds a check to
access_ok. In powerpc that is:

 static inline bool __access_ok(unsigned long addr, unsigned long size)
 {
        return addr < TASK_SIZE_MAX && size <= TASK_SIZE_MAX - addr;
 }

and TASK_SIZE_MAX is 0x0010000000000000UL for 64-bit, which means that
setting the two MSBs of the effective address (which correspond to the
quadrant) now cause access_ok to reject the access.

This was not caught earlier because the most common code path via
kvmppc_ld/st contains a fallback (kvm_read_guest) that is likely to
succeed for L1 guests. For nested guests there is no fallback.

Another issue is that probe_user_read (now __copy_from_user_nofault)
does not return the number of not copied bytes in case of failure, so
the destination memory is not being cleared anymore in
kvmhv_copy_from_guest_radix:

 ret = kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, to, NULL, n);
 if (ret > 0)                            <-- always false!
        memset(to + (n - ret), 0, ret);

This patch fixes both issues by introducing two new functions that set
the quadrant bit of the effective address only after checking
access_ok and moving the memset closer to __copy_to_user_inatomic.

1 - for more on quadrants see commit d7b456152230 ("KVM: PPC: Book3S
HV: Implement functions to access quadrants 1 & 2")

Fixes: def0bfdbd603 ("powerpc: use probe_user_read() and probe_user_write()")
Signed-off-by: Fabiano Rosas <farosas at linux.ibm.com>
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 63 ++++++++++++++++++++------
 1 file changed, 49 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index b5905ae4377c..076a8e4a9135 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -30,12 +30,57 @@
  */
 static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 };
 
+/* LPIDR and PIDR must have already been set */
+static long __copy_from_guest_quadrant(void *dst, void __user *src, size_t size,
+				       unsigned long quadrant)
+{
+	long ret = size;
+	mm_segment_t old_fs = force_uaccess_begin();
+
+	if (access_ok(src, size)) {
+		src += (quadrant << 62);
+
+		pagefault_disable();
+		ret = __copy_from_user_inatomic((void __user *)dst, src, size);
+		pagefault_enable();
+	}
+	force_uaccess_end(old_fs);
+
+	if (!ret)
+		return ret;
+
+	memset(dst + (size - ret), 0, ret);
+
+	return -EFAULT;
+}
+
+/* LPIDR and PIDR must have already been set */
+static long __copy_to_guest_quadrant(void __user *dst, void *src, size_t size,
+				     unsigned long quadrant)
+{
+	long ret = -EFAULT;
+	mm_segment_t old_fs = force_uaccess_begin();
+
+	if (access_ok(dst, size)) {
+		dst += (quadrant << 62);
+
+		pagefault_disable();
+		ret = __copy_to_user_inatomic(dst, (void __user *)src, size);
+		pagefault_enable();
+	}
+	force_uaccess_end(old_fs);
+
+	if (ret)
+		return -EFAULT;
+	return 0;
+}
+
 unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
 					      gva_t eaddr, void *to, void *from,
 					      unsigned long n)
 {
 	int old_pid, old_lpid;
-	unsigned long quadrant, ret = n;
+	unsigned long quadrant, ret;
 	bool is_load = !!to;
 
 	/* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */
@@ -47,10 +92,6 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
 	quadrant = 1;
 	if (!pid)
 		quadrant = 2;
-	if (is_load)
-		from = (void *) (eaddr | (quadrant << 62));
-	else
-		to = (void *) (eaddr | (quadrant << 62));
 
 	preempt_disable();
 
@@ -66,9 +107,9 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
 	isync();
 
 	if (is_load)
-		ret = copy_from_user_nofault(to, (const void __user *)from, n);
+		ret = __copy_from_guest_quadrant(to, (void __user *)eaddr, n, quadrant);
 	else
-		ret = copy_to_user_nofault((void __user *)to, from, n);
+		ret = __copy_to_guest_quadrant((void __user *)eaddr, from, n, quadrant);
 
 	/* switch the pid first to avoid running host with unallocated pid */
 	if (quadrant == 1 && pid != old_pid)
@@ -109,13 +150,7 @@ static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
 long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to,
 				 unsigned long n)
 {
-	long ret;
-
-	ret = kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, to, NULL, n);
-	if (ret > 0)
-		memset(to + (n - ret), 0, ret);
-
-	return ret;
+	return kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, to, NULL, n);
 }
 EXPORT_SYMBOL_GPL(kvmhv_copy_from_guest_radix);
 
-- 
2.29.2



More information about the Linuxppc-dev mailing list